blob: ba5a7435459963419feab0c4df6642c5326c57bc [file] [log] [blame]
Alexandre Rames22aa54b2016-10-18 09:32:29 +01001/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
18#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
19
20#include "scheduler.h"
21
22namespace art {
23namespace arm64 {
24
// Approximate per-instruction latencies (in cycles) used by the ARM64
// scheduler's cost model. NOTE(review): these look like generic estimates,
// not figures for one specific core -- see the comment below.

// Scalar memory accesses.
static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

// Calls. `Internal` is the portion modeled separately from the call itself
// (see how `last_visited_internal_latency_` is used in the .cc file).
static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latency.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

// Latencies for specific scalar operations.
static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
// A branch is modeled as costing the same as an integer ALU operation.
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

// SIMD (vector) operation latencies.
static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
Alexandre Rames22aa54b2016-10-18 09:32:29 +010057
// Latency visitor for the ARM64 scheduler. Visiting an instruction stores its
// estimated latency into `last_visited_latency_` (inherited from
// `SchedulingLatencyVisitor` in `scheduler.h`); the per-instruction overrides
// declared below are implemented in the corresponding .cc file.
class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
  M(ArrayGet             , unused)               \
  M(ArrayLength          , unused)               \
  M(ArraySet             , unused)               \
  M(BoundsCheck          , unused)               \
  M(Div                  , unused)               \
  M(InstanceFieldGet     , unused)               \
  M(InstanceOf           , unused)               \
  M(LoadString           , unused)               \
  M(Mul                  , unused)               \
  M(NewArray             , unused)               \
  M(NewInstance          , unused)               \
  M(Rem                  , unused)               \
  M(StaticFieldGet       , unused)               \
  M(SuspendCheck         , unused)               \
  M(TypeConversion       , unused)               \
  M(VecReplicateScalar   , unused)               \
  M(VecExtractScalar     , unused)               \
  M(VecReduce            , unused)               \
  M(VecCnv               , unused)               \
  M(VecNeg               , unused)               \
  M(VecAbs               , unused)               \
  M(VecNot               , unused)               \
  M(VecAdd               , unused)               \
  M(VecHalvingAdd        , unused)               \
  M(VecSub               , unused)               \
  M(VecMul               , unused)               \
  M(VecDiv               , unused)               \
  M(VecMin               , unused)               \
  M(VecMax               , unused)               \
  M(VecAnd               , unused)               \
  M(VecAndNot            , unused)               \
  M(VecOr                , unused)               \
  M(VecXor               , unused)               \
  M(VecShl               , unused)               \
  M(VecShr               , unused)               \
  M(VecUShr              , unused)               \
  M(VecSetScalars        , unused)               \
  M(VecMultiplyAccumulate, unused)               \
  M(VecLoad              , unused)               \
  M(VecStore             , unused)

// Abstract instruction kinds (per the macro name) that still get dedicated
// latency visitors.
#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M) \
  M(BinaryOperation      , unused)                 \
  M(Invoke               , unused)

// Backend-shared instruction kinds (per the macro name) with dedicated
// latency visitors.
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

// Expands to a `void VisitFoo(HFoo*)` override declaration for each listed
// instruction kind; definitions live in the .cc file.
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  // Helpers shared by several Visit* implementations in the .cc file.
  void HandleSimpleArithmeticSIMD(HVecOperation *instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};
134
135class HSchedulerARM64 : public HScheduler {
136 public:
Vladimir Markoced04832018-07-26 14:42:17 +0100137 explicit HSchedulerARM64(SchedulingNodeSelector* selector)
138 : HScheduler(&arm64_latency_visitor_, selector) {}
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100139 ~HSchedulerARM64() override {}
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100140
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100141 bool IsSchedulable(const HInstruction* instruction) const override {
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100142#define CASE_INSTRUCTION_KIND(type, unused) case \
143 HInstruction::InstructionKind::k##type:
144 switch (instruction->GetKind()) {
145 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
146 return true;
147 FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
148 return true;
Artem Serovf0fc4c62017-05-03 15:07:15 +0100149 FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
150 return true;
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100151 default:
152 return HScheduler::IsSchedulable(instruction);
153 }
154#undef CASE_INSTRUCTION_KIND
155 }
156
Artem Serov89ff8b22017-11-20 11:51:05 +0000157 // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
158 // loop boundaries. This is a workaround for the lack of notion of SIMD register in the compiler;
159 // around a call we have to save/restore all live SIMD&FP registers (only lower 64 bits of
160 // SIMD&FP registers are callee saved) so don't reorder such vector instructions.
161 //
162 // TODO: remove this when a proper support of SIMD registers is introduced to the compiler.
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100163 bool IsSchedulingBarrier(const HInstruction* instr) const override {
Artem Serov89ff8b22017-11-20 11:51:05 +0000164 return HScheduler::IsSchedulingBarrier(instr) ||
165 instr->IsVecReduce() ||
166 instr->IsVecExtractScalar() ||
167 instr->IsVecSetScalars() ||
168 instr->IsVecReplicateScalar();
169 }
170
Alexandre Rames22aa54b2016-10-18 09:32:29 +0100171 private:
172 SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
173 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
174};
175
176} // namespace arm64
177} // namespace art
178
179#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_