/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_

#include "base/macros.h"
#include "code_generator_arm_vixl.h"
#include "scheduler.h"
23
namespace art HIDDEN {
namespace arm {
Roland Levillain9983e302017-07-14 14:34:22 +010026// TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
xueliang.zhongf7caf682017-03-01 16:07:02 +000027typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
xueliang.zhongf7caf682017-03-01 16:07:02 +000028
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// NOTE(review): units are abstract scheduler cost units, not literal CPU cycles
// on any particular core — confirm against the scheduler's cost model.
// Basic ALU / data-processing operations.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
// Multiply and divide (divides are markedly slower, doubles slowest).
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
// Conversions between floating-point and integer types.
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
// Control flow and calls; the "Internal" variants model extra work performed
// around the instruction itself (e.g. argument setup for a call).
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
// Read-barrier-instrumented loads and runtime type checks are expensive.
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
52
// Assigns a latency estimate (using the constants above) to each HInstruction
// kind for the AArch32 instruction scheduler. Results are recorded into the
// base class's `last_visited_latency_` field, which the scheduler reads after
// dispatching an instruction to this visitor.
class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
 public:
  // `codegen` must be (or wrap) a CodeGeneratorARMVIXL; it is downcast here
  // and queried later for code-generation details that affect latencies.
  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
      : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}

  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArmIntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
  M(ArrayGet             , unused)            \
  M(ArrayLength          , unused)            \
  M(ArraySet             , unused)            \
  M(Add                  , unused)            \
  M(Sub                  , unused)            \
  M(And                  , unused)            \
  M(Or                   , unused)            \
  M(Ror                  , unused)            \
  M(Xor                  , unused)            \
  M(Shl                  , unused)            \
  M(Shr                  , unused)            \
  M(UShr                 , unused)            \
  M(Mul                  , unused)            \
  M(Div                  , unused)            \
  M(Condition            , unused)            \
  M(Compare              , unused)            \
  M(BoundsCheck          , unused)            \
  M(InstanceFieldGet     , unused)            \
  M(InstanceFieldSet     , unused)            \
  M(InstanceOf           , unused)            \
  M(Invoke               , unused)            \
  M(LoadString           , unused)            \
  M(NewArray             , unused)            \
  M(NewInstance          , unused)            \
  M(Rem                  , unused)            \
  M(StaticFieldGet       , unused)            \
  M(StaticFieldSet       , unused)            \
  M(SuspendCheck         , unused)            \
  M(TypeConversion       , unused)

// Shared (cross-backend) HIR instructions that also get ARM-specific latencies.
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

// Declare one Visit##type override per scheduled instruction kind.
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  // Helpers mirroring the code generator's strategy for conditions, so the
  // visitor can estimate how many instructions each form would emit.
  bool CanGenerateTest(HCondition* cond);
  void HandleGenerateConditionWithZero(IfCondition cond);
  void HandleGenerateLongTestConstant(HCondition* cond);
  void HandleGenerateLongTest(HCondition* cond);
  void HandleGenerateLongComparesAndJumps();
  void HandleGenerateTest(HCondition* cond);
  void HandleGenerateConditionGeneric(HCondition* cond);
  void HandleGenerateEqualLong(HCondition* cond);
  void HandleGenerateConditionLong(HCondition* cond);
  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
  void HandleCondition(HCondition* instr);
  // Latency helpers for groups of related instruction kinds.
  // NOTE(review): "Lantencies" is a long-standing typo kept to match the
  // definitions in the corresponding .cc file.
  void HandleBinaryOperationLantencies(HBinaryOperation* instr);
  void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
  void HandleShiftLatencies(HBinaryOperation* instr);
  void HandleDivRemConstantIntegralLatencies(int32_t imm);
  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleGenerateDataProcInstruction(bool internal_latency = false);
  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);

  // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
  // latency visitors may query CodeGenerator for such information for accurate latency settings.
  CodeGeneratorARMType* codegen_;
};
138
139class HSchedulerARM : public HScheduler {
140 public:
Vladimir Markoced04832018-07-26 14:42:17 +0100141 HSchedulerARM(SchedulingNodeSelector* selector,
xueliang.zhongf7caf682017-03-01 16:07:02 +0000142 SchedulingLatencyVisitorARM* arm_latency_visitor)
Vladimir Markoced04832018-07-26 14:42:17 +0100143 : HScheduler(arm_latency_visitor, selector) {}
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100144 ~HSchedulerARM() override {}
xueliang.zhongf7caf682017-03-01 16:07:02 +0000145
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100146 bool IsSchedulable(const HInstruction* instruction) const override {
xueliang.zhongf7caf682017-03-01 16:07:02 +0000147#define CASE_INSTRUCTION_KIND(type, unused) case \
148 HInstruction::InstructionKind::k##type:
149 switch (instruction->GetKind()) {
150 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
151 return true;
152 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
153 return true;
154 default:
155 return HScheduler::IsSchedulable(instruction);
156 }
157#undef CASE_INSTRUCTION_KIND
158 }
159
160 private:
161 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
162};
163
}  // namespace arm
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_