/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
18#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
19
20#include "code_generator_arm_vixl.h"
xueliang.zhongf7caf682017-03-01 16:07:02 +000021#include "scheduler.h"
22
23namespace art {
24namespace arm {
Roland Levillain9983e302017-07-14 14:34:22 +010025// TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere?
xueliang.zhongf7caf682017-03-01 16:07:02 +000026typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
xueliang.zhongf7caf682017-03-01 16:07:02 +000027
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// NOTE(review): these are scheduling weights consumed by the latency visitor
// below, not cycle-accurate figures for any particular core.
// Arithmetic / logic operations.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
// Control flow and calls ("Internal" = extra cost beyond the branch itself).
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
// Read barrier / runtime interactions.
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
51
// Computes per-instruction latency estimates for the AArch32 scheduler by
// visiting each HInstruction kind and setting `last_visited_latency_` (and,
// where relevant, `last_visited_internal_latency_`) from the constants above.
class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
 public:
  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
      : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}

  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArmIntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
  M(ArrayGet         , unused)                \
  M(ArrayLength      , unused)                \
  M(ArraySet         , unused)                \
  M(Add              , unused)                \
  M(Sub              , unused)                \
  M(And              , unused)                \
  M(Or               , unused)                \
  M(Ror              , unused)                \
  M(Xor              , unused)                \
  M(Shl              , unused)                \
  M(Shr              , unused)                \
  M(UShr             , unused)                \
  M(Mul              , unused)                \
  M(Div              , unused)                \
  M(Condition        , unused)                \
  M(Compare          , unused)                \
  M(BoundsCheck      , unused)                \
  M(InstanceFieldGet , unused)                \
  M(InstanceFieldSet , unused)                \
  M(InstanceOf       , unused)                \
  M(Invoke           , unused)                \
  M(LoadString       , unused)                \
  M(NewArray         , unused)                \
  M(NewInstance      , unused)                \
  M(Rem              , unused)                \
  M(StaticFieldGet   , unused)                \
  M(StaticFieldSet   , unused)                \
  M(SuspendCheck     , unused)                \
  M(TypeConversion   , unused)

// Shared (cross-backend) HIR instructions that also get ARM-specific latencies.
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

// Declares one Visit##type override per instruction listed in the macros above.
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) override;

  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  // Helpers mirroring the code-generator's emission paths so the latency
  // model can account for the instruction sequences the codegen would emit
  // (e.g. long compares, conditions, shifter-operand data processing).
  bool CanGenerateTest(HCondition* cond);
  void HandleGenerateConditionWithZero(IfCondition cond);
  void HandleGenerateLongTestConstant(HCondition* cond);
  void HandleGenerateLongTest(HCondition* cond);
  void HandleGenerateLongComparesAndJumps();
  void HandleGenerateTest(HCondition* cond);
  void HandleGenerateConditionGeneric(HCondition* cond);
  void HandleGenerateEqualLong(HCondition* cond);
  void HandleGenerateConditionLong(HCondition* cond);
  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
  void HandleCondition(HCondition* instr);
  void HandleBinaryOperationLantencies(HBinaryOperation* instr);
  void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
  void HandleShiftLatencies(HBinaryOperation* instr);
  void HandleDivRemConstantIntegralLatencies(int32_t imm);
  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleGenerateDataProcInstruction(bool internal_latency = false);
  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);

  // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
  // latency visitors may query CodeGenerator for such information for accurate latency settings.
  CodeGeneratorARMType* codegen_;
};
137
138class HSchedulerARM : public HScheduler {
139 public:
Vladimir Markoced04832018-07-26 14:42:17 +0100140 HSchedulerARM(SchedulingNodeSelector* selector,
xueliang.zhongf7caf682017-03-01 16:07:02 +0000141 SchedulingLatencyVisitorARM* arm_latency_visitor)
Vladimir Markoced04832018-07-26 14:42:17 +0100142 : HScheduler(arm_latency_visitor, selector) {}
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100143 ~HSchedulerARM() override {}
xueliang.zhongf7caf682017-03-01 16:07:02 +0000144
Roland Levillainbbc6e7e2018-08-24 16:58:47 +0100145 bool IsSchedulable(const HInstruction* instruction) const override {
xueliang.zhongf7caf682017-03-01 16:07:02 +0000146#define CASE_INSTRUCTION_KIND(type, unused) case \
147 HInstruction::InstructionKind::k##type:
148 switch (instruction->GetKind()) {
149 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
150 return true;
151 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
152 return true;
153 default:
154 return HScheduler::IsSchedulable(instruction);
155 }
156#undef CASE_INSTRUCTION_KIND
157 }
158
159 private:
160 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
161};
162
}  // namespace arm
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_