blob: b89c4fe72e23f3102c21c87f98e3b774f3db3f13 [file] [log] [blame]
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
18#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
19
xueliang.zhong72a75f72017-05-25 16:50:39 +010020#ifdef ART_USE_OLD_ARM_BACKEND
21#include "code_generator_arm.h"
22#else
xueliang.zhongf7caf682017-03-01 16:07:02 +000023#include "code_generator_arm_vixl.h"
xueliang.zhong72a75f72017-05-25 16:50:39 +010024#endif
xueliang.zhongf7caf682017-03-01 16:07:02 +000025#include "scheduler.h"
26
27namespace art {
28namespace arm {
29#ifdef ART_USE_OLD_ARM_BACKEND
30typedef CodeGeneratorARM CodeGeneratorARMType;
31#else
32typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
33#endif
34
// AArch32 instruction latencies.
// We currently assume that all ARM CPUs share the same instruction latency list.
// The following latencies were tuned based on performance experiments and
// automatic tuning using differential evolution approach on various benchmarks.
// NOTE(review): units are relative scheduling weights consumed by
// SchedulingLatencyVisitorARM below — only their ratios matter, they are not
// exact cycle counts for any particular core.
// Integer and floating-point ALU operations.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;
// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
// Control flow, calls, and composite sequences.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
58
// Latency visitor for AArch32: estimates the latency of each HInstruction so
// the scheduler can order instructions to hide stalls. Each Visit* method
// stores its estimate into the last_visited_* fields inherited from
// SchedulingLatencyVisitor (see VisitInstruction below for the default case).
class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor {
 public:
  // `codegen` must be the ARM code generator matching CodeGeneratorARMType;
  // the down_cast is unchecked, so passing another backend's generator is UB.
  explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen)
      : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {}

  // Default visitor for instructions not handled specifically below.
  // NOTE(review): if the base visitor declares this virtual with the same
  // signature (as the Visit##type declarations below suggest), this should be
  // marked OVERRIDE — confirm against scheduler.h.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) {
    last_visited_latency_ = kArmIntegerOpLatency;
  }

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M) \
  M(ArrayGet , unused)                        \
  M(ArrayLength , unused)                     \
  M(ArraySet , unused)                        \
  M(Add , unused)                             \
  M(Sub , unused)                             \
  M(And , unused)                             \
  M(Or , unused)                              \
  M(Ror , unused)                             \
  M(Xor , unused)                             \
  M(Shl , unused)                             \
  M(Shr , unused)                             \
  M(UShr , unused)                            \
  M(Mul , unused)                             \
  M(Div , unused)                             \
  M(Condition , unused)                       \
  M(Compare , unused)                         \
  M(BoundsCheck , unused)                     \
  M(InstanceFieldGet , unused)                \
  M(InstanceFieldSet , unused)                \
  M(InstanceOf , unused)                      \
  M(Invoke , unused)                          \
  M(LoadString , unused)                      \
  M(NewArray , unused)                        \
  M(NewInstance , unused)                     \
  M(Rem , unused)                             \
  M(StaticFieldGet , unused)                  \
  M(StaticFieldSet , unused)                  \
  M(SuspendCheck , unused)                    \
  M(TypeConversion , unused)

// Instructions shared across backends that still need ARM latency estimates.
#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

// Declare one Visit override per instruction kind listed above; the
// definitions live in scheduler_arm.cc.
#define DECLARE_VISIT_INSTRUCTION(type, unused) \
  void Visit##type(H##type* instruction) OVERRIDE;

  FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  // Latency-estimation helpers, presumably mirroring the code-generation paths
  // of the same names in the ARM code generator (definitions not visible here).
  bool CanGenerateTest(HCondition* cond);
  void HandleGenerateConditionWithZero(IfCondition cond);
  void HandleGenerateLongTestConstant(HCondition* cond);
  void HandleGenerateLongTest(HCondition* cond);
  void HandleGenerateLongComparesAndJumps();
  void HandleGenerateTest(HCondition* cond);
  void HandleGenerateConditionGeneric(HCondition* cond);
  void HandleGenerateEqualLong(HCondition* cond);
  void HandleGenerateConditionLong(HCondition* cond);
  void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond);
  void HandleCondition(HCondition* instr);
  // NOTE(review): "Lantencies" below is a typo for "Latencies"; renaming would
  // also require updating the definitions in scheduler_arm.cc.
  void HandleBinaryOperationLantencies(HBinaryOperation* instr);
  void HandleBitwiseOperationLantencies(HBinaryOperation* instr);
  void HandleShiftLatencies(HBinaryOperation* instr);
  void HandleDivRemConstantIntegralLatencies(int32_t imm);
  void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info);
  void HandleGenerateDataProcInstruction(bool internal_latency = false);
  void HandleGenerateDataProc(HDataProcWithShifterOp* instruction);
  void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction);

  // The latency setting for each HInstruction depends on how CodeGenerator may generate code,
  // latency visitors may query CodeGenerator for such information for accurate latency settings.
  CodeGeneratorARMType* codegen_;
};
144
145class HSchedulerARM : public HScheduler {
146 public:
147 HSchedulerARM(ArenaAllocator* arena,
148 SchedulingNodeSelector* selector,
149 SchedulingLatencyVisitorARM* arm_latency_visitor)
150 : HScheduler(arena, arm_latency_visitor, selector) {}
151 ~HSchedulerARM() OVERRIDE {}
152
153 bool IsSchedulable(const HInstruction* instruction) const OVERRIDE {
154#define CASE_INSTRUCTION_KIND(type, unused) case \
155 HInstruction::InstructionKind::k##type:
156 switch (instruction->GetKind()) {
157 FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
158 return true;
159 FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND)
160 return true;
161 default:
162 return HScheduler::IsSchedulable(instruction);
163 }
164#undef CASE_INSTRUCTION_KIND
165 }
166
167 private:
168 DISALLOW_COPY_AND_ASSIGN(HSchedulerARM);
169};
170
171} // namespace arm
172} // namespace art
173
174#endif // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_