| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * Copyright (C) 2017 The Android Open Source Project | 
 | 3 |  * | 
 | 4 |  * Licensed under the Apache License, Version 2.0 (the "License"); | 
 | 5 |  * you may not use this file except in compliance with the License. | 
 | 6 |  * You may obtain a copy of the License at | 
 | 7 |  * | 
 | 8 |  *      http://www.apache.org/licenses/LICENSE-2.0 | 
 | 9 |  * | 
 | 10 |  * Unless required by applicable law or agreed to in writing, software | 
 | 11 |  * distributed under the License is distributed on an "AS IS" BASIS, | 
 | 12 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | 13 |  * See the License for the specific language governing permissions and | 
 | 14 |  * limitations under the License. | 
 | 15 |  */ | 
 | 16 |  | 
 | 17 | #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ | 
 | 18 | #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ | 
 | 19 |  | 
 | 20 | #include "code_generator_arm_vixl.h" | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 21 | #include "scheduler.h" | 
 | 22 |  | 
| Vladimir Marko | 0a51605 | 2019-10-14 13:00:44 +0000 | [diff] [blame] | 23 | namespace art { | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 24 | namespace arm { | 
| Roland Levillain | 9983e30 | 2017-07-14 14:34:22 +0100 | [diff] [blame] | 25 | // TODO: Replace CodeGeneratorARMType with CodeGeneratorARMVIXL everywhere? | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 26 | typedef CodeGeneratorARMVIXL CodeGeneratorARMType; | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 27 |  | 
 // Instruction latencies for AArch32.
// A single latency list is currently assumed to be shared by all ARM CPUs.
// The values below were tuned through performance experiments and automatic
// tuning (differential evolution approach) on various benchmarks.

// Basic integer / floating-point / shifter-operand data processing.
static constexpr uint32_t kArmIntegerOpLatency = 2;
static constexpr uint32_t kArmFloatingPointOpLatency = 11;
static constexpr uint32_t kArmDataProcWithShifterOpLatency = 4;

// Multiplication and division.
static constexpr uint32_t kArmMulIntegerLatency = 6;
static constexpr uint32_t kArmMulFloatingPointLatency = 11;
static constexpr uint32_t kArmDivIntegerLatency = 10;
static constexpr uint32_t kArmDivFloatLatency = 20;
static constexpr uint32_t kArmDivDoubleLatency = 25;

// Type conversions between floating point and integer.
static constexpr uint32_t kArmTypeConversionFloatingPointIntegerLatency = 11;

// Memory accesses and barriers.
static constexpr uint32_t kArmMemoryLoadLatency = 9;
static constexpr uint32_t kArmMemoryStoreLatency = 9;
static constexpr uint32_t kArmMemoryBarrierLatency = 6;
static constexpr uint32_t kArmLoadWithBakerReadBarrierLatency = 18;

// Branches and calls.
static constexpr uint32_t kArmBranchLatency = 4;
static constexpr uint32_t kArmCallLatency = 5;
static constexpr uint32_t kArmCallInternalLatency = 29;

// Remaining special cases.
static constexpr uint32_t kArmLoadStringInternalLatency = 10;
static constexpr uint32_t kArmNopLatency = 2;
static constexpr uint32_t kArmRuntimeTypeCheckLatency = 46;
 | 51 |  | 
 | 52 | class SchedulingLatencyVisitorARM : public SchedulingLatencyVisitor { | 
 | 53 |  public: | 
 | 54 |   explicit SchedulingLatencyVisitorARM(CodeGenerator* codegen) | 
 | 55 |       : codegen_(down_cast<CodeGeneratorARMType*>(codegen)) {} | 
 | 56 |  | 
 | 57 |   // Default visitor for instructions not handled specifically below. | 
| Yi Kong | 3940254 | 2019-03-24 02:47:16 -0700 | [diff] [blame] | 58 |   void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override { | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 59 |     last_visited_latency_ = kArmIntegerOpLatency; | 
 | 60 |   } | 
 | 61 |  | 
 | 62 | // We add a second unused parameter to be able to use this macro like the others | 
 | 63 | // defined in `nodes.h`. | 
 | 64 | #define FOR_EACH_SCHEDULED_ARM_INSTRUCTION(M)    \ | 
 | 65 |   M(ArrayGet         , unused)                   \ | 
 | 66 |   M(ArrayLength      , unused)                   \ | 
 | 67 |   M(ArraySet         , unused)                   \ | 
 | 68 |   M(Add              , unused)                   \ | 
 | 69 |   M(Sub              , unused)                   \ | 
 | 70 |   M(And              , unused)                   \ | 
 | 71 |   M(Or               , unused)                   \ | 
 | 72 |   M(Ror              , unused)                   \ | 
 | 73 |   M(Xor              , unused)                   \ | 
 | 74 |   M(Shl              , unused)                   \ | 
 | 75 |   M(Shr              , unused)                   \ | 
 | 76 |   M(UShr             , unused)                   \ | 
 | 77 |   M(Mul              , unused)                   \ | 
 | 78 |   M(Div              , unused)                   \ | 
 | 79 |   M(Condition        , unused)                   \ | 
 | 80 |   M(Compare          , unused)                   \ | 
 | 81 |   M(BoundsCheck      , unused)                   \ | 
 | 82 |   M(InstanceFieldGet , unused)                   \ | 
 | 83 |   M(InstanceFieldSet , unused)                   \ | 
 | 84 |   M(InstanceOf       , unused)                   \ | 
 | 85 |   M(Invoke           , unused)                   \ | 
 | 86 |   M(LoadString       , unused)                   \ | 
 | 87 |   M(NewArray         , unused)                   \ | 
 | 88 |   M(NewInstance      , unused)                   \ | 
 | 89 |   M(Rem              , unused)                   \ | 
 | 90 |   M(StaticFieldGet   , unused)                   \ | 
 | 91 |   M(StaticFieldSet   , unused)                   \ | 
 | 92 |   M(SuspendCheck     , unused)                   \ | 
 | 93 |   M(TypeConversion   , unused) | 
 | 94 |  | 
 | 95 | #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \ | 
 | 96 |   M(BitwiseNegatedRight, unused)                 \ | 
 | 97 |   M(MultiplyAccumulate, unused)                  \ | 
 | 98 |   M(IntermediateAddress, unused)                 \ | 
| Artem Serov | f0fc4c6 | 2017-05-03 15:07:15 +0100 | [diff] [blame] | 99 |   M(IntermediateAddressIndex, unused)            \ | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 100 |   M(DataProcWithShifterOp, unused) | 
 | 101 |  | 
 | 102 | #define DECLARE_VISIT_INSTRUCTION(type, unused)  \ | 
| Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 103 |   void Visit##type(H##type* instruction) override; | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 104 |  | 
 | 105 |   FOR_EACH_SCHEDULED_ARM_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) | 
 | 106 |   FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION) | 
 | 107 |   FOR_EACH_CONCRETE_INSTRUCTION_ARM(DECLARE_VISIT_INSTRUCTION) | 
 | 108 |  | 
 | 109 | #undef DECLARE_VISIT_INSTRUCTION | 
 | 110 |  | 
 | 111 |  private: | 
| xueliang.zhong | bf9e21a | 2017-06-15 11:01:11 +0100 | [diff] [blame] | 112 |   bool CanGenerateTest(HCondition* cond); | 
 | 113 |   void HandleGenerateConditionWithZero(IfCondition cond); | 
 | 114 |   void HandleGenerateLongTestConstant(HCondition* cond); | 
 | 115 |   void HandleGenerateLongTest(HCondition* cond); | 
 | 116 |   void HandleGenerateLongComparesAndJumps(); | 
 | 117 |   void HandleGenerateTest(HCondition* cond); | 
 | 118 |   void HandleGenerateConditionGeneric(HCondition* cond); | 
 | 119 |   void HandleGenerateEqualLong(HCondition* cond); | 
 | 120 |   void HandleGenerateConditionLong(HCondition* cond); | 
 | 121 |   void HandleGenerateConditionIntegralOrNonPrimitive(HCondition* cond); | 
 | 122 |   void HandleCondition(HCondition* instr); | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 123 |   void HandleBinaryOperationLantencies(HBinaryOperation* instr); | 
 | 124 |   void HandleBitwiseOperationLantencies(HBinaryOperation* instr); | 
 | 125 |   void HandleShiftLatencies(HBinaryOperation* instr); | 
 | 126 |   void HandleDivRemConstantIntegralLatencies(int32_t imm); | 
 | 127 |   void HandleFieldSetLatencies(HInstruction* instruction, const FieldInfo& field_info); | 
 | 128 |   void HandleFieldGetLatencies(HInstruction* instruction, const FieldInfo& field_info); | 
 | 129 |   void HandleGenerateDataProcInstruction(bool internal_latency = false); | 
 | 130 |   void HandleGenerateDataProc(HDataProcWithShifterOp* instruction); | 
 | 131 |   void HandleGenerateLongDataProc(HDataProcWithShifterOp* instruction); | 
 | 132 |  | 
 | 133 |   // The latency setting for each HInstruction depends on how CodeGenerator may generate code, | 
 | 134 |   // latency visitors may query CodeGenerator for such information for accurate latency settings. | 
 | 135 |   CodeGeneratorARMType* codegen_; | 
 | 136 | }; | 
 | 137 |  | 
 | 138 | class HSchedulerARM : public HScheduler { | 
 | 139 |  public: | 
| Vladimir Marko | ced0483 | 2018-07-26 14:42:17 +0100 | [diff] [blame] | 140 |   HSchedulerARM(SchedulingNodeSelector* selector, | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 141 |                 SchedulingLatencyVisitorARM* arm_latency_visitor) | 
| Vladimir Marko | ced0483 | 2018-07-26 14:42:17 +0100 | [diff] [blame] | 142 |       : HScheduler(arm_latency_visitor, selector) {} | 
| Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 143 |   ~HSchedulerARM() override {} | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 144 |  | 
| Roland Levillain | bbc6e7e | 2018-08-24 16:58:47 +0100 | [diff] [blame] | 145 |   bool IsSchedulable(const HInstruction* instruction) const override { | 
| xueliang.zhong | f7caf68 | 2017-03-01 16:07:02 +0000 | [diff] [blame] | 146 | #define CASE_INSTRUCTION_KIND(type, unused) case \ | 
 | 147 |   HInstruction::InstructionKind::k##type: | 
 | 148 |     switch (instruction->GetKind()) { | 
 | 149 |       FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND) | 
 | 150 |         return true; | 
 | 151 |       FOR_EACH_CONCRETE_INSTRUCTION_ARM(CASE_INSTRUCTION_KIND) | 
 | 152 |         return true; | 
 | 153 |       default: | 
 | 154 |         return HScheduler::IsSchedulable(instruction); | 
 | 155 |     } | 
 | 156 | #undef CASE_INSTRUCTION_KIND | 
 | 157 |   } | 
 | 158 |  | 
 | 159 |  private: | 
 | 160 |   DISALLOW_COPY_AND_ASSIGN(HSchedulerARM); | 
 | 161 | }; | 
 | 162 |  | 
 | 163 | }  // namespace arm | 
 | 164 | }  // namespace art | 
 | 165 |  | 
 | 166 | #endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_ |