/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_
#define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_

#include "scheduler.h"

namespace art {
namespace arm64 {

static constexpr uint32_t kArm64MemoryLoadLatency = 5;
static constexpr uint32_t kArm64MemoryStoreLatency = 3;

static constexpr uint32_t kArm64CallInternalLatency = 10;
static constexpr uint32_t kArm64CallLatency = 5;

// AArch64 instruction latencies.
// We currently assume that all arm64 CPUs share the same instruction latency list.
static constexpr uint32_t kArm64IntegerOpLatency = 2;
static constexpr uint32_t kArm64FloatingPointOpLatency = 5;

static constexpr uint32_t kArm64DataProcWithShifterOpLatency = 3;
static constexpr uint32_t kArm64DivDoubleLatency = 30;
static constexpr uint32_t kArm64DivFloatLatency = 15;
static constexpr uint32_t kArm64DivIntegerLatency = 5;
static constexpr uint32_t kArm64LoadStringInternalLatency = 7;
static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
static constexpr uint32_t kArm64MulIntegerLatency = 6;
static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;

static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;

class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
 public:
  // Default visitor for instructions not handled specifically below.
  void VisitInstruction(HInstruction* ATTRIBUTE_UNUSED) override {
    last_visited_latency_ = kArm64IntegerOpLatency;
  }
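
  // Illustration only -- a sketch, not a real declaration (the concrete
  // overrides declared below are implemented in scheduler_arm64.cc). A
  // specialized visitor typically classifies the instruction and picks one of
  // the latency constants above, e.g. (assuming the
  // DataType::IsFloatingPointType() helper from data_type.h):
  //
  //   void SchedulingLatencyVisitorARM64::VisitMul(HMul* instr) {
  //     last_visited_latency_ = DataType::IsFloatingPointType(instr->GetResultType())
  //         ? kArm64MulFloatingPointLatency
  //         : kArm64MulIntegerLatency;
  //   }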

// We add a second unused parameter to be able to use this macro like the others
// defined in `nodes.h`.
#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
  M(ArrayGet             , unused)                   \
  M(ArrayLength          , unused)                   \
  M(ArraySet             , unused)                   \
  M(BoundsCheck          , unused)                   \
  M(Div                  , unused)                   \
  M(InstanceFieldGet     , unused)                   \
  M(InstanceOf           , unused)                   \
  M(LoadString           , unused)                   \
  M(Mul                  , unused)                   \
  M(NewArray             , unused)                   \
  M(NewInstance          , unused)                   \
  M(Rem                  , unused)                   \
  M(StaticFieldGet       , unused)                   \
  M(SuspendCheck         , unused)                   \
  M(TypeConversion       , unused)                   \
  M(VecReplicateScalar   , unused)                   \
  M(VecExtractScalar     , unused)                   \
  M(VecReduce            , unused)                   \
  M(VecCnv               , unused)                   \
  M(VecNeg               , unused)                   \
  M(VecAbs               , unused)                   \
  M(VecNot               , unused)                   \
  M(VecAdd               , unused)                   \
  M(VecHalvingAdd        , unused)                   \
  M(VecSub               , unused)                   \
  M(VecMul               , unused)                   \
  M(VecDiv               , unused)                   \
  M(VecMin               , unused)                   \
  M(VecMax               , unused)                   \
  M(VecAnd               , unused)                   \
  M(VecAndNot            , unused)                   \
  M(VecOr                , unused)                   \
  M(VecXor               , unused)                   \
  M(VecShl               , unused)                   \
  M(VecShr               , unused)                   \
  M(VecUShr              , unused)                   \
  M(VecSetScalars        , unused)                   \
  M(VecMultiplyAccumulate, unused)                   \
  M(VecLoad              , unused)                   \
  M(VecStore             , unused)

#define FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(M)   \
  M(BinaryOperation      , unused)                   \
  M(Invoke               , unused)
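
// Note: SchedulingLatencyVisitor derives from HGraphDelegateVisitor (see
// scheduler.h), so the VisitBinaryOperation() and VisitInvoke() overrides
// declared through this list act as fallbacks for whole instruction families:
// e.g. an HAdd with no dedicated visitor delegates to VisitBinaryOperation().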

#define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
  M(BitwiseNegatedRight, unused)                 \
  M(MultiplyAccumulate, unused)                  \
  M(IntermediateAddress, unused)                 \
  M(IntermediateAddressIndex, unused)            \
  M(DataProcWithShifterOp, unused)

#define DECLARE_VISIT_INSTRUCTION(type, unused)  \
  void Visit##type(H##type* instruction) override;
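
// For instance, `M(ArrayGet, unused)` run through DECLARE_VISIT_INSTRUCTION
// expands to:
//
//   void VisitArrayGet(HArrayGet* instruction) override;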

  FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_ABSTRACT_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(DECLARE_VISIT_INSTRUCTION)
  FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)

#undef DECLARE_VISIT_INSTRUCTION

 private:
  void HandleSimpleArithmeticSIMD(HVecOperation* instr);
  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
};

class HSchedulerARM64 : public HScheduler {
 public:
  explicit HSchedulerARM64(SchedulingNodeSelector* selector)
      : HScheduler(&arm64_latency_visitor_, selector) {}
  ~HSchedulerARM64() override {}

  bool IsSchedulable(const HInstruction* instruction) const override {
#define CASE_INSTRUCTION_KIND(type, unused) \
  case HInstruction::InstructionKind::k##type:
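// For example, `CASE_INSTRUCTION_KIND(BitwiseNegatedRight, unused)` expands to
// `case HInstruction::InstructionKind::kBitwiseNegatedRight:`.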
    switch (instruction->GetKind()) {
      FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
        return true;
      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
        return true;
      default:
        return HScheduler::IsSchedulable(instruction);
    }
#undef CASE_INSTRUCTION_KIND
  }

  // Treat as scheduling barriers those vector instructions whose live ranges exceed the vectorized
  // loop boundaries. This is a workaround for the lack of a notion of SIMD registers in the
  // compiler; around a call we have to save/restore all live SIMD&FP registers (only the lower
  // 64 bits of the SIMD&FP registers are callee-saved), so such vector instructions must not be
  // reordered.
  //
  // TODO: Remove this when proper support for SIMD registers is introduced to the compiler.
  bool IsSchedulingBarrier(const HInstruction* instr) const override {
    return HScheduler::IsSchedulingBarrier(instr) ||
           instr->IsVecReduce() ||
           instr->IsVecExtractScalar() ||
           instr->IsVecSetScalars() ||
           instr->IsVecReplicateScalar();
  }

 private:
  SchedulingLatencyVisitorARM64 arm64_latency_visitor_;
  DISALLOW_COPY_AND_ASSIGN(HSchedulerARM64);
};
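
// Example driver (a hypothetical sketch; in practice the scheduler is
// instantiated by HInstructionScheduling::Run() in scheduler.cc, and the
// selector implementations come from scheduler.h):
//
//   CriticalPathSchedulingNodeSelector selector;
//   HSchedulerARM64 scheduler(&selector);
//   scheduler.Schedule(graph);  // `graph` is the HGraph being compiled.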

}  // namespace arm64
}  // namespace art

#endif  // ART_COMPILER_OPTIMIZING_SCHEDULER_ARM64_H_