blob: 6e6e3873f967062fe7dfdff826e569015014922a [file] [log] [blame]
/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#ifndef ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
18#define ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_
19
Aart Bik281c6812016-08-26 11:31:48 -070020#include "induction_var_range.h"
21#include "nodes.h"
22#include "optimization.h"
23
24namespace art {
25
Aart Bik92685a82017-03-06 11:13:43 -080026class CompilerDriver;
27
Aart Bik281c6812016-08-26 11:31:48 -070028/**
29 * Loop optimizations. Builds a loop hierarchy and applies optimizations to
Aart Bikf8f5a162017-02-06 15:35:29 -080030 * the detected nested loops, such as removal of dead induction and empty loops
31 * and inner loop vectorization.
Aart Bik281c6812016-08-26 11:31:48 -070032 */
33class HLoopOptimization : public HOptimization {
34 public:
Aart Bik92685a82017-03-06 11:13:43 -080035 HLoopOptimization(HGraph* graph,
36 CompilerDriver* compiler_driver,
Aart Bikb92cc332017-09-06 15:53:17 -070037 HInductionVarAnalysis* induction_analysis,
38 OptimizingCompilerStats* stats);
Aart Bik281c6812016-08-26 11:31:48 -070039
40 void Run() OVERRIDE;
41
42 static constexpr const char* kLoopOptimizationPassName = "loop_optimization";
43
44 private:
45 /**
46 * A single loop inside the loop hierarchy representation.
47 */
Aart Bik96202302016-10-04 17:33:56 -070048 struct LoopNode : public ArenaObject<kArenaAllocLoopOptimization> {
Aart Bik281c6812016-08-26 11:31:48 -070049 explicit LoopNode(HLoopInformation* lp_info)
50 : loop_info(lp_info),
51 outer(nullptr),
52 inner(nullptr),
53 previous(nullptr),
54 next(nullptr) {}
Aart Bikf8f5a162017-02-06 15:35:29 -080055 HLoopInformation* loop_info;
Aart Bik281c6812016-08-26 11:31:48 -070056 LoopNode* outer;
57 LoopNode* inner;
58 LoopNode* previous;
59 LoopNode* next;
60 };
61
Aart Bikf8f5a162017-02-06 15:35:29 -080062 /*
63 * Vectorization restrictions (bit mask).
64 */
65 enum VectorRestrictions {
Aart Bik0148de42017-09-05 09:25:01 -070066 kNone = 0, // no restrictions
67 kNoMul = 1 << 0, // no multiplication
68 kNoDiv = 1 << 1, // no division
69 kNoShift = 1 << 2, // no shift
70 kNoShr = 1 << 3, // no arithmetic shift right
71 kNoHiBits = 1 << 4, // "wider" operations cannot bring in higher order bits
72 kNoSignedHAdd = 1 << 5, // no signed halving add
73 kNoUnroundedHAdd = 1 << 6, // no unrounded halving add
74 kNoAbs = 1 << 7, // no absolute value
75 kNoMinMax = 1 << 8, // no min/max
76 kNoStringCharAt = 1 << 9, // no StringCharAt
77 kNoReduction = 1 << 10, // no reduction
Aart Bikdbbac8f2017-09-01 13:06:08 -070078 kNoSAD = 1 << 11, // no sum of absolute differences (SAD)
Aart Bikf8f5a162017-02-06 15:35:29 -080079 };
Aart Bik96202302016-10-04 17:33:56 -070080
Aart Bikf8f5a162017-02-06 15:35:29 -080081 /*
82 * Vectorization mode during synthesis
83 * (sequential peeling/cleanup loop or vector loop).
84 */
85 enum VectorMode {
86 kSequential,
87 kVector
88 };
89
90 /*
91 * Representation of a unit-stride array reference.
92 */
93 struct ArrayReference {
Vladimir Marko0ebe0d82017-09-21 22:50:39 +010094 ArrayReference(HInstruction* b, HInstruction* o, DataType::Type t, bool l)
Aart Bikf8f5a162017-02-06 15:35:29 -080095 : base(b), offset(o), type(t), lhs(l) { }
96 bool operator<(const ArrayReference& other) const {
97 return
98 (base < other.base) ||
99 (base == other.base &&
100 (offset < other.offset || (offset == other.offset &&
101 (type < other.type ||
102 (type == other.type && lhs < other.lhs)))));
103 }
104 HInstruction* base; // base address
105 HInstruction* offset; // offset + i
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100106 DataType::Type type; // component type
Aart Bikf8f5a162017-02-06 15:35:29 -0800107 bool lhs; // def/use
108 };
109
Aart Bikb29f6842017-07-28 15:58:41 -0700110 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800111 // Loop setup and traversal.
Aart Bikb29f6842017-07-28 15:58:41 -0700112 //
113
Aart Bikf8f5a162017-02-06 15:35:29 -0800114 void LocalRun();
Aart Bik281c6812016-08-26 11:31:48 -0700115 void AddLoop(HLoopInformation* loop_info);
116 void RemoveLoop(LoopNode* node);
Aart Bik281c6812016-08-26 11:31:48 -0700117
Aart Bikb29f6842017-07-28 15:58:41 -0700118 // Traverses all loops inner to outer to perform simplifications and optimizations.
119 // Returns true if loops nested inside current loop (node) have changed.
120 bool TraverseLoopsInnerToOuter(LoopNode* node);
121
122 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800123 // Optimization.
Aart Bikb29f6842017-07-28 15:58:41 -0700124 //
125
Aart Bik281c6812016-08-26 11:31:48 -0700126 void SimplifyInduction(LoopNode* node);
Aart Bik482095d2016-10-10 15:39:10 -0700127 void SimplifyBlocks(LoopNode* node);
Aart Bikf8f5a162017-02-06 15:35:29 -0800128
Aart Bikb29f6842017-07-28 15:58:41 -0700129 // Performs optimizations specific to inner loop (empty loop removal,
130 // unrolling, vectorization). Returns true if anything changed.
131 bool OptimizeInnerLoop(LoopNode* node);
132
133 //
Aart Bikf8f5a162017-02-06 15:35:29 -0800134 // Vectorization analysis and synthesis.
Aart Bikb29f6842017-07-28 15:58:41 -0700135 //
136
Aart Bik14a68b42017-06-08 14:06:58 -0700137 bool ShouldVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800138 void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
139 void GenerateNewLoop(LoopNode* node,
140 HBasicBlock* block,
141 HBasicBlock* new_preheader,
142 HInstruction* lo,
143 HInstruction* hi,
Aart Bik14a68b42017-06-08 14:06:58 -0700144 HInstruction* step,
145 uint32_t unroll);
Aart Bikf8f5a162017-02-06 15:35:29 -0800146 bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
147 bool VectorizeUse(LoopNode* node,
148 HInstruction* instruction,
149 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100150 DataType::Type type,
Aart Bikf8f5a162017-02-06 15:35:29 -0800151 uint64_t restrictions);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100152 bool TrySetVectorType(DataType::Type type, /*out*/ uint64_t* restrictions);
Aart Bikf8f5a162017-02-06 15:35:29 -0800153 bool TrySetVectorLength(uint32_t length);
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100154 void GenerateVecInv(HInstruction* org, DataType::Type type);
Aart Bik14a68b42017-06-08 14:06:58 -0700155 void GenerateVecSub(HInstruction* org, HInstruction* offset);
Aart Bikf8f5a162017-02-06 15:35:29 -0800156 void GenerateVecMem(HInstruction* org,
157 HInstruction* opa,
158 HInstruction* opb,
Aart Bik14a68b42017-06-08 14:06:58 -0700159 HInstruction* offset,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100160 DataType::Type type);
Aart Bik0148de42017-09-05 09:25:01 -0700161 void GenerateVecReductionPhi(HPhi* phi);
162 void GenerateVecReductionPhiInputs(HPhi* phi, HInstruction* reduction);
163 HInstruction* ReduceAndExtractIfNeeded(HInstruction* instruction);
Aart Bik304c8a52017-05-23 11:01:13 -0700164 void GenerateVecOp(HInstruction* org,
165 HInstruction* opa,
166 HInstruction* opb,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100167 DataType::Type type,
Aart Bik304c8a52017-05-23 11:01:13 -0700168 bool is_unsigned = false);
Aart Bik281c6812016-08-26 11:31:48 -0700169
Aart Bikf3e61ee2017-04-12 17:09:20 -0700170 // Vectorization idioms.
171 bool VectorizeHalvingAddIdiom(LoopNode* node,
172 HInstruction* instruction,
173 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100174 DataType::Type type,
Aart Bikf3e61ee2017-04-12 17:09:20 -0700175 uint64_t restrictions);
Aart Bikdbbac8f2017-09-01 13:06:08 -0700176 bool VectorizeSADIdiom(LoopNode* node,
177 HInstruction* instruction,
178 bool generate_code,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +0100179 DataType::Type type,
Aart Bikdbbac8f2017-09-01 13:06:08 -0700180 uint64_t restrictions);
Aart Bikf3e61ee2017-04-12 17:09:20 -0700181
Aart Bik14a68b42017-06-08 14:06:58 -0700182 // Vectorization heuristics.
183 bool IsVectorizationProfitable(int64_t trip_count);
Aart Bikb29f6842017-07-28 15:58:41 -0700184 void SetPeelingCandidate(const ArrayReference* candidate, int64_t trip_count);
Aart Bik14a68b42017-06-08 14:06:58 -0700185 uint32_t GetUnrollingFactor(HBasicBlock* block, int64_t trip_count);
186
Aart Bikb29f6842017-07-28 15:58:41 -0700187 //
Aart Bik6b69e0a2017-01-11 10:20:43 -0800188 // Helpers.
Aart Bikb29f6842017-07-28 15:58:41 -0700189 //
190
Aart Bikf8f5a162017-02-06 15:35:29 -0800191 bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
Aart Bikb29f6842017-07-28 15:58:41 -0700192 bool TrySetPhiReduction(HPhi* phi);
193
194 // Detects loop header with a single induction (returned in main_phi), possibly
195 // other phis for reductions, but no other side effects. Returns true on success.
196 bool TrySetSimpleLoopHeader(HBasicBlock* block, /*out*/ HPhi** main_phi);
197
Aart Bikcc42be02016-10-20 16:14:16 -0700198 bool IsEmptyBody(HBasicBlock* block);
Aart Bik482095d2016-10-10 15:39:10 -0700199 bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
Aart Bik8c4a8542016-10-06 11:36:57 -0700200 HInstruction* instruction,
Aart Bik6b69e0a2017-01-11 10:20:43 -0800201 bool collect_loop_uses,
Aart Bik8c4a8542016-10-06 11:36:57 -0700202 /*out*/ int32_t* use_count);
Aart Bikf8f5a162017-02-06 15:35:29 -0800203 bool IsUsedOutsideLoop(HLoopInformation* loop_info,
204 HInstruction* instruction);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100205 bool TryReplaceWithLastValue(HLoopInformation* loop_info,
206 HInstruction* instruction,
207 HBasicBlock* block);
Aart Bikf8f5a162017-02-06 15:35:29 -0800208 bool TryAssignLastValue(HLoopInformation* loop_info,
209 HInstruction* instruction,
210 HBasicBlock* block,
211 bool collect_loop_uses);
Aart Bik6b69e0a2017-01-11 10:20:43 -0800212 void RemoveDeadInstructions(const HInstructionList& list);
Nicolas Geoffray1a0a5192017-06-22 11:56:01 +0100213 bool CanRemoveCycle(); // Whether the current 'iset_' is removable.
Aart Bik281c6812016-08-26 11:31:48 -0700214
Aart Bik92685a82017-03-06 11:13:43 -0800215 // Compiler driver (to query ISA features).
216 const CompilerDriver* compiler_driver_;
217
Aart Bik96202302016-10-04 17:33:56 -0700218 // Range information based on prior induction variable analysis.
Aart Bik281c6812016-08-26 11:31:48 -0700219 InductionVarRange induction_range_;
220
221 // Phase-local heap memory allocator for the loop optimizer. Storage obtained
Aart Bik96202302016-10-04 17:33:56 -0700222 // through this allocator is immediately released when the loop optimizer is done.
Nicolas Geoffrayebe16742016-10-05 09:55:42 +0100223 ArenaAllocator* loop_allocator_;
Aart Bik281c6812016-08-26 11:31:48 -0700224
Aart Bikf8f5a162017-02-06 15:35:29 -0800225 // Global heap memory allocator. Used to build HIR.
226 ArenaAllocator* global_allocator_;
227
Aart Bik96202302016-10-04 17:33:56 -0700228 // Entries into the loop hierarchy representation. The hierarchy resides
229 // in phase-local heap memory.
Aart Bik281c6812016-08-26 11:31:48 -0700230 LoopNode* top_loop_;
231 LoopNode* last_loop_;
232
Aart Bik8c4a8542016-10-06 11:36:57 -0700233 // Temporary bookkeeping of a set of instructions.
234 // Contents reside in phase-local heap memory.
235 ArenaSet<HInstruction*>* iset_;
236
Aart Bikb29f6842017-07-28 15:58:41 -0700237 // Temporary bookkeeping of reduction instructions. Mapping is two-fold:
238 // (1) reductions in the loop-body are mapped back to their phi definition,
239 // (2) phi definitions are mapped to their initial value (updated during
240 // code generation to feed the proper values into the new chain).
241 // Contents reside in phase-local heap memory.
242 ArenaSafeMap<HInstruction*, HInstruction*>* reductions_;
Aart Bik482095d2016-10-10 15:39:10 -0700243
Aart Bikdf7822e2016-12-06 10:05:30 -0800244 // Flag that tracks if any simplifications have occurred.
245 bool simplified_;
246
Aart Bikf8f5a162017-02-06 15:35:29 -0800247 // Number of "lanes" for selected packed type.
248 uint32_t vector_length_;
249
250 // Set of array references in the vector loop.
251 // Contents reside in phase-local heap memory.
252 ArenaSet<ArrayReference>* vector_refs_;
253
Aart Bik14a68b42017-06-08 14:06:58 -0700254 // Dynamic loop peeling candidate for alignment.
255 const ArrayReference* vector_peeling_candidate_;
256
257 // Dynamic data dependence test of the form a != b.
258 HInstruction* vector_runtime_test_a_;
259 HInstruction* vector_runtime_test_b_;
260
Aart Bikf8f5a162017-02-06 15:35:29 -0800261 // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
Aart Bik14a68b42017-06-08 14:06:58 -0700262 // loop (mode is kSequential) and the actual vector loop (mode is kVector). The data
Aart Bikf8f5a162017-02-06 15:35:29 -0800263 // structure maps original instructions into the new instructions.
264 // Contents reside in phase-local heap memory.
265 ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
266
Aart Bik0148de42017-09-05 09:25:01 -0700267 // Permanent mapping used during vectorization synthesis.
268 // Contents reside in phase-local heap memory.
269 ArenaSafeMap<HInstruction*, HInstruction*>* vector_permanent_map_;
270
Aart Bikf8f5a162017-02-06 15:35:29 -0800271 // Temporary vectorization bookkeeping.
Aart Bik14a68b42017-06-08 14:06:58 -0700272 VectorMode vector_mode_; // synthesis mode
Aart Bikf8f5a162017-02-06 15:35:29 -0800273 HBasicBlock* vector_preheader_; // preheader of the new loop
274 HBasicBlock* vector_header_; // header of the new loop
275 HBasicBlock* vector_body_; // body of the new loop
Aart Bik14a68b42017-06-08 14:06:58 -0700276 HInstruction* vector_index_; // normalized index of the new loop
Aart Bikf8f5a162017-02-06 15:35:29 -0800277
Aart Bik281c6812016-08-26 11:31:48 -0700278 friend class LoopOptimizationTest;
279
280 DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
281};
282
283} // namespace art
284
285#endif // ART_COMPILER_OPTIMIZING_LOOP_OPTIMIZATION_H_