/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* This file contains codegen for the Arm64 ISA. */

#include "codegen_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "arch/instruction_set_features.h"
#include "arm64_lir.h"
#include "base/logging.h"
#include "dex/compiler_ir.h"
#include "dex/mir_graph.h"
#include "dex/quick/mir_to_lir-inl.h"
#include "dex/reg_storage_eq.h"
#include "driver/compiler_driver.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "mirror/array-inl.h"
#include "utils.h"

namespace art {

LIR* Arm64Mir2Lir::OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target) {
  OpRegReg(kOpCmp, src1, src2);
  return OpCondBranch(cond, target);
}

LIR* Arm64Mir2Lir::OpIT(ConditionCode ccode, const char* guide) {
  UNUSED(ccode, guide);
  LOG(FATAL) << "Unexpected use of OpIT for Arm64";
  UNREACHABLE();
}

void Arm64Mir2Lir::OpEndIT(LIR* it) {
  UNUSED(it);
  LOG(FATAL) << "Unexpected use of OpEndIT for Arm64";
}

/*
 * 64-bit 3-way compare function.
 *     cmp   xA, xB
 *     csinc wC, wzr, wzr, eq  // wC = (xA == xB) ? 0 : 1
 *     csneg wC, wC, wC, ge    // wC = (xA >= xB) ? wC : -wC
 */
void Arm64Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1,
                              RegLocation rl_src2) {
  RegLocation rl_result;
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  rl_result = EvalLoc(rl_dest, kCoreReg, true);

  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondEq);
  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_result.reg.GetReg(),
          rl_result.reg.GetReg(), kArmCondGe);
  StoreValue(rl_dest, rl_result);
}

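// Editor's note (a sketch of an ISA assumption, not from the original source):
// the variable-shift forms reached through kOpLsl/kOpAsr/kOpLsr below map to
// the A64 lslv/asrv/lsrv instructions, which take the shift amount modulo the
// operand width.  That is why the 32-bit shift count in GenShiftOpLong can
// simply be reinterpreted with As64BitReg(): only the low 6 bits of the
// register matter for a 64-bit shift.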
void Arm64Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                  RegLocation rl_src1, RegLocation rl_shift) {
  OpKind op = kOpBkpt;
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      op = kOpLsl;
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      op = kOpAsr;
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Unexpected case: " << opcode;
  }
  rl_shift = LoadValue(rl_shift, kCoreReg);
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegReg(op, rl_result.reg, rl_src1.reg, As64BitReg(rl_shift.reg));
  StoreValueWide(rl_dest, rl_result);
}

static constexpr bool kUseDeltaEncodingInGenSelect = false;

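// Editor's sketch (not part of the original source) of what the conditional
// select below emits for the common boolean cases, after the normalizing
// negate-and-swap at the top of the function:
//   (cond ? 1 : 0)   ->  csinc wD, wzr, wzr, !cond   // !cond ? 0 : 0 + 1
//   (cond ? -1 : 0)  ->  csinv wD, wzr, wzr, !cond   // !cond ? 0 : ~0
// The csinc/csinv/csneg forms compute Rn when the condition holds and
// Rm + 1 / ~Rm / -Rm otherwise, which is what the pattern checks rely on.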
void Arm64Mir2Lir::GenSelect(int32_t true_val, int32_t false_val, ConditionCode ccode,
                             RegStorage rs_dest, int result_reg_class) {
  if (false_val == 0 ||               // 0 is better as first operand.
      true_val == 1 ||                // Potentially Csinc.
      true_val == -1 ||               // Potentially Csinv.
      true_val == false_val + 1) {    // Potentially Csinc.
    ccode = NegateComparison(ccode);
    std::swap(true_val, false_val);
  }

  ArmConditionCode code = ArmConditionEncoding(ccode);

  int opcode;                                      // The select opcode to emit.
  RegStorage left_op = RegStorage::InvalidReg();   // The true-case operand.
  RegStorage right_op = RegStorage::InvalidReg();  // The false-case operand.

  bool is_wide = rs_dest.Is64Bit();

  RegStorage zero_reg = is_wide ? rs_xzr : rs_wzr;

  if (true_val == 0) {
    left_op = zero_reg;
  } else {
    left_op = rs_dest;
    LoadConstantNoClobber(rs_dest, true_val);
  }
  if (false_val == 1) {
    right_op = zero_reg;
    opcode = kA64Csinc4rrrc;           // cond ? true_val : 0 + 1.
  } else if (false_val == -1) {
    right_op = zero_reg;
    opcode = kA64Csinv4rrrc;           // cond ? true_val : ~0.
  } else if (false_val == true_val + 1) {
    right_op = left_op;
    opcode = kA64Csinc4rrrc;           // cond ? v : v + 1.
  } else if (false_val == -true_val) {
    right_op = left_op;
    opcode = kA64Csneg4rrrc;           // cond ? v : -v.
  } else if (false_val == ~true_val) {
    right_op = left_op;
    opcode = kA64Csinv4rrrc;           // cond ? v : ~v.
  } else if (true_val == 0) {
    // left_op is zero_reg.
    right_op = rs_dest;
    LoadConstantNoClobber(rs_dest, false_val);
    opcode = kA64Csel4rrrc;
  } else {
    // Generic case.
    RegStorage t_reg2 = AllocTypedTemp(false, result_reg_class);
    if (is_wide) {
      if (t_reg2.Is32Bit()) {
        t_reg2 = As64BitReg(t_reg2);
      }
    } else {
      if (t_reg2.Is64Bit()) {
        t_reg2 = As32BitReg(t_reg2);
      }
    }

    if (kUseDeltaEncodingInGenSelect) {
      int32_t delta = false_val - true_val;
      uint32_t abs_val = delta < 0 ? -delta : delta;

      if (abs_val < 0x1000) {  // TODO: Replace with InexpensiveConstant with opcode.
        // Can encode as immediate to an add.
        right_op = t_reg2;
        OpRegRegImm(kOpAdd, t_reg2, left_op, delta);
      }
    }

    // Load as constant.
    if (!right_op.Valid()) {
      LoadConstantNoClobber(t_reg2, false_val);
      right_op = t_reg2;
    }

    opcode = kA64Csel4rrrc;
  }

  DCHECK(left_op.Valid() && right_op.Valid());
  NewLIR4(is_wide ? WIDE(opcode) : opcode, rs_dest.GetReg(), left_op.GetReg(), right_op.GetReg(),
          code);
}

void Arm64Mir2Lir::GenSelectConst32(RegStorage left_op, RegStorage right_op, ConditionCode code,
                                    int32_t true_val, int32_t false_val, RegStorage rs_dest,
                                    RegisterClass dest_reg_class) {
  DCHECK(rs_dest.Valid());
  OpRegReg(kOpCmp, left_op, right_op);
  GenSelect(true_val, false_val, code, rs_dest, dest_reg_class);
}

void Arm64Mir2Lir::GenSelect(BasicBlock* bb, MIR* mir) {
  UNUSED(bb);
  RegLocation rl_src = mir_graph_->GetSrc(mir, 0);
  rl_src = LoadValue(rl_src, rl_src.ref ? kRefReg : kCoreReg);
  // rl_src may be aliased with rl_result/rl_dest, so do compare early.
  OpRegImm(kOpCmp, rl_src.reg, 0);

  RegLocation rl_dest = mir_graph_->GetDest(mir);

  // The kMirOpSelect has two variants, one for constants and one for moves.
  if (mir->ssa_rep->num_uses == 1) {
    RegLocation rl_result = EvalLoc(rl_dest, rl_dest.ref ? kRefReg : kCoreReg, true);
    GenSelect(mir->dalvikInsn.vB, mir->dalvikInsn.vC, mir->meta.ccode, rl_result.reg,
              rl_dest.ref ? kRefReg : kCoreReg);
    StoreValue(rl_dest, rl_result);
  } else {
    RegLocation rl_true = mir_graph_->reg_location_[mir->ssa_rep->uses[1]];
    RegLocation rl_false = mir_graph_->reg_location_[mir->ssa_rep->uses[2]];

    RegisterClass result_reg_class = rl_dest.ref ? kRefReg : kCoreReg;
    rl_true = LoadValue(rl_true, result_reg_class);
    rl_false = LoadValue(rl_false, result_reg_class);
    RegLocation rl_result = EvalLoc(rl_dest, result_reg_class, true);

    bool is_wide = rl_dest.ref || rl_dest.wide;
    int opcode = is_wide ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc;
    NewLIR4(opcode, rl_result.reg.GetReg(),
            rl_true.reg.GetReg(), rl_false.reg.GetReg(), ArmConditionEncoding(mir->meta.ccode));
    StoreValue(rl_dest, rl_result);
  }
}

void Arm64Mir2Lir::GenFusedLongCmpBranch(BasicBlock* bb, MIR* mir) {
  RegLocation rl_src1 = mir_graph_->GetSrcWide(mir, 0);
  RegLocation rl_src2 = mir_graph_->GetSrcWide(mir, 2);
  LIR* taken = &block_label_list_[bb->taken];
  LIR* not_taken = &block_label_list_[bb->fall_through];
  // Normalize such that if either operand is constant, src2 will be constant.
  ConditionCode ccode = mir->meta.ccode;
  if (rl_src1.is_const) {
    std::swap(rl_src1, rl_src2);
    ccode = FlipComparisonOrder(ccode);
  }

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);

  if (rl_src2.is_const) {
    // TODO: Optimize for rl_src1.is_const? (Does happen in the boot image at the moment.)

    int64_t val = mir_graph_->ConstantValueWide(rl_src2);
    // Special handling using cbz & cbnz.
    if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
      OpCmpImmBranch(ccode, rl_src1.reg, 0, taken);
      OpCmpImmBranch(NegateComparison(ccode), rl_src1.reg, 0, not_taken);
      return;
    }

    // Only handle Imm if src2 is not already in a register.
    rl_src2 = UpdateLocWide(rl_src2);
    if (rl_src2.location != kLocPhysReg) {
      OpRegImm64(kOpCmp, rl_src1.reg, val);
      OpCondBranch(ccode, taken);
      OpCondBranch(NegateComparison(ccode), not_taken);
      return;
    }
  }

  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  OpCondBranch(ccode, taken);
  OpCondBranch(NegateComparison(ccode), not_taken);
}

/*
 * Generate a register comparison to an immediate and branch. Caller
 * is responsible for setting branch target field.
 */
LIR* Arm64Mir2Lir::OpCmpImmBranch(ConditionCode cond, RegStorage reg, int check_value,
                                  LIR* target) {
  LIR* branch = nullptr;
  ArmConditionCode arm_cond = ArmConditionEncoding(cond);
  if (check_value == 0) {
    if (arm_cond == kArmCondEq || arm_cond == kArmCondNe) {
      A64Opcode opcode = (arm_cond == kArmCondEq) ? kA64Cbz2rt : kA64Cbnz2rt;
      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
    } else if (arm_cond == kArmCondLs) {
      // kArmCondLs is an unsigned less or equal. A comparison r <= 0 is then the same as cbz.
      // This case happens for a bounds check of array[0].
      A64Opcode opcode = kA64Cbz2rt;
      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
      branch = NewLIR2(opcode | wide, reg.GetReg(), 0);
    } else if (arm_cond == kArmCondLt || arm_cond == kArmCondGe) {
      // A comparison with zero for < 0 or >= 0 reduces to a sign-bit test (tbnz/tbz).
      A64Opcode opcode = (arm_cond == kArmCondLt) ? kA64Tbnz3rht : kA64Tbz3rht;
      A64Opcode wide = reg.Is64Bit() ? WIDE(0) : UNWIDE(0);
      int value = reg.Is64Bit() ? 63 : 31;
      branch = NewLIR3(opcode | wide, reg.GetReg(), value, 0);
    }
  }

  if (branch == nullptr) {
    OpRegImm(kOpCmp, reg, check_value);
    branch = NewLIR2(kA64B2ct, arm_cond, 0);
  }

  branch->target = target;
  return branch;
}

LIR* Arm64Mir2Lir::OpCmpMemImmBranch(ConditionCode cond, RegStorage temp_reg,
                                     RegStorage base_reg, int offset, int check_value,
                                     LIR* target, LIR** compare) {
  DCHECK(compare == nullptr);
  // The temp register may be 64-bit (an ArgReg or RefReg).
  // Always compare a 32-bit value, no matter what temp_reg is.
  if (temp_reg.Is64Bit()) {
    temp_reg = As32BitReg(temp_reg);
  }
  Load32Disp(base_reg, offset, temp_reg);
  LIR* branch = OpCmpImmBranch(cond, temp_reg, check_value, target);
  return branch;
}

LIR* Arm64Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) {
  bool dest_is_fp = r_dest.IsFloat();
  bool src_is_fp = r_src.IsFloat();
  A64Opcode opcode = kA64Brk1d;
  LIR* res;

  if (LIKELY(dest_is_fp == src_is_fp)) {
    if (LIKELY(!dest_is_fp)) {
      DCHECK_EQ(r_dest.Is64Bit(), r_src.Is64Bit());

      // Core/core copy.
      // Copies involving the sp register require a different instruction.
      opcode = UNLIKELY(A64_REG_IS_SP(r_dest.GetReg())) ? kA64Add4RRdT : kA64Mov2rr;

      // TODO(Arm64): kA64Add4RRdT formally has 4 args, but is used as a 2-arg instruction.
      // This currently works because the other arguments are set to 0 by default. We should
      // rather introduce an alias kA64Mov2RR.

      // Do an x/x copy only if both registers are x registers.
      if (r_dest.Is64Bit() && r_src.Is64Bit()) {
        opcode = WIDE(opcode);
      }
    } else {
      // Float/float copy.
      bool dest_is_double = r_dest.IsDouble();
      bool src_is_double = r_src.IsDouble();

      // We do not do float/double or double/float casts here.
      DCHECK_EQ(dest_is_double, src_is_double);

      // Homogeneous float/float copy.
      opcode = (dest_is_double) ? WIDE(kA64Fmov2ff) : kA64Fmov2ff;
    }
  } else {
    // Inhomogeneous register copy.
    if (dest_is_fp) {
      if (r_dest.IsDouble()) {
        opcode = kA64Fmov2Sx;
      } else {
        r_src = Check32BitReg(r_src);
        opcode = kA64Fmov2sw;
      }
    } else {
      if (r_src.IsDouble()) {
        opcode = kA64Fmov2xS;
      } else {
        r_dest = Check32BitReg(r_dest);
        opcode = kA64Fmov2ws;
      }
    }
  }

  res = RawLIR(current_dalvik_offset_, opcode, r_dest.GetReg(), r_src.GetReg());

  if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) {
    res->flags.is_nop = true;
  }

  return res;
}

void Arm64Mir2Lir::OpRegCopy(RegStorage r_dest, RegStorage r_src) {
  if (r_dest != r_src) {
    LIR* res = OpRegCopyNoInsert(r_dest, r_src);
    AppendLIR(res);
  }
}

void Arm64Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) {
  OpRegCopy(r_dest, r_src);
}

// Table of magic divisors.
struct MagicTable {
  int magic64_base;
  int magic64_eor;
  uint64_t magic64;
  uint32_t magic32;
  uint32_t shift;
  DividePattern pattern;
};

static const MagicTable magic_table[] = {
  {   0,      0,                  0,          0, 0, DivideNone},  // 0
  {   0,      0,                  0,          0, 0, DivideNone},  // 1
  {   0,      0,                  0,          0, 0, DivideNone},  // 2
  {0x3c,     -1, 0x5555555555555556, 0x55555556, 0, Divide3},     // 3
  {   0,      0,                  0,          0, 0, DivideNone},  // 4
  {0xf9,     -1, 0x6666666666666667, 0x66666667, 1, Divide5},     // 5
  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 0, Divide3},     // 6
  {  -1,     -1, 0x924924924924924A, 0x92492493, 2, Divide7},     // 7
  {   0,      0,                  0,          0, 0, DivideNone},  // 8
  {  -1,     -1, 0x38E38E38E38E38E4, 0x38E38E39, 1, Divide5},     // 9
  {0xf9,     -1, 0x6666666666666667, 0x66666667, 2, Divide5},     // 10
  {  -1,     -1, 0x2E8BA2E8BA2E8BA3, 0x2E8BA2E9, 1, Divide5},     // 11
  {0x7c, 0x1041, 0x2AAAAAAAAAAAAAAB, 0x2AAAAAAB, 1, Divide5},     // 12
  {  -1,     -1, 0x4EC4EC4EC4EC4EC5, 0x4EC4EC4F, 2, Divide5},     // 13
  {  -1,     -1, 0x924924924924924A, 0x92492493, 3, Divide7},     // 14
  {0x78,     -1, 0x8888888888888889, 0x88888889, 3, Divide7},     // 15
};

// Integer division by constant via reciprocal multiply (Hacker's Delight, 10-4).
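// Worked example (editor's note, not from the original source): for lit == 3,
// magic32 == 0x55555556 == (2^32 + 2) / 3, and the Divide3 pattern below emits
//   smull x_tmp, w_n, w_magic        // x_tmp = (int64_t)n * magic
//   lsr   x_tmp, x_tmp, #32          // x_tmp = high 32 bits of the product
//   sub   w_q, w_tmp, w_n, asr #31   // add 1 for negative n (subtract -1)
// which computes w_q == n / 3 rounded toward zero.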
bool Arm64Mir2Lir::SmallLiteralDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                      RegLocation rl_src, RegLocation rl_dest, int lit) {
  UNUSED(dalvik_opcode);
  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns.
  if (!is_div) {
    return false;
  }

  RegStorage r_magic = AllocTemp();
  LoadConstant(r_magic, magic_table[lit].magic32);
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegStorage r_long_mul = AllocTemp();
  NewLIR3(kA64Smull3xww, As64BitReg(r_long_mul).GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegImm(kOpLsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul), 32);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, As64BitReg(r_long_mul), As64BitReg(r_long_mul),
                  32 + magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
      break;
    case Divide7:
      OpRegRegRegShift(kOpAdd, As64BitReg(r_long_mul), As64BitReg(rl_src.reg),
                       As64BitReg(r_long_mul), EncodeShift(kA64Lsr, 32));
      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 31));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValue(rl_dest, rl_result);
  return true;
}

bool Arm64Mir2Lir::SmallLiteralDivRem64(Instruction::Code dalvik_opcode, bool is_div,
                                        RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
  UNUSED(dalvik_opcode);
  if ((lit < 0) || (lit >= static_cast<int>(arraysize(magic_table)))) {
    return false;
  }
  DividePattern pattern = magic_table[lit].pattern;
  if (pattern == DivideNone) {
    return false;
  }
  // Tuning: add rem patterns.
  if (!is_div) {
    return false;
  }

  RegStorage r_magic = AllocTempWide();
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  RegStorage r_long_mul = AllocTempWide();

  if (magic_table[lit].magic64_base >= 0) {
    // Check that the entry in the table is correct.
    if (kIsDebugBuild) {
      uint64_t reconstructed_imm;
      uint64_t base = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_base);
      if (magic_table[lit].magic64_eor >= 0) {
        uint64_t eor = DecodeLogicalImmediate(/*is_wide*/true, magic_table[lit].magic64_eor);
        reconstructed_imm = base ^ eor;
      } else {
        reconstructed_imm = base + 1;
      }
      DCHECK_EQ(reconstructed_imm, magic_table[lit].magic64) << " for literal " << lit;
    }

    // Load the magic constant in two instructions.
    NewLIR3(WIDE(kA64Orr3Rrl), r_magic.GetReg(), rxzr, magic_table[lit].magic64_base);
    if (magic_table[lit].magic64_eor >= 0) {
      NewLIR3(WIDE(kA64Eor3Rrl), r_magic.GetReg(), r_magic.GetReg(),
              magic_table[lit].magic64_eor);
    } else {
      NewLIR4(WIDE(kA64Add4RRdT), r_magic.GetReg(), r_magic.GetReg(), 1, 0);
    }
  } else {
    LoadConstantWide(r_magic, magic_table[lit].magic64);
  }

  NewLIR3(kA64Smulh3xxx, r_long_mul.GetReg(), r_magic.GetReg(), rl_src.reg.GetReg());
  switch (pattern) {
    case Divide3:
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
      break;
    case Divide5:
      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
      break;
    case Divide7:
      OpRegRegReg(kOpAdd, r_long_mul, rl_src.reg, r_long_mul);
      OpRegRegImm(kOpAsr, r_long_mul, r_long_mul, magic_table[lit].shift);
      OpRegRegRegShift(kOpSub, rl_result.reg, r_long_mul, rl_src.reg, EncodeShift(kA64Asr, 63));
      break;
    default:
      LOG(FATAL) << "Unexpected pattern: " << pattern;
  }
  StoreValueWide(rl_dest, rl_result);
  return true;
}

// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
// and store the result in 'rl_dest'.
bool Arm64Mir2Lir::HandleEasyDivRem(Instruction::Code dalvik_opcode, bool is_div,
                                    RegLocation rl_src, RegLocation rl_dest, int lit) {
  return HandleEasyDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int>(lit));
}

// Returns true if it added instructions to 'cu' to divide 'rl_src' by 'lit'
// and store the result in 'rl_dest'.
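// Editor's sketch (not from the original source) of the signed power-of-two
// path below, assuming a 32-bit division by lit == 4 (k == 2):
//   asr w_t, w_n, #31           // w_t = (n < 0) ? -1 : 0
//   add w_t, w_n, w_t, lsr #30  // w_t = n + (n < 0 ? 3 : 0)
//   asr w_q, w_t, #2            // round toward zero
// The lsr #(nbits - k) extracts the k low-order bias bits from the sign mask.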
bool Arm64Mir2Lir::HandleEasyDivRem64(Instruction::Code dalvik_opcode, bool is_div,
                                      RegLocation rl_src, RegLocation rl_dest, int64_t lit) {
  const bool is_64bit = rl_dest.wide;
  const int nbits = (is_64bit) ? 64 : 32;

  if (lit < 2) {
    return false;
  }
  if (!IsPowerOfTwo(lit)) {
    if (is_64bit) {
      return SmallLiteralDivRem64(dalvik_opcode, is_div, rl_src, rl_dest, lit);
    } else {
      return SmallLiteralDivRem(dalvik_opcode, is_div, rl_src, rl_dest, static_cast<int32_t>(lit));
    }
  }
  int k = CTZ(lit);
  if (k >= nbits - 2) {
    // Avoid special cases.
    return false;
  }

  RegLocation rl_result;
  RegStorage t_reg;
  if (is_64bit) {
    rl_src = LoadValueWide(rl_src, kCoreReg);
    rl_result = EvalLocWide(rl_dest, kCoreReg, true);
    t_reg = AllocTempWide();
  } else {
    rl_src = LoadValue(rl_src, kCoreReg);
    rl_result = EvalLoc(rl_dest, kCoreReg, true);
    t_reg = AllocTemp();
  }

  int shift = EncodeShift(kA64Lsr, nbits - k);
  if (is_div) {
    if (lit == 2) {
      // Division by 2 is by far the most common division by constant.
      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
    } else {
      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, t_reg, shift);
      OpRegRegImm(kOpAsr, rl_result.reg, t_reg, k);
    }
  } else {
    if (lit == 2) {
      OpRegRegRegShift(kOpAdd, t_reg, rl_src.reg, rl_src.reg, shift);
      OpRegRegImm64(kOpAnd, t_reg, t_reg, lit - 1);
      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg, rl_src.reg, shift);
    } else {
      RegStorage t_reg2 = (is_64bit) ? AllocTempWide() : AllocTemp();
      OpRegRegImm(kOpAsr, t_reg, rl_src.reg, nbits - 1);
      OpRegRegRegShift(kOpAdd, t_reg2, rl_src.reg, t_reg, shift);
      OpRegRegImm64(kOpAnd, t_reg2, t_reg2, lit - 1);
      OpRegRegRegShift(kOpSub, rl_result.reg, t_reg2, t_reg, shift);
    }
  }

  if (is_64bit) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool Arm64Mir2Lir::EasyMultiply(RegLocation rl_src, RegLocation rl_dest, int lit) {
  UNUSED(rl_src, rl_dest, lit);
  LOG(FATAL) << "Unexpected use of EasyMultiply for Arm64";
  UNREACHABLE();
}

RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegLocation rl_src1, int lit,
                                       bool is_div) {
  UNUSED(rl_dest, rl_src1, lit, is_div);
  LOG(FATAL) << "Unexpected use of GenDivRemLit for Arm64";
  UNREACHABLE();
}

RegLocation Arm64Mir2Lir::GenDivRemLit(RegLocation rl_dest, RegStorage reg1, int lit, bool is_div) {
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Put the literal in a temp.
  RegStorage lit_temp = AllocTemp();
  LoadConstant(lit_temp, lit);
  // Use the generic case for div/rem with arg2 in a register.
  // TODO: The literal temp can be freed earlier during a modulus to reduce reg pressure.
  rl_result = GenDivRem(rl_result, reg1, lit_temp, is_div);
  FreeTemp(lit_temp);

  return rl_result;
}

RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegLocation rl_src1,
                                    RegLocation rl_src2, bool is_div, int flags) {
  UNUSED(rl_dest, rl_src1, rl_src2, is_div, flags);
  LOG(FATAL) << "Unexpected use of GenDivRem for Arm64";
  UNREACHABLE();
}

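// Editor's sketch (not from the original source): the remainder path in the
// overload below is the standard A64 idiom, since the ISA has no hardware
// remainder instruction:
//   sdiv t, n, d
//   msub r, t, d, n    // r = n - (n / d) * d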
RegLocation Arm64Mir2Lir::GenDivRem(RegLocation rl_dest, RegStorage r_src1, RegStorage r_src2,
                                    bool is_div) {
  CHECK_EQ(r_src1.Is64Bit(), r_src2.Is64Bit());

  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  if (is_div) {
    OpRegRegReg(kOpDiv, rl_result.reg, r_src1, r_src2);
  } else {
    // temp = r_src1 / r_src2
    // dest = r_src1 - temp * r_src2
    RegStorage temp;
    A64Opcode wide;
    if (rl_result.reg.Is64Bit()) {
      temp = AllocTempWide();
      wide = WIDE(0);
    } else {
      temp = AllocTemp();
      wide = UNWIDE(0);
    }
    OpRegRegReg(kOpDiv, temp, r_src1, r_src2);
    NewLIR4(kA64Msub4rrrr | wide, rl_result.reg.GetReg(), temp.GetReg(),
            r_src2.GetReg(), r_src1.GetReg());
    FreeTemp(temp);
  }
  return rl_result;
}

bool Arm64Mir2Lir::GenInlinedAbsInt(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  rl_src = LoadValue(rl_src, kCoreReg);
  RegLocation rl_dest = InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Compare the source value with zero. Write the negated value to the result if
  // negative, otherwise write the original value.
  OpRegImm(kOpCmp, rl_src.reg, 0);
  NewLIR4(kA64Csneg4rrrc, rl_result.reg.GetReg(), rl_src.reg.GetReg(), rl_src.reg.GetReg(),
          kArmCondPl);
  StoreValue(rl_dest, rl_result);
  return true;
}

bool Arm64Mir2Lir::GenInlinedAbsLong(CallInfo* info) {
  RegLocation rl_src = info->args[0];
  rl_src = LoadValueWide(rl_src, kCoreReg);
  RegLocation rl_dest = InlineTargetWide(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  // Compare the source value with zero. Write the negated value to the result if
  // negative, otherwise write the original value.
  OpRegImm(kOpCmp, rl_src.reg, 0);
  NewLIR4(WIDE(kA64Csneg4rrrc), rl_result.reg.GetReg(), rl_src.reg.GetReg(),
          rl_src.reg.GetReg(), kArmCondPl);
  StoreValueWide(rl_dest, rl_result);
  return true;
}

bool Arm64Mir2Lir::GenInlinedMinMax(CallInfo* info, bool is_min, bool is_long) {
  DCHECK_EQ(cu_->instruction_set, kArm64);
  RegLocation rl_src1 = info->args[0];
  RegLocation rl_src2 = (is_long) ? info->args[2] : info->args[1];
  rl_src1 = (is_long) ? LoadValueWide(rl_src1, kCoreReg) : LoadValue(rl_src1, kCoreReg);
  rl_src2 = (is_long) ? LoadValueWide(rl_src2, kCoreReg) : LoadValue(rl_src2, kCoreReg);
  RegLocation rl_dest = (is_long) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg);
  NewLIR4((is_long) ? WIDE(kA64Csel4rrrc) : kA64Csel4rrrc, rl_result.reg.GetReg(),
          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), (is_min) ? kArmCondLt : kArmCondGt);
  (is_long) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
  return true;
}

bool Arm64Mir2Lir::GenInlinedPeek(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_dest = (size == k64) ? InlineTargetWide(info) : InlineTarget(info);
  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);

  LoadBaseDisp(rl_address.reg, 0, rl_result.reg, size, kNotVolatile);
  if (size == k64) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    StoreValue(rl_dest, rl_result);
  }
  return true;
}

bool Arm64Mir2Lir::GenInlinedPoke(CallInfo* info, OpSize size) {
  RegLocation rl_src_address = info->args[0];  // long address
  RegLocation rl_src_value = info->args[2];  // [size] value
  RegLocation rl_address = LoadValueWide(rl_src_address, kCoreReg);

  RegLocation rl_value;
  if (size == k64) {
    rl_value = LoadValueWide(rl_src_value, kCoreReg);
  } else {
    DCHECK(size == kSignedByte || size == kSignedHalf || size == k32);
    rl_value = LoadValue(rl_src_value, kCoreReg);
  }
  StoreBaseDisp(rl_address.reg, 0, rl_value.reg, size, kNotVolatile);
  return true;
}

bool Arm64Mir2Lir::GenInlinedCas(CallInfo* info, bool is_long, bool is_object) {
  DCHECK_EQ(cu_->instruction_set, kArm64);
  // Unused - RegLocation rl_src_unsafe = info->args[0];
  RegLocation rl_src_obj = info->args[1];  // Object - known non-null
  RegLocation rl_src_offset = info->args[2];  // long low
  RegLocation rl_src_expected = info->args[4];  // int, long or Object
  // If is_long, high half is in info->args[5]
  RegLocation rl_src_new_value = info->args[is_long ? 6 : 5];  // int, long or Object
  // If is_long, high half is in info->args[7]
  RegLocation rl_dest = InlineTarget(info);  // boolean place for result

  // Load Object and offset.
  RegLocation rl_object = LoadValue(rl_src_obj, kRefReg);
  RegLocation rl_offset = LoadValueWide(rl_src_offset, kCoreReg);

  RegLocation rl_new_value;
  RegLocation rl_expected;
  if (is_long) {
    rl_new_value = LoadValueWide(rl_src_new_value, kCoreReg);
    rl_expected = LoadValueWide(rl_src_expected, kCoreReg);
  } else {
    rl_new_value = LoadValue(rl_src_new_value, is_object ? kRefReg : kCoreReg);
    rl_expected = LoadValue(rl_src_expected, is_object ? kRefReg : kCoreReg);
  }

  if (is_object && !mir_graph_->IsConstantNullRef(rl_new_value)) {
    // Mark card for object assuming new value is stored.
    MarkGCCard(0, rl_new_value.reg, rl_object.reg);
  }

  RegStorage r_ptr = AllocTempRef();
  OpRegRegReg(kOpAdd, r_ptr, rl_object.reg, rl_offset.reg);

  // Free now unneeded rl_object and rl_offset to give more temps.
  ClobberSReg(rl_object.s_reg_low);
  FreeTemp(rl_object.reg);
  ClobberSReg(rl_offset.s_reg_low);
  FreeTemp(rl_offset.reg);

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = (tmp == 0);  // 1 if the store succeeded, 0 on mismatch.
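  // Editor's sketch (not from the original source) of the emitted sequence:
  //   loop:
  //     ldaxr t, [r_ptr]
  //     cmp   t, expected
  //     b.ne  exit                       // mismatch: exit with flags == ne
  //     stlxr w_status, new_value, [r_ptr]
  //     cmp   w_status, #0
  //     b.ne  loop                       // store failed: retry
  //   exit:
  //     csinc w_result, wzr, wzr, ne     // ne ? 0 : 0 + 1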
784
Serban Constantinescu169489b2014-06-11 16:43:35 +0100785 RegStorage r_tmp;
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100786 RegStorage r_tmp_stored;
787 RegStorage rl_new_value_stored = rl_new_value.reg;
Matteo Franchin4163c532014-07-15 15:20:27 +0100788 A64Opcode wide = UNWIDE(0);
Matteo Franchin43ec8732014-03-31 15:00:14 +0100789 if (is_long) {
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100790 r_tmp_stored = r_tmp = AllocTempWide();
791 wide = WIDE(0);
Serban Constantinescu169489b2014-06-11 16:43:35 +0100792 } else if (is_object) {
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100793 // References use 64-bit registers, but are stored as compressed 32-bit values.
794 // This means r_tmp_stored != r_tmp.
Serban Constantinescu169489b2014-06-11 16:43:35 +0100795 r_tmp = AllocTempRef();
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100796 r_tmp_stored = As32BitReg(r_tmp);
797 rl_new_value_stored = As32BitReg(rl_new_value_stored);
Matteo Franchin43ec8732014-03-31 15:00:14 +0100798 } else {
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100799 r_tmp_stored = r_tmp = AllocTemp();
Matteo Franchin43ec8732014-03-31 15:00:14 +0100800 }
801
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100802 RegStorage r_tmp32 = (r_tmp.Is32Bit()) ? r_tmp : As32BitReg(r_tmp);
Serban Constantinescu169489b2014-06-11 16:43:35 +0100803 LIR* loop = NewLIR0(kPseudoTargetLabel);
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100804 NewLIR2(kA64Ldaxr2rX | wide, r_tmp_stored.GetReg(), r_ptr.GetReg());
Serban Constantinescu169489b2014-06-11 16:43:35 +0100805 OpRegReg(kOpCmp, r_tmp, rl_expected.reg);
Vladimir Marko8dea81c2014-06-06 14:50:36 +0100806 DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
Serban Constantinescu169489b2014-06-11 16:43:35 +0100807 LIR* early_exit = OpCondBranch(kCondNe, NULL);
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100808 NewLIR3(kA64Stlxr3wrX | wide, r_tmp32.GetReg(), rl_new_value_stored.GetReg(), r_ptr.GetReg());
809 NewLIR3(kA64Cmp3RdT, r_tmp32.GetReg(), 0, ENCODE_NO_SHIFT);
Serban Constantinescu169489b2014-06-11 16:43:35 +0100810 DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
811 OpCondBranch(kCondNe, loop);
812
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100813 LIR* exit_loop = NewLIR0(kPseudoTargetLabel);
814 early_exit->target = exit_loop;
815
Serban Constantinescu169489b2014-06-11 16:43:35 +0100816 RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
Serban Constantinescu63fe93d2014-06-30 17:10:28 +0100817 NewLIR4(kA64Csinc4rrrc, rl_result.reg.GetReg(), rwzr, rwzr, kArmCondNe);
Serban Constantinescu169489b2014-06-11 16:43:35 +0100818
Matteo Franchin43ec8732014-03-31 15:00:14 +0100819 FreeTemp(r_tmp); // Now unneeded.
Serban Constantinescu169489b2014-06-11 16:43:35 +0100820 FreeTemp(r_ptr); // Now unneeded.
Matteo Franchin43ec8732014-03-31 15:00:14 +0100821
822 StoreValue(rl_dest, rl_result);
823
Matteo Franchin43ec8732014-03-31 15:00:14 +0100824 return true;
825}
826
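// Editor's note (a summary, not from the original source) on the inlined
// char-array copy below: after the null, overlap and bounds checks, the byte
// count (2 * element count) is dispatched on its low bits.  A tbz on bit 1
// skips the single 2-byte copy, a tbz on bit 2 skips the 4-byte copy, and
// whatever remains is a multiple of 8 bytes copied in a 64-bit load/store
// loop that counts rs_length down to zero.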
bool Arm64Mir2Lir::GenInlinedArrayCopyCharArray(CallInfo* info) {
  constexpr int kLargeArrayThreshold = 512;

  RegLocation rl_src = info->args[0];
  RegLocation rl_src_pos = info->args[1];
  RegLocation rl_dst = info->args[2];
  RegLocation rl_dst_pos = info->args[3];
  RegLocation rl_length = info->args[4];
  // Compile-time check; let the non-inlined method handle the exceptions to
  // reduce the associated meta-data.
  if ((rl_src_pos.is_const && (mir_graph_->ConstantValue(rl_src_pos) < 0)) ||
      (rl_dst_pos.is_const && (mir_graph_->ConstantValue(rl_dst_pos) < 0)) ||
      (rl_length.is_const && (mir_graph_->ConstantValue(rl_length) < 0))) {
    return false;
  }

  ClobberCallerSave();
  LockCallTemps();  // Prepare for explicit register usage.
  RegStorage rs_src = rs_x0;
  RegStorage rs_dst = rs_x1;
  LoadValueDirectFixed(rl_src, rs_src);
  LoadValueDirectFixed(rl_dst, rs_dst);

  // Handle null pointer exception in slow-path.
  LIR* src_check_branch = OpCmpImmBranch(kCondEq, rs_src, 0, nullptr);
  LIR* dst_check_branch = OpCmpImmBranch(kCondEq, rs_dst, 0, nullptr);
  // Handle potential overlapping in slow-path.
  // TUNING: Support overlapping cases.
  LIR* src_dst_same = OpCmpBranch(kCondEq, rs_src, rs_dst, nullptr);
  // Handle exception or big length in slow-path.
  RegStorage rs_length = rs_w2;
  LoadValueDirectFixed(rl_length, rs_length);
  LIR* len_neg_or_too_big = OpCmpImmBranch(kCondHi, rs_length, kLargeArrayThreshold, nullptr);
  // Src bounds check.
  RegStorage rs_src_pos = rs_w3;
  RegStorage rs_arr_length = rs_w4;
  LoadValueDirectFixed(rl_src_pos, rs_src_pos);
  LIR* src_pos_negative = OpCmpImmBranch(kCondLt, rs_src_pos, 0, nullptr);
  Load32Disp(rs_src, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  OpRegReg(kOpSub, rs_arr_length, rs_src_pos);
  LIR* src_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);
  // Dst bounds check.
  RegStorage rs_dst_pos = rs_w5;
  LoadValueDirectFixed(rl_dst_pos, rs_dst_pos);
  LIR* dst_pos_negative = OpCmpImmBranch(kCondLt, rs_dst_pos, 0, nullptr);
  Load32Disp(rs_dst, mirror::Array::LengthOffset().Int32Value(), rs_arr_length);
  OpRegReg(kOpSub, rs_arr_length, rs_dst_pos);
  LIR* dst_bad_len = OpCmpBranch(kCondLt, rs_arr_length, rs_length, nullptr);

  // Everything is checked now.
  // Set rs_src to the address of the first element to be copied.
  rs_src_pos = As64BitReg(rs_src_pos);
  OpRegImm(kOpAdd, rs_src, mirror::Array::DataOffset(2).Int32Value());
  OpRegRegImm(kOpLsl, rs_src_pos, rs_src_pos, 1);
  OpRegReg(kOpAdd, rs_src, rs_src_pos);
  // Set rs_dst to the address of the first element to be copied.
  rs_dst_pos = As64BitReg(rs_dst_pos);
  OpRegImm(kOpAdd, rs_dst, mirror::Array::DataOffset(2).Int32Value());
  OpRegRegImm(kOpLsl, rs_dst_pos, rs_dst_pos, 1);
  OpRegReg(kOpAdd, rs_dst, rs_dst_pos);

  // rs_arr_length won't be used anymore.
  RegStorage rs_tmp = rs_arr_length;
  // Use 64-bit view since rs_length will be used as index.
  rs_length = As64BitReg(rs_length);
  OpRegRegImm(kOpLsl, rs_length, rs_length, 1);

  // Copy one element.
  LIR* jmp_to_copy_two = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 1, 0);
  OpRegImm(kOpSub, rs_length, 2);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, kSignedHalf);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, kSignedHalf);

  // Copy two elements.
  LIR* copy_two = NewLIR0(kPseudoTargetLabel);
  LIR* jmp_to_copy_four = NewLIR3(WIDE(kA64Tbz3rht), rs_length.GetReg(), 2, 0);
  OpRegImm(kOpSub, rs_length, 4);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k32);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k32);

  // Copy four elements.
  LIR* copy_four = NewLIR0(kPseudoTargetLabel);
  LIR* jmp_to_ret = OpCmpImmBranch(kCondEq, rs_length, 0, nullptr);
  LIR* begin_loop = NewLIR0(kPseudoTargetLabel);
  OpRegImm(kOpSub, rs_length, 8);
  rs_tmp = As64BitReg(rs_tmp);
  LoadBaseIndexed(rs_src, rs_length, rs_tmp, 0, k64);
  StoreBaseIndexed(rs_dst, rs_length, rs_tmp, 0, k64);
  LIR* jmp_to_loop = OpCmpImmBranch(kCondNe, rs_length, 0, nullptr);
  LIR* loop_finished = OpUnconditionalBranch(nullptr);

  LIR* check_failed = NewLIR0(kPseudoTargetLabel);
  LIR* launchpad_branch = OpUnconditionalBranch(nullptr);
  LIR* return_point = NewLIR0(kPseudoTargetLabel);

  src_check_branch->target = check_failed;
  dst_check_branch->target = check_failed;
  src_dst_same->target = check_failed;
  len_neg_or_too_big->target = check_failed;
  src_pos_negative->target = check_failed;
  src_bad_len->target = check_failed;
  dst_pos_negative->target = check_failed;
  dst_bad_len->target = check_failed;
  jmp_to_copy_two->target = copy_two;
  jmp_to_copy_four->target = copy_four;
  jmp_to_ret->target = return_point;
  jmp_to_loop->target = begin_loop;
  loop_finished->target = return_point;

  AddIntrinsicSlowPath(info, launchpad_branch, return_point);
  ClobberCallerSave();  // We must clobber everything because the slow path will return here.

  return true;
}

void Arm64Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) {
  ScopedMemRefType mem_ref_type(this, ResourceMask::kLiteral);
  LIR* lir = NewLIR2(kA64Ldr2rp, As32BitReg(reg).GetReg(), 0);
  lir->target = target;
}

bool Arm64Mir2Lir::CanUseOpPcRelDexCacheArrayLoad() const {
  if (cu_->compiler_driver->GetInstructionSetFeatures()->AsArm64InstructionSetFeatures()
      ->NeedFixCortexA53_843419()) {
    // TODO: Implement link-time workaround in OatWriter so that we can use ADRP on Cortex-A53.
    return false;
  }
  return dex_cache_arrays_layout_.Valid();
}

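// Editor's sketch (not from the original source): the pair emitted below is
// the usual A64 pc-relative addressing idiom,
//   adrp xD, <page of the dex cache array element>
//   ldr  wD, [xD, <offset within that page>]
// with both instructions recorded in dex_cache_access_insns_ so they can be
// patched later using the wrapped dex file pointer and element offset.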
void Arm64Mir2Lir::OpPcRelDexCacheArrayLoad(const DexFile* dex_file, int offset,
                                            RegStorage r_dest) {
  LIR* adrp = NewLIR2(kA64Adrp2xd, r_dest.GetReg(), 0);
  adrp->operands[2] = WrapPointer(dex_file);
  adrp->operands[3] = offset;
  adrp->operands[4] = WrapPointer(adrp);
  dex_cache_access_insns_.push_back(adrp);
  LIR* ldr = LoadBaseDisp(r_dest, 0, r_dest, kReference, kNotVolatile);
  ldr->operands[4] = adrp->operands[4];
  ldr->flags.fixup = kFixupLabel;
  dex_cache_access_insns_.push_back(ldr);
}

LIR* Arm64Mir2Lir::OpVldm(RegStorage r_base, int count) {
  UNUSED(r_base, count);
  LOG(FATAL) << "Unexpected use of OpVldm for Arm64";
  UNREACHABLE();
}

LIR* Arm64Mir2Lir::OpVstm(RegStorage r_base, int count) {
  UNUSED(r_base, count);
  LOG(FATAL) << "Unexpected use of OpVstm for Arm64";
  UNREACHABLE();
}

void Arm64Mir2Lir::GenMaddMsubInt(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                                  RegLocation rl_src3, bool is_sub) {
  rl_src1 = LoadValue(rl_src1, kCoreReg);
  rl_src2 = LoadValue(rl_src2, kCoreReg);
  rl_src3 = LoadValue(rl_src3, kCoreReg);
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  NewLIR4(is_sub ? kA64Msub4rrrr : kA64Madd4rrrr, rl_result.reg.GetReg(), rl_src1.reg.GetReg(),
          rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
  StoreValue(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenMaddMsubLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2,
                                   RegLocation rl_src3, bool is_sub) {
  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  rl_src3 = LoadValueWide(rl_src3, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  NewLIR4(is_sub ? WIDE(kA64Msub4rrrr) : WIDE(kA64Madd4rrrr), rl_result.reg.GetReg(),
          rl_src1.reg.GetReg(), rl_src2.reg.GetReg(), rl_src3.reg.GetReg());
  StoreValueWide(rl_dest, rl_result);
}

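// Editor's example (not from the original source): for lit == 10 == 0b1010
// (first_bit == 1, second_bit == 3) the function below emits
//   add wD, wS, wS, lsl #2   // wD = 5 * wS
//   lsl wD, wD, #1           // wD = 10 * wS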
void Arm64Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src,
                                                 RegLocation rl_result, int lit ATTRIBUTE_UNUSED,
                                                 int first_bit, int second_bit) {
  OpRegRegRegShift(kOpAdd, rl_result.reg, rl_src.reg, rl_src.reg,
                   EncodeShift(kA64Lsl, second_bit - first_bit));
  if (first_bit != 0) {
    OpRegRegImm(kOpLsl, rl_result.reg, rl_result.reg, first_bit);
  }
}

void Arm64Mir2Lir::GenDivZeroCheckWide(RegStorage reg ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unexpected use of GenDivZero for Arm64";
}

// Test suspend flag, return target of taken suspend branch.
LIR* Arm64Mir2Lir::OpTestSuspend(LIR* target) {
  NewLIR3(kA64Subs3rRd, rwSUSPEND, rwSUSPEND, 1);
  return OpCondBranch((target == nullptr) ? kCondEq : kCondNe, target);
}

// Decrement register and branch on condition.
LIR* Arm64Mir2Lir::OpDecAndBranch(ConditionCode c_code, RegStorage reg, LIR* target) {
  // Combine sub & test using sub setflags encoding here. We need to make sure a
  // subtract form that sets carry is used, so generate explicitly.
  // TODO: might be best to add a new op, kOpSubs, and handle it generically.
  A64Opcode opcode = reg.Is64Bit() ? WIDE(kA64Subs3rRd) : UNWIDE(kA64Subs3rRd);
  NewLIR3(opcode, reg.GetReg(), reg.GetReg(), 1);  // For value == 1, this should set flags.
  DCHECK(last_lir_insn_->u.m.def_mask->HasBit(ResourceMask::kCCode));
  return OpCondBranch(c_code, target);
}

bool Arm64Mir2Lir::GenMemBarrier(MemBarrierKind barrier_kind) {
  if (!cu_->compiler_driver->GetInstructionSetFeatures()->IsSmp()) {
    return false;
  }
  // Start off by using the last LIR as the barrier. If it is not adequate, generate a new one.
  LIR* barrier = last_lir_insn_;

  int dmb_flavor;
  // TODO: revisit Arm barrier kinds.
  switch (barrier_kind) {
    case kAnyStore: dmb_flavor = kISH; break;
    case kLoadAny: dmb_flavor = kISH; break;
    // We conjecture that kISHLD is insufficient. It is documented
    // to provide LoadLoad | StoreStore ordering. But if this were used
    // to implement volatile loads, we suspect that the lack of store
    // atomicity on ARM would cause us to allow incorrect results for
    // the canonical IRIW example. But we're not sure.
    // We should be using acquire loads instead.
    case kStoreStore: dmb_flavor = kISHST; break;
    case kAnyAny: dmb_flavor = kISH; break;
    default:
      LOG(FATAL) << "Unexpected MemBarrierKind: " << barrier_kind;
      dmb_flavor = kSY;  // quiet gcc.
      break;
  }

  bool ret = false;

  // If the same barrier already exists, don't generate another.
  if (barrier == nullptr
      || (barrier->opcode != kA64Dmb1B || barrier->operands[0] != dmb_flavor)) {
    barrier = NewLIR1(kA64Dmb1B, dmb_flavor);
    ret = true;
  }

  // At this point we must have a memory barrier. Mark it as a scheduling barrier as well.
  DCHECK(!barrier->flags.use_def_invalid);
  barrier->u.m.def_mask = &kEncodeAll;
  return ret;
}

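// Editor's note (an encoding assumption, not from the original source): the
// SBFM in GenIntToLong below, with immr == 0 and imms == 31, is the canonical
// encoding of "sxtw xD, wS", i.e. sign-extend 32 bits to 64.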
void Arm64Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;

  rl_src = LoadValue(rl_src, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  NewLIR4(WIDE(kA64Sbfm4rrdd), rl_result.reg.GetReg(), As64BitReg(rl_src.reg).GetReg(), 0, 31);
  StoreValueWide(rl_dest, rl_result);
}

1084void Arm64Mir2Lir::GenDivRemLong(Instruction::Code opcode, RegLocation rl_dest,
Razvan A Lupusoru5c5676b2014-09-29 16:42:11 -07001085 RegLocation rl_src1, RegLocation rl_src2, bool is_div, int flags) {
Matteo Franchin7c6c2ac2014-07-01 18:03:08 +01001086 if (rl_src2.is_const) {
1087 DCHECK(rl_src2.wide);
1088 int64_t lit = mir_graph_->ConstantValueWide(rl_src2);
1089 if (HandleEasyDivRem64(opcode, is_div, rl_src1, rl_dest, lit)) {
1090 return;
1091 }
1092 }
1093
Serban Constantinescued65c5e2014-05-22 15:10:18 +01001094 RegLocation rl_result;
1095 rl_src1 = LoadValueWide(rl_src1, kCoreReg);
1096 rl_src2 = LoadValueWide(rl_src2, kCoreReg);
Razvan A Lupusoru5c5676b2014-09-29 16:42:11 -07001097 if ((flags & MIR_IGNORE_DIV_ZERO_CHECK) == 0) {
1098 GenDivZeroCheck(rl_src2.reg);
1099 }
Serban Constantinescued65c5e2014-05-22 15:10:18 +01001100 rl_result = GenDivRem(rl_dest, rl_src1.reg, rl_src2.reg, is_div);
Matteo Franchin43ec8732014-03-31 15:00:14 +01001101 StoreValueWide(rl_dest, rl_result);
1102}
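
// Illustrative sketch of what GenDivRem (defined earlier in this file) produces for a wide
// remainder, assuming inputs in x1/x2 and the result in x0 (actual registers are
// allocator-dependent):
//   sdiv x0, x1, x2       // quotient
//   msub x0, x0, x2, x1   // x0 = x1 - quotient * x2, i.e. the remainder
// For is_div, only the sdiv is needed.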

void Arm64Mir2Lir::GenLongOp(OpKind op, RegLocation rl_dest, RegLocation rl_src1,
                             RegLocation rl_src2) {
  RegLocation rl_result;

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  rl_src2 = LoadValueWide(rl_src2, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegRegShift(op, rl_result.reg, rl_src1.reg, rl_src2.reg, ENCODE_NO_SHIFT);
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;

  rl_src = LoadValueWide(rl_src, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegShift(kOpNeg, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
  StoreValueWide(rl_dest, rl_result);
}

void Arm64Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) {
  RegLocation rl_result;

  rl_src = LoadValueWide(rl_src, kCoreReg);
  rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegShift(kOpMvn, rl_result.reg, rl_src.reg, ENCODE_NO_SHIFT);
  StoreValueWide(rl_dest, rl_result);
}
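
// Illustrative: on A64, neg and mvn are aliases of sub and orn with the zero register, so
// the two helpers above emit roughly
//   neg x0, x1   // == sub x0, xzr, x1
//   mvn x0, x1   // == orn x0, xzr, x1
// (placeholder registers).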

void Arm64Mir2Lir::GenArithOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                  RegLocation rl_src1, RegLocation rl_src2, int flags) {
  switch (opcode) {
    case Instruction::NOT_LONG:
      GenNotLong(rl_dest, rl_src2);
      return;
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      GenLongOp(kOpAdd, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      GenLongOp(kOpSub, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::MUL_LONG:
    case Instruction::MUL_LONG_2ADDR:
      GenLongOp(kOpMul, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::DIV_LONG:
    case Instruction::DIV_LONG_2ADDR:
      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true, flags);
      return;
    case Instruction::REM_LONG:
    case Instruction::REM_LONG_2ADDR:
      GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false, flags);
      return;
    case Instruction::AND_LONG_2ADDR:
    case Instruction::AND_LONG:
      GenLongOp(kOpAnd, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      GenLongOp(kOpOr, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      GenLongOp(kOpXor, rl_dest, rl_src1, rl_src2);
      return;
    case Instruction::NEG_LONG: {
      GenNegLong(rl_dest, rl_src2);
      return;
    }
    default:
      LOG(FATAL) << "Invalid long arith op";
      return;
  }
}

/*
 * Generate array load
 */
void Arm64Mir2Lir::GenArrayGet(int opt_flags, OpSize size, RegLocation rl_array,
                               RegLocation rl_index, RegLocation rl_dest, int scale) {
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  int data_offset;
  RegLocation rl_result;
  bool constant_index = rl_index.is_const;
  rl_array = LoadValue(rl_array, kRefReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  if (rl_dest.wide) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    /* Get len */
    Load32Disp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  if (constant_index) {
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
      FreeTemp(reg_len);
    }
    // Fold the constant index into the data offset.
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
    if (rl_result.ref) {
      LoadRefDisp(rl_array.reg, data_offset, rl_result.reg, kNotVolatile);
    } else {
      LoadBaseDisp(rl_array.reg, data_offset, rl_result.reg, size, kNotVolatile);
    }
  } else {
    // Offset base, then use indexed load.
    RegStorage reg_ptr = AllocTempRef();
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    FreeTemp(rl_array.reg);
    rl_result = EvalLoc(rl_dest, reg_class, true);

    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    if (rl_result.ref) {
      LoadRefIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale);
    } else {
      LoadBaseIndexed(reg_ptr, rl_index.reg, rl_result.reg, scale, size);
    }
    FreeTemp(reg_ptr);
  }
  if (rl_dest.wide) {
    StoreValueWide(rl_dest, rl_result);
  } else {
    StoreValue(rl_dest, rl_result);
  }
}
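
// Illustrative sketch of the non-constant-index path for an int[] element load (placeholder
// registers; the 8/12 offsets assume this generation's 32-bit object header):
//   ldr w3, [x1, #8]            // array length
//   cmp w2, w3                  // GenArrayBoundsCheck
//   b.hs <throw IndexOutOfBounds>
//   add x4, x1, #12             // reg_ptr = array + data_offset
//   ldr w0, [x4, w2, uxtw #2]   // LoadBaseIndexed, scale == 2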

/*
 * Generate array store
 */
void Arm64Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array,
                               RegLocation rl_index, RegLocation rl_src, int scale,
                               bool card_mark) {
  RegisterClass reg_class = RegClassBySize(size);
  int len_offset = mirror::Array::LengthOffset().Int32Value();
  bool constant_index = rl_index.is_const;

  int data_offset;
  if (size == k64 || size == kDouble) {
    data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Int32Value();
  } else {
    data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Int32Value();
  }

  rl_array = LoadValue(rl_array, kRefReg);
  if (!constant_index) {
    rl_index = LoadValue(rl_index, kCoreReg);
  }

  RegStorage reg_ptr;
  bool allocated_reg_ptr_temp = false;
  if (constant_index) {
    reg_ptr = rl_array.reg;
  } else if (IsTemp(rl_array.reg) && !card_mark) {
    Clobber(rl_array.reg);
    reg_ptr = rl_array.reg;
  } else {
    allocated_reg_ptr_temp = true;
    reg_ptr = AllocTempRef();
  }

  /* null object? */
  GenNullCheck(rl_array.reg, opt_flags);

  bool needs_range_check = (!(opt_flags & MIR_IGNORE_RANGE_CHECK));
  RegStorage reg_len;
  if (needs_range_check) {
    reg_len = AllocTemp();
    // NOTE: max live temps(4) here.
    /* Get len */
    Load32Disp(rl_array.reg, len_offset, reg_len);
    MarkPossibleNullPointerException(opt_flags);
  } else {
    ForceImplicitNullCheck(rl_array.reg, opt_flags);
  }
  /* at this point, reg_ptr points to array, 2 live temps */
  if (rl_src.wide) {
    rl_src = LoadValueWide(rl_src, reg_class);
  } else {
    rl_src = LoadValue(rl_src, reg_class);
  }
  if (constant_index) {
    if (needs_range_check) {
      GenArrayBoundsCheck(mir_graph_->ConstantValue(rl_index), reg_len);
      FreeTemp(reg_len);
    }
    // Fold the constant index into the data offset.
    data_offset += mir_graph_->ConstantValue(rl_index) << scale;
    if (rl_src.ref) {
      StoreRefDisp(reg_ptr, data_offset, rl_src.reg, kNotVolatile);
    } else {
      StoreBaseDisp(reg_ptr, data_offset, rl_src.reg, size, kNotVolatile);
    }
  } else {
    /* reg_ptr -> array data */
    OpRegRegImm(kOpAdd, reg_ptr, rl_array.reg, data_offset);
    if (needs_range_check) {
      GenArrayBoundsCheck(rl_index.reg, reg_len);
      FreeTemp(reg_len);
    }
    if (rl_src.ref) {
      StoreRefIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale);
    } else {
      StoreBaseIndexed(reg_ptr, rl_index.reg, rl_src.reg, scale, size);
    }
  }
  if (allocated_reg_ptr_temp) {
    FreeTemp(reg_ptr);
  }
  if (card_mark) {
    MarkGCCard(opt_flags, rl_src.reg, rl_array.reg);
  }
}
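
// Illustrative: when card_mark is set (reference stores), MarkGCCard dirties the card
// covering the array so the GC rescans it. A sketch under assumed offsets, shift, and
// scratch registers (none of which are fixed by this function):
//   ldr x16, [xSELF, #<card_table_offset>]   // card table biased base
//   lsr x17, x_array, #<kCardShift>
//   strb w16, [x16, x17]                     // low byte of the base doubles as the dirty value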

void Arm64Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode,
                                     RegLocation rl_dest, RegLocation rl_src,
                                     RegLocation rl_shift, int flags ATTRIBUTE_UNUSED) {
  OpKind op = kOpBkpt;
  // Per spec, we only care about the low 6 bits of the shift amount.
  int shift_amount = mir_graph_->ConstantValue(rl_shift) & 0x3f;
  rl_src = LoadValueWide(rl_src, kCoreReg);
  if (shift_amount == 0) {
    StoreValueWide(rl_dest, rl_src);
    return;
  }

  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  switch (opcode) {
    case Instruction::SHL_LONG:
    case Instruction::SHL_LONG_2ADDR:
      op = kOpLsl;
      break;
    case Instruction::SHR_LONG:
    case Instruction::SHR_LONG_2ADDR:
      op = kOpAsr;
      break;
    case Instruction::USHR_LONG:
    case Instruction::USHR_LONG_2ADDR:
      op = kOpLsr;
      break;
    default:
      LOG(FATAL) << "Unexpected shift opcode";
  }
  OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount);
  StoreValueWide(rl_dest, rl_result);
}
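
// Illustrative: with a constant shift, e.g. shl-long vA, vB, #3, the code above emits a
// single immediate-form shift such as
//   lsl x0, x1, #3
// (placeholder registers; a shift amount of zero is handled above by forwarding the source).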

void Arm64Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest,
                                     RegLocation rl_src1, RegLocation rl_src2, int flags) {
  OpKind op = kOpBkpt;
  switch (opcode) {
    case Instruction::ADD_LONG:
    case Instruction::ADD_LONG_2ADDR:
      op = kOpAdd;
      break;
    case Instruction::SUB_LONG:
    case Instruction::SUB_LONG_2ADDR:
      op = kOpSub;
      break;
    case Instruction::AND_LONG:
    case Instruction::AND_LONG_2ADDR:
      op = kOpAnd;
      break;
    case Instruction::OR_LONG:
    case Instruction::OR_LONG_2ADDR:
      op = kOpOr;
      break;
    case Instruction::XOR_LONG:
    case Instruction::XOR_LONG_2ADDR:
      op = kOpXor;
      break;
    default:
      LOG(FATAL) << "Unexpected opcode";
  }

  if (op == kOpSub) {
    if (!rl_src2.is_const) {
      return GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2, flags);
    }
  } else {
    // Commutativity: if only rl_src1 is constant, swap the operands so the constant ends up
    // in rl_src2.
    if (!rl_src2.is_const) {
      DCHECK(rl_src1.is_const);
      std::swap(rl_src1, rl_src2);
    }
  }
  DCHECK(rl_src2.is_const);
  int64_t val = mir_graph_->ConstantValueWide(rl_src2);

  rl_src1 = LoadValueWide(rl_src1, kCoreReg);
  RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true);
  OpRegRegImm64(op, rl_result.reg, rl_src1.reg, val);
  StoreValueWide(rl_dest, rl_result);
}

static uint32_t ExtractReg(uint32_t reg_mask, int* reg) {
  // Find first register.
  int first_bit_set = CTZ(reg_mask) + 1;
  *reg = *reg + first_bit_set;
  reg_mask >>= first_bit_set;
  return reg_mask;
}

/**
 * @brief Split a register list into pairs or single registers.
 *
 * Given a list of registers in @p reg_mask, split the list into pairs. Use as follows:
 * @code
 * int reg1 = -1, reg2 = -1;
 * while (reg_mask) {
 *   reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
 *   if (UNLIKELY(reg2 < 0)) {
 *     // Single register in reg1.
 *   } else {
 *     // Pair in reg1, reg2.
 *   }
 * }
 * @endcode
 */
static uint32_t GenPairWise(uint32_t reg_mask, int* reg1, int* reg2) {
  // Find first register.
  int first_bit_set = CTZ(reg_mask) + 1;
  int reg = *reg1 + first_bit_set;
  reg_mask >>= first_bit_set;

  if (LIKELY(reg_mask)) {
    // Save the first register, find the second and use the pair opcode.
    int second_bit_set = CTZ(reg_mask) + 1;
    *reg2 = reg;
    reg_mask >>= second_bit_set;
    *reg1 = reg + second_bit_set;
    return reg_mask;
  }

  // Use the single opcode, as we just have one register.
  *reg1 = reg;
  *reg2 = -1;
  return reg_mask;
}
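
// A worked trace (assumed inputs, for illustration): reg_mask == 0b1011 (registers 0, 1, 3)
// with reg1 == reg2 == -1 gives
//   1st call: reg1 == 1, reg2 == 0, returns 0b10 (the remaining, shifted mask)
//   2nd call: reg1 == 3, reg2 == -1, returns 0
// reg1 carries the running register number between calls, which is why callers initialize it
// to -1 and must not reset it mid-loop.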

static void SpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
    } else {
      m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
    }
  }
}
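
// Illustrative: with base == sp, offset == 16 and reg_mask selecting x19..x21, the loop
// above emits roughly
//   stp x19, x20, [sp, #16]
//   str x21, [sp, #32]
// (the XD encodings take the displacement in 8-byte slots, hence the >> 3 above).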

// TODO(Arm64): consider using ld1 and st1?
static void SpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                   offset);
    } else {
      m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
    }
  }
}

static int SpillRegsPreSub(Arm64Mir2Lir* m2l, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                           int frame_size) {
  m2l->OpRegRegImm(kOpSub, rs_sp, rs_sp, frame_size);

  int core_count = POPCOUNT(core_reg_mask);

  if (fp_reg_mask != 0) {
    // Spill FP regs.
    int fp_count = POPCOUNT(fp_reg_mask);
    int spill_offset = frame_size - (core_count + fp_count) * kArm64PointerSize;
    SpillFPRegs(m2l, rs_sp, spill_offset, fp_reg_mask);
  }

  if (core_reg_mask != 0) {
    // Spill core regs.
    int spill_offset = frame_size - (core_count * kArm64PointerSize);
    SpillCoreRegs(m2l, rs_sp, spill_offset, core_reg_mask);
  }

  return frame_size;
}
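
// Illustrative frame layout after SpillRegsPreSub, assuming frame_size == 64 with two core
// and two FP spills (higher addresses on top):
//   sp + 56: core reg 2
//   sp + 48: core reg 1   <- core spill_offset == frame_size - 2 * 8
//   sp + 40: fp reg 2
//   sp + 32: fp reg 1     <- fp spill_offset == frame_size - 4 * 8
//   sp +  0: rest of the frame (locals, outs, ...)
// i.e. callee saves sit at the top of the frame, FP saves directly below the core saves.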

static int SpillRegsPreIndexed(Arm64Mir2Lir* m2l, RegStorage base, uint32_t core_reg_mask,
                               uint32_t fp_reg_mask) {
  // Spill both core and fp regs at the same time.
  // The very first instruction will be an stp with a pre-indexed address, moving the stack
  // pointer down. From then on, we fill upwards. This will generate overall the same number of
  // instructions as the specialized code in SpillRegsPreSub in most cases (exception being an
  // odd number of core and an even non-zero number of fp spills), but is more flexible, as the
  // offsets are guaranteed small.
  //
  // Some demonstrative fill cases : (c) = core, (f) = fp
  // cc    44   cc    44   cc    22   cc    33   fc => 1[1/2]
  // fc => 23   fc => 23   ff => 11   ff => 22
  // ff    11    f    11    f    11
  //
  int reg1 = -1, reg2 = -1;
  int core_count = POPCOUNT(core_reg_mask);
  int fp_count = POPCOUNT(fp_reg_mask);

  int combined = fp_count + core_count;
  int all_offset = RoundUp(combined, 2);  // Needs to be 16B = 2-reg aligned.

  int cur_offset = 2;  // What's the starting offset after the first stp? We expect the base slot
                       // to be filled.

  // First figure out whether the bottom is FP or core.
  if (fp_count > 0) {
    // Some FP spills.
    //
    // Four cases: (d0 is dummy to fill up stp)
    // 1) Single FP, even number of core -> stp d0, fp_reg
    // 2) Single FP, odd number of core -> stp fp_reg, d0
    // 3) More FP, even number combined -> stp fp_reg1, fp_reg2
    // 4) More FP, odd number combined -> stp d0, fp_reg
    if (fp_count == 1) {
      fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
      DCHECK_EQ(fp_reg_mask, 0U);
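      // Note: the two branches below emit the same stp -- the single FP register is stored
      // into both halves of the pair slot, serving as its own dummy -- and differ only in
      // whether the next core register overwrites the upper half (cur_offset == 0).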
      if (core_count % 2 == 0) {
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
      } else {
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
        cur_offset = 0;  // That core reg needs to go into the upper half.
      }
    } else {
      if (combined % 2 == 0) {
        fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), -all_offset);
      } else {
        fp_reg_mask = ExtractReg(fp_reg_mask, &reg1);
        m2l->NewLIR4(WIDE(kA64StpPre4ffXD), rs_d0.GetReg(), RegStorage::FloatSolo64(reg1).GetReg(),
                     base.GetReg(), -all_offset);
      }
    }
  } else {
    // No FP spills.
    //
    // Two cases:
    // 1) Even number of core -> stp core1, core2
    // 2) Odd number of core -> stp xzr, core1
    if (core_count % 2 == 1) {
      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), rs_xzr.GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
    } else {
      core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
      m2l->NewLIR4(WIDE(kA64StpPre4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), -all_offset);
    }
  }

  if (fp_count != 0) {
    for (; fp_reg_mask != 0;) {
      // Have some FP regs to do.
      fp_reg_mask = GenPairWise(fp_reg_mask, &reg1, &reg2);
      if (UNLIKELY(reg2 < 0)) {
        m2l->NewLIR3(WIDE(kA64Str3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                     cur_offset);
        // Do not increment offset here, as the second half will be filled by a core reg.
      } else {
        m2l->NewLIR4(WIDE(kA64Stp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                     RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), cur_offset);
        cur_offset += 2;
      }
    }

    // Reset counting.
    reg1 = -1;

    // If there is an odd number of core registers, we need to store the bottom now.
    if (core_count % 2 == 1) {
      core_reg_mask = ExtractReg(core_reg_mask, &reg1);
      m2l->NewLIR3(WIDE(kA64Str3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(),
                   cur_offset + 1);
      cur_offset += 2;  // Half-slot filled now.
    }
  }

  // Spill the rest of the core regs. They are guaranteed to be even.
  DCHECK_EQ(POPCOUNT(core_reg_mask) % 2, 0);
  for (; core_reg_mask != 0; cur_offset += 2) {
    core_reg_mask = GenPairWise(core_reg_mask, &reg1, &reg2);
    m2l->NewLIR4(WIDE(kA64Stp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                 RegStorage::Solo64(reg1).GetReg(), base.GetReg(), cur_offset);
  }

  DCHECK_EQ(cur_offset, all_offset);

  return all_offset * 8;
}
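
// Illustrative output, assuming three core spills (x19..x21) and no FP spills, so
// all_offset == 4 slots:
//   stp xzr, x19, [sp, #-32]!   // pre-index: claim the spill area and fill the bottom pair
//   stp x20, x21, [sp, #16]     // fill upwards
// The return value (all_offset * 8 == 32) tells the caller how many bytes were claimed.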

int Arm64Mir2Lir::SpillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                            int frame_size) {
  // If the frame size is small enough that all offsets would fit into the immediates, use that
  // setup, as it decrements sp early (a kind of instruction scheduling), and is not worse
  // instruction-count wise than the more involved pre-indexed variant.
  //
  // This case is also optimal when we have an odd number of core spills, and an even (non-zero)
  // number of fp spills.
  if ((RoundUp(frame_size, 8) / 8 <= 63)) {
    return SpillRegsPreSub(this, core_reg_mask, fp_reg_mask, frame_size);
  } else {
    return SpillRegsPreIndexed(this, base, core_reg_mask, fp_reg_mask);
  }
}

static void UnSpillCoreRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Ldr3rXD), RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
    } else {
      DCHECK_LE(offset, 63);
      m2l->NewLIR4(WIDE(kA64Ldp4rrXD), RegStorage::Solo64(reg2).GetReg(),
                   RegStorage::Solo64(reg1).GetReg(), base.GetReg(), offset);
    }
  }
}

static void UnSpillFPRegs(Arm64Mir2Lir* m2l, RegStorage base, int offset, uint32_t reg_mask) {
  int reg1 = -1, reg2 = -1;
  const int reg_log2_size = 3;

  for (offset = (offset >> reg_log2_size); reg_mask; offset += 2) {
    reg_mask = GenPairWise(reg_mask, & reg1, & reg2);
    if (UNLIKELY(reg2 < 0)) {
      m2l->NewLIR3(WIDE(kA64Ldr3fXD), RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(),
                   offset);
    } else {
      m2l->NewLIR4(WIDE(kA64Ldp4ffXD), RegStorage::FloatSolo64(reg2).GetReg(),
                   RegStorage::FloatSolo64(reg1).GetReg(), base.GetReg(), offset);
    }
  }
}

void Arm64Mir2Lir::UnspillRegs(RegStorage base, uint32_t core_reg_mask, uint32_t fp_reg_mask,
                               int frame_size) {
  DCHECK_EQ(base, rs_sp);
  // Restore saves and drop stack frame.
  // 2 versions:
  //
  // 1. (Original): Try to address directly, then drop the whole frame.
  //    Limitation: ldp's offset is a 7-bit signed scaled immediate.
  //
  // 2. (New): Drop the non-save part of the frame first. Then do something similar to the
  //    original, which is now guaranteed to be in range. Then drop the rest.
  //
  // TODO: In methods with few spills but a huge frame, it would be better to do non-immediate
  //       loads in variant 1.

  // "Magic" constant, 63 (max signed 7b) * 8.
  static constexpr int kMaxFramesizeForOffset = 63 * kArm64PointerSize;

  const int num_core_spills = POPCOUNT(core_reg_mask);
  const int num_fp_spills = POPCOUNT(fp_reg_mask);

  int early_drop = 0;

  if (frame_size > kMaxFramesizeForOffset) {
    // Second variant. Drop the frame part.

    // TODO: Always use the first formula, as num_fp_spills would be zero?
    if (fp_reg_mask != 0) {
      early_drop = frame_size - kArm64PointerSize * (num_fp_spills + num_core_spills);
    } else {
      early_drop = frame_size - kArm64PointerSize * num_core_spills;
    }

    // Drop needs to be 16B aligned, so that SP keeps aligned.
    early_drop = RoundDown(early_drop, 16);

    OpRegImm64(kOpAdd, rs_sp, early_drop);
  }

  // Unspill.
  if (fp_reg_mask != 0) {
    int offset = frame_size - early_drop - kArm64PointerSize * (num_fp_spills + num_core_spills);
    UnSpillFPRegs(this, rs_sp, offset, fp_reg_mask);
  }
  if (core_reg_mask != 0) {
    int offset = frame_size - early_drop - kArm64PointerSize * num_core_spills;
    UnSpillCoreRegs(this, rs_sp, offset, core_reg_mask);
  }

  // Drop the (rest of) the frame.
  OpRegImm64(kOpAdd, rs_sp, frame_size - early_drop);
}
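
// Illustrative epilogue for a small frame, assuming frame_size == 64 and core saves x19/x20
// at the top of the frame (no FP saves, no early drop):
//   ldp x19, x20, [sp, #48]   // offset == frame_size - 2 * 8
//   add sp, sp, #64           // drop the whole frame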

bool Arm64Mir2Lir::GenInlinedReverseBits(CallInfo* info, OpSize size) {
  A64Opcode wide = IsWide(size) ? WIDE(0) : UNWIDE(0);
  RegLocation rl_src_i = info->args[0];
  RegLocation rl_dest = IsWide(size) ? InlineTargetWide(info) : InlineTarget(info);  // result reg
  RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true);
  RegLocation rl_i = IsWide(size) ?
      LoadValueWide(rl_src_i, kCoreReg) : LoadValue(rl_src_i, kCoreReg);
  NewLIR2(kA64Rbit2rr | wide, rl_result.reg.GetReg(), rl_i.reg.GetReg());
  IsWide(size) ? StoreValueWide(rl_dest, rl_result) : StoreValue(rl_dest, rl_result);
  return true;
}
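
// Illustrative: Integer.reverse()/Long.reverse() collapse to a single instruction here,
//   rbit w0, w1   // or rbit x0, x1 when the WIDE bit is set
// (placeholder registers).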

}  // namespace art