/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "art_method.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "heap_poisoning.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-current-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::HighSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::DRegisterFromS;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Compute base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
                                          DataType::Type type,
                                          const vixl32::Register& array,
                                          const Location& pos,
                                          const vixl32::Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow DataType::Type::kReference as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (pos.IsConstant()) {
    int32_t constant = Int32ConstantFrom(pos);
    __ Add(base, array, element_size * constant + data_offset);
  } else {
    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
    __ Add(base, base, data_offset);
  }
}

// Compute end address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
                                         DataType::Type type,
                                         const Location& copy_length,
                                         const vixl32::Register& base,
                                         const vixl32::Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We can allow DataType::Type::kReference as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, DataType::Type::kReference);
  const int32_t element_size = DataType::Size(type);
  const uint32_t element_size_shift = DataType::SizeShift(type);

  if (copy_length.IsConstant()) {
    int32_t constant = Int32ConstantFrom(copy_length);
    __ Add(end, base, element_size * constant);
  } else {
    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
  }
}

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    DataType::Type type = DataType::Type::kReference;
    const int32_t element_size = DataType::Size(type);

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    // TODO: Load the entrypoint once before the loop, instead of
    // loading it at every iteration.
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* far_target */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateLongToLongLocationsWithOverlap(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                    DataType::Type type,
                                    CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
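    // If the high word is non-zero its CLZ is the answer; otherwise the answer is 32 + CLZ(lo).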
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                     DataType::Type type,
                                     CodeGeneratorARMVIXL* codegen) {
  DCHECK((type == DataType::Type::kInt32) || (type == DataType::Type::kInt64));

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();
  vixl32::Register out = RegisterFrom(locations->Out());

  if (type == DataType::Type::kInt64) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
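    // CTZ(x) is computed as CLZ(RBIT(x)); when the low word is zero the answer is 32 + CTZ(hi).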
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    if (end.IsReferenced()) {
      __ Bind(&end);
    }
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt32, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateLongToLongLocationsWithOverlap(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke, DataType::Type::kInt64, codegen_);
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

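  // Branch-free abs: with mask = in >> 31 (arithmetic shift), |in| == (in + mask) ^ mask.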
  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::SRegister op1 = SRegisterFrom(op1_loc);
  vixl32::SRegister op2 = SRegisterFrom(op2_loc);
  vixl32::SRegister out = OutputSRegister(invoke);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp1 = temps.Acquire();
  vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label nan, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &nan, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F32, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
  __ Vmov(temp1, op1);
  __ Vmov(temp2, op2);
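  // The inputs compare equal, so merge their sign bits: ORR yields -0.0 for min(+0.0, -0.0),
  // AND yields +0.0 for max(+0.0, -0.0).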
  if (is_min) {
    __ Orr(temp1, temp1, temp2);
  } else {
    __ And(temp1, temp1, temp2);
  }
  __ Vmov(out, temp1);
  __ B(final_label);

  // handle NaN input.
  __ Bind(&nan);
  __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
  __ Vmov(out, temp1);

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
}

static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::DRegister op1 = DRegisterFrom(op1_loc);
  vixl32::DRegister op2 = DRegisterFrom(op2_loc);
  vixl32::DRegister out = OutputDRegister(invoke);
  vixl32::Label handle_nan_eq, done;
  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &handle_nan_eq, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F64, out, op2);
  }
  // for <>(not equal), we've done min/max calculation.
  __ B(ne, final_label, /* far_target */ false);

  // handle op1 == op2, max(+0.0,-0.0).
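  // VAND merges the sign bits of the equal operands, so max(+0.0, -0.0) comes out as +0.0.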
  if (!is_min) {
    __ Vand(F64, out, op1, op2);
    __ B(final_label);
  }

  // handle op1 == op2, min(+0.0,-0.0), NaN input.
  __ Bind(&handle_nan_eq);
  __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ true, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
}

static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
  vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
  vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
  vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
  vixl32::Register out_lo = LowRegisterFrom(out_loc);
  vixl32::Register out_hi = HighRegisterFrom(out_loc);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp = temps.Acquire();

  DCHECK(op1_lo.Is(out_lo));
  DCHECK(op1_hi.Is(out_hi));

  // Compare op1 >= op2, or op1 < op2.
  __ Cmp(out_lo, op2_lo);
  __ Sbcs(temp, out_hi, op2_hi);
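  // The SBCS result is discarded; with the CMP it sets flags for a signed 64-bit comparison.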

  // Now GE/LT condition code is correct for the long comparison.
  {
    vixl32::ConditionType cond = is_min ? ge : lt;
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                3 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ itt(cond);
    __ mov(cond, out_lo, op2_lo);
    __ mov(cond, out_hi, op2_hi);
  }
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());

  ArmVIXLAssembler* assembler = GetAssembler();
  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
  vixl32::Register out_reg = OutputRegister(invoke);
  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label done;
  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);

  // Round to nearest integer, ties away from zero.
  __ Vcvta(S32, F32, temp1, in_reg);
  __ Vmov(out_reg, temp1);

  // For positive, zero or NaN inputs, rounding is done.
  __ Cmp(out_reg, 0);
  __ B(ge, final_label, /* far_target */ false);

  // Handle input < 0 cases.
  // If input is negative but not a tie, previous result (round to nearest) is valid.
  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
  __ Vrinta(F32, F32, temp1, in_reg);
  __ Vmov(temp2, 0.5);
  __ Vsub(F32, temp1, in_reg, temp1);
  __ Vcmp(F32, temp1, temp2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  {
    // Use ExactAssemblyScope here because we are using IT.
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(eq);
    __ add(eq, out_reg, out_reg, 1);
  }

  if (done.IsReferenced()) {
    __ Bind(&done);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         DataType::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case DataType::Type::kInt32: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case DataType::Type::kReference: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
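          // The Baker read barrier helper loads the reference and marks it if the GC is marking.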
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case DataType::Type::kInt64: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
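        // Without single-copy atomic ldrd, use ldrexd to perform an atomic 64-bit load.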
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          DataType::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == DataType::Type::kReference && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, DataType::Type::kInt32);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, DataType::Type::kInt64);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, DataType::Type::kReference);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, DataType::Type::kReference);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt32, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kInt64, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, DataType::Type::kReference, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     DataType::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == DataType::Type::kInt64) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == DataType::Type::kReference) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kInt32, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kInt32, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kReference, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kReference, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kInt64, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, DataType::Type::kInt64, /* is_volatile */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         DataType::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

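  // Volatile and ordered (lazySet) stores need a barrier before the write; volatile stores
  // also get the trailing barrier emitted below.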
  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == DataType::Type::kInt64) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
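      // No single-copy atomic strd: use a ldrexd/strexd loop to make the 64-bit store atomic.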
1184 vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
1185 vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
1186 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1187 const vixl32::Register temp_reg = temps.Acquire();
1188
1189 __ Add(temp_reg, base, offset);
1190 vixl32::Label loop_head;
1191 __ Bind(&loop_head);
Scott Wakelingb77051e2016-11-21 19:46:00 +00001192 __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
1193 __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001194 __ Cmp(temp_lo, 0);
Artem Serov517d9f62016-12-12 15:51:15 +00001195 __ B(ne, &loop_head, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001196 } else {
1197 __ Strd(value_lo, value_hi, MemOperand(base, offset));
1198 }
1199 } else {
1200 value = RegisterFrom(locations->InAt(3));
1201 vixl32::Register source = value;
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001202 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
Anton Kirilov5ec62182016-10-13 20:16:02 +01001203 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1204 __ Mov(temp, value);
1205 assembler->PoisonHeapReference(temp);
1206 source = temp;
1207 }
1208 __ Str(source, MemOperand(base, offset));
1209 }
1210
1211 if (is_volatile) {
1212 __ Dmb(vixl32::ISH);
1213 }
1214
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001215 if (type == DataType::Type::kReference) {
Anton Kirilov5ec62182016-10-13 20:16:02 +01001216 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1217 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
1218 bool value_can_be_null = true; // TODO: Worth finding out this information?
1219 codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
1220 }
1221}
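// For reference, on cores without atomic LDRD/STRD the volatile 64-bit path above emits
// roughly the following (illustrative sketch, not verbatim assembler output):
//     dmb    ish                                   ; leading barrier (volatile or ordered)
//     add    temp_reg, base, offset
//   retry:
//     ldrexd temp_lo, temp_hi, [temp_reg]          ; claim the exclusive monitor
//     strexd temp_lo, value_lo, value_hi, [temp_reg]
//     cmp    temp_lo, #0                           ; 0 means the store succeeded
//     bne    retry
//     dmb    ish                                   ; trailing barrier (volatile only)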
1222
1223void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
1224 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001225 DataType::Type::kInt32,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001226 /* is_volatile */ false,
1227 /* is_ordered */ false,
1228 codegen_);
1229}
1230void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
1231 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001232 DataType::Type::kInt32,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001233 /* is_volatile */ false,
1234 /* is_ordered */ true,
1235 codegen_);
1236}
1237void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
1238 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001239 DataType::Type::kInt32,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001240 /* is_volatile */ true,
1241 /* is_ordered */ false,
1242 codegen_);
1243}
1244void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
1245 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001246 DataType::Type::kReference,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001247 /* is_volatile */ false,
1248 /* is_ordered */ false,
1249 codegen_);
1250}
1251void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1252 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001253 DataType::Type::kReference,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001254 /* is_volatile */ false,
1255 /* is_ordered */ true,
1256 codegen_);
1257}
1258void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1259 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001260 DataType::Type::kReference,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001261 /* is_volatile */ true,
1262 /* is_ordered */ false,
1263 codegen_);
1264}
1265void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
1266 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001267 DataType::Type::kInt64,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001268 /* is_volatile */ false,
1269 /* is_ordered */ false,
1270 codegen_);
1271}
1272void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1273 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001274 DataType::Type::kInt64,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001275 /* is_volatile */ false,
1276 /* is_ordered */ true,
1277 codegen_);
1278}
1279void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1280 GenUnsafePut(invoke->GetLocations(),
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001281 DataType::Type::kInt64,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001282 /* is_volatile */ true,
1283 /* is_ordered */ false,
1284 codegen_);
1285}
1286
1287static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
1288 HInvoke* invoke,
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001289 DataType::Type type) {
Anton Kirilov5ec62182016-10-13 20:16:02 +01001290 bool can_call = kEmitCompilerReadBarrier &&
1291 kUseBakerReadBarrier &&
1292 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
1293 LocationSummary* locations = new (arena) LocationSummary(invoke,
1294 (can_call
1295 ? LocationSummary::kCallOnSlowPath
1296 : LocationSummary::kNoCall),
1297 kIntrinsified);
1298 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1299 locations->SetInAt(1, Location::RequiresRegister());
1300 locations->SetInAt(2, Location::RequiresRegister());
1301 locations->SetInAt(3, Location::RequiresRegister());
1302 locations->SetInAt(4, Location::RequiresRegister());
1303
1304 // If heap poisoning is enabled, we don't want the unpoisoning
1305 // operations to potentially clobber the output. Likewise when
1306 // emitting a (Baker) read barrier, which may call.
1307 Location::OutputOverlap overlaps =
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001308 ((kPoisonHeapReferences && type == DataType::Type::kReference) || can_call)
Anton Kirilov5ec62182016-10-13 20:16:02 +01001309 ? Location::kOutputOverlap
1310 : Location::kNoOutputOverlap;
1311 locations->SetOut(Location::RequiresRegister(), overlaps);
1312
1313 // Temporary registers used in CAS. In the object case
1314 // (UnsafeCASObject intrinsic), these are also used for
1315 // card-marking, and possibly for (Baker) read barrier.
1316 locations->AddTemp(Location::RequiresRegister()); // Pointer.
1317 locations->AddTemp(Location::RequiresRegister()); // Temp 1.
1318}
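// For reference, the resulting location summary consumed by GenCas() below is roughly:
//   InputAt(1) - object, InputAt(2) - 64-bit field offset (low word used),
//   InputAt(3) - expected value, InputAt(4) - new value,
//   Out - success flag (0/1), Temp(0) - field address, Temp(1) - value loaded by LDREX.
// InputAt(0) is the unused Unsafe receiver.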
1319
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001320static void GenCas(HInvoke* invoke, DataType::Type type, CodeGeneratorARMVIXL* codegen) {
1321 DCHECK_NE(type, DataType::Type::kInt64);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001322
1323 ArmVIXLAssembler* assembler = codegen->GetAssembler();
1324 LocationSummary* locations = invoke->GetLocations();
1325
1326 Location out_loc = locations->Out();
1327 vixl32::Register out = OutputRegister(invoke); // Boolean result.
1328
1329 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
1330 Location offset_loc = locations->InAt(2);
1331 vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B).
1332 vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
1333 vixl32::Register value = InputRegisterAt(invoke, 4); // Value.
1334
1335 Location tmp_ptr_loc = locations->GetTemp(0);
1336 vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory.
1337 vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory.
1338
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001339 if (type == DataType::Type::kReference) {
Anton Kirilov5ec62182016-10-13 20:16:02 +01001340 // The only read barrier implementation supporting the
1341 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1342 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1343
1344 // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
1345 // object and scan the receiver at the next GC for nothing.
1346 bool value_can_be_null = true; // TODO: Worth finding out this information?
1347 codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
1348
1349 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1350 // Need to make sure the reference stored in the field is a to-space
1351 // one before attempting the CAS or the CAS could fail incorrectly.
Roland Levillainff487002017-03-07 16:50:01 +00001352 codegen->UpdateReferenceFieldWithBakerReadBarrier(
Anton Kirilov5ec62182016-10-13 20:16:02 +01001353 invoke,
1354 out_loc, // Unused, used only as a "temporary" within the read barrier.
1355 base,
Roland Levillainff487002017-03-07 16:50:01 +00001356 /* field_offset */ offset_loc,
Anton Kirilov5ec62182016-10-13 20:16:02 +01001357 tmp_ptr_loc,
1358 /* needs_null_check */ false,
Roland Levillainff487002017-03-07 16:50:01 +00001359 tmp);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001360 }
1361 }
1362
1363 // Prevent reordering with prior memory operations.
1364  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
1365  // latter allows a preceding load to be delayed past the STREX
1366 // instruction below.
1367 __ Dmb(vixl32::ISH);
1368
1369 __ Add(tmp_ptr, base, offset);
1370
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001371 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
Anton Kirilov5ec62182016-10-13 20:16:02 +01001372 codegen->GetAssembler()->PoisonHeapReference(expected);
1373 if (value.Is(expected)) {
1374 // Do not poison `value`, as it is the same register as
1375 // `expected`, which has just been poisoned.
1376 } else {
1377 codegen->GetAssembler()->PoisonHeapReference(value);
1378 }
1379 }
1380
1381 // do {
1382 // tmp = [r_ptr] - expected;
1383 // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
1384  // result = (tmp == 0);
1385
1386 vixl32::Label loop_head;
1387 __ Bind(&loop_head);
1388
Scott Wakelingb77051e2016-11-21 19:46:00 +00001389 __ Ldrex(tmp, MemOperand(tmp_ptr));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001390
1391 __ Subs(tmp, tmp, expected);
1392
1393 {
Artem Serov0fb37192016-12-06 18:13:40 +00001394 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1395 3 * kMaxInstructionSizeInBytes,
1396 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001397
1398 __ itt(eq);
Scott Wakelingb77051e2016-11-21 19:46:00 +00001399 __ strex(eq, tmp, value, MemOperand(tmp_ptr));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001400 __ cmp(eq, tmp, 1);
1401 }
1402
Artem Serov517d9f62016-12-12 15:51:15 +00001403 __ B(eq, &loop_head, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001404
1405 __ Dmb(vixl32::ISH);
1406
1407 __ Rsbs(out, tmp, 1);
1408
1409 {
Artem Serov0fb37192016-12-06 18:13:40 +00001410 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1411 2 * kMaxInstructionSizeInBytes,
1412 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001413
1414 __ it(cc);
1415 __ mov(cc, out, 0);
1416 }
1417
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001418 if (kPoisonHeapReferences && type == DataType::Type::kReference) {
Anton Kirilov5ec62182016-10-13 20:16:02 +01001419 codegen->GetAssembler()->UnpoisonHeapReference(expected);
1420 if (value.Is(expected)) {
1421 // Do not unpoison `value`, as it is the same register as
1422 // `expected`, which has just been unpoisoned.
1423 } else {
1424 codegen->GetAssembler()->UnpoisonHeapReference(value);
1425 }
1426 }
1427}
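// For reference, the core compare-and-set emitted above is roughly the following
// (illustrative sketch; the heap poisoning and read barrier steps are omitted):
//     dmb     ish
//     add     tmp_ptr, base, offset
//   retry:
//     ldrex   tmp, [tmp_ptr]
//     subs    tmp, tmp, expected
//     itt     eq
//     strexeq tmp, value, [tmp_ptr]     ; on a match: tmp := 0 (stored) or 1 (lost exclusivity)
//     cmpeq   tmp, #1
//     beq     retry                     ; retry only when the exclusive store failed
//     dmb     ish
//     rsbs    out, tmp, #1              ; out = 1 - tmp, i.e. 1 on success
//     it      cc
//     movcc   out, #0                   ; clamp to 0 when the values did not match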
1428
1429void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001430 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, DataType::Type::kInt32);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001431}
1432void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1433 // The only read barrier implementation supporting the
1434 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1435 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1436 return;
1437 }
1438
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001439 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, DataType::Type::kReference);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001440}
1441void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001442 GenCas(invoke, DataType::Type::kInt32, codegen_);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001443}
1444void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1445 // The only read barrier implementation supporting the
1446 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1447 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1448
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001449 GenCas(invoke, DataType::Type::kReference, codegen_);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001450}
1451
1452void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1453 // The inputs plus one temp.
1454 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1455 invoke->InputAt(1)->CanBeNull()
1456 ? LocationSummary::kCallOnSlowPath
1457 : LocationSummary::kNoCall,
1458 kIntrinsified);
1459 locations->SetInAt(0, Location::RequiresRegister());
1460 locations->SetInAt(1, Location::RequiresRegister());
1461 locations->AddTemp(Location::RequiresRegister());
1462 locations->AddTemp(Location::RequiresRegister());
1463 locations->AddTemp(Location::RequiresRegister());
1464  // Need an extra temporary register for the String compression feature.
1465 if (mirror::kUseStringCompression) {
1466 locations->AddTemp(Location::RequiresRegister());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001467 }
1468 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1469}
1470
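// String.compareTo() is generated along these lines (illustrative outline of the code
// below, not an additional code path):
//   1. if (str == arg) return 0;                    // reference equality
//   2. out = str.length() - arg.length();           // result if no character differs
//   3. compare min(len1, len2) characters in the main loop, 4 chars (or 8 compressed
//      bytes) per iteration;
//   4. at the first differing word, isolate the first differing character via RBIT+CLZ
//      and return the difference of the two characters;
//   5. if the strings use different compression styles, fall back to the byte-vs-half-word
//      loop under `different_compression`.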
1471void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1472 ArmVIXLAssembler* assembler = GetAssembler();
1473 LocationSummary* locations = invoke->GetLocations();
1474
1475 vixl32::Register str = InputRegisterAt(invoke, 0);
1476 vixl32::Register arg = InputRegisterAt(invoke, 1);
1477 vixl32::Register out = OutputRegister(invoke);
1478
1479 vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1480 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1481 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001482 vixl32::Register temp3;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001483 if (mirror::kUseStringCompression) {
1484 temp3 = RegisterFrom(locations->GetTemp(3));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001485 }
1486
1487 vixl32::Label loop;
1488 vixl32::Label find_char_diff;
1489 vixl32::Label end;
1490 vixl32::Label different_compression;
1491
1492 // Get offsets of count and value fields within a string object.
1493 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1494 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1495
1496 // Note that the null check must have been done earlier.
1497 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1498
1499 // Take slow path and throw if input can be and is null.
1500 SlowPathCodeARMVIXL* slow_path = nullptr;
1501 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1502 if (can_slow_path) {
1503 slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1504 codegen_->AddSlowPath(slow_path);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001505 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001506 }
1507
1508 // Reference equality check, return 0 if same reference.
1509 __ Subs(out, str, arg);
1510 __ B(eq, &end);
1511
Anton Kirilov5ec62182016-10-13 20:16:02 +01001512 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001513 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001514 __ Ldr(temp3, MemOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001515 __ Ldr(temp2, MemOperand(arg, count_offset));
1516 // Extract lengths from the `count` fields.
1517 __ Lsr(temp0, temp3, 1u);
1518 __ Lsr(temp1, temp2, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001519 } else {
1520 // Load lengths of this and argument strings.
1521 __ Ldr(temp0, MemOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001522 __ Ldr(temp1, MemOperand(arg, count_offset));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001523 }
1524 // out = length diff.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001525 __ Subs(out, temp0, temp1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001526 // temp0 = min(len(str), len(arg)).
1527
1528 {
Artem Serov0fb37192016-12-06 18:13:40 +00001529 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1530 2 * kMaxInstructionSizeInBytes,
1531 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001532
1533 __ it(gt);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001534 __ mov(gt, temp0, temp1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001535 }
1536
Anton Kirilov5ec62182016-10-13 20:16:02 +01001537 // Shorter string is empty?
xueliang.zhongf51bc622016-11-04 09:23:32 +00001538 // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1539  // which places the &end label far away from this branch and makes it not 'CBZ-encodable'.
1540 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001541
1542 if (mirror::kUseStringCompression) {
1543    // Check that both strings use the same compression style before using this comparison loop.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001544 __ Eors(temp2, temp2, temp3);
1545 __ Lsrs(temp2, temp2, 1u);
1546 __ B(cs, &different_compression);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001547 // For string compression, calculate the number of bytes to compare (not chars).
1548 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001549 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001550
Artem Serov0fb37192016-12-06 18:13:40 +00001551 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1552 2 * kMaxInstructionSizeInBytes,
1553 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001554
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001555 __ it(ne);
1556 __ add(ne, temp0, temp0, temp0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001557 }
1558
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001559 // Store offset of string value in preparation for comparison loop.
1560 __ Mov(temp1, value_offset);
1561
Anton Kirilov5ec62182016-10-13 20:16:02 +01001562 // Assertions that must hold in order to compare multiple characters at a time.
1563 CHECK_ALIGNED(value_offset, 8);
1564 static_assert(IsAligned<8>(kObjectAlignment),
1565 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1566
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001567 const unsigned char_size = DataType::Size(DataType::Type::kUint16);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001568 DCHECK_EQ(char_size, 2u);
1569
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001570 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1571
Anton Kirilov5ec62182016-10-13 20:16:02 +01001572 vixl32::Label find_char_diff_2nd_cmp;
1573 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1574 __ Bind(&loop);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001575 vixl32::Register temp_reg = temps.Acquire();
Anton Kirilov5ec62182016-10-13 20:16:02 +01001576 __ Ldr(temp_reg, MemOperand(str, temp1));
1577 __ Ldr(temp2, MemOperand(arg, temp1));
1578 __ Cmp(temp_reg, temp2);
Artem Serov517d9f62016-12-12 15:51:15 +00001579 __ B(ne, &find_char_diff, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001580 __ Add(temp1, temp1, char_size * 2);
1581
1582 __ Ldr(temp_reg, MemOperand(str, temp1));
1583 __ Ldr(temp2, MemOperand(arg, temp1));
1584 __ Cmp(temp_reg, temp2);
Artem Serov517d9f62016-12-12 15:51:15 +00001585 __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001586 __ Add(temp1, temp1, char_size * 2);
1587 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1588 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
Artem Serov517d9f62016-12-12 15:51:15 +00001589 __ B(hi, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001590 __ B(&end);
1591
1592 __ Bind(&find_char_diff_2nd_cmp);
1593 if (mirror::kUseStringCompression) {
1594 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
Artem Serov517d9f62016-12-12 15:51:15 +00001595 __ B(ls, &end, /* far_target */ false); // Was the second comparison fully beyond the end?
Anton Kirilov5ec62182016-10-13 20:16:02 +01001596 } else {
1597 // Without string compression, we can start treating temp0 as signed
1598 // and rely on the signed comparison below.
1599 __ Sub(temp0, temp0, 2);
1600 }
1601
1602 // Find the single character difference.
1603 __ Bind(&find_char_diff);
1604 // Get the bit position of the first character that differs.
1605 __ Eor(temp1, temp2, temp_reg);
1606 __ Rbit(temp1, temp1);
1607 __ Clz(temp1, temp1);
1608
1609 // temp0 = number of characters remaining to compare.
1610 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1611 // in the comparison loop, and after the end of the shorter string data).
1612
1613 // Without string compression (temp1 >> 4) = character where difference occurs between the last
1614 // two words compared, in the interval [0,1].
1615 // (0 for low half-word different, 1 for high half-word different).
1616  // With string compression, (temp1 >> 3) = byte where the difference occurs,
1617 // in the interval [0,3].
1618
1619 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1620 // the remaining string data, so just return length diff (out).
1621 // The comparison is unsigned for string compression, otherwise signed.
1622 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
Artem Serov517d9f62016-12-12 15:51:15 +00001623 __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001624
Anton Kirilov5ec62182016-10-13 20:16:02 +01001625 // Extract the characters and calculate the difference.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001626 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001627 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1628 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1629 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001630 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u
1631 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001632 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1633 __ Lsr(temp2, temp2, temp1); // Extract second character.
1634 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1635 __ Lsr(out, temp_reg, temp1); // Extract first character.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001636 __ And(temp2, temp2, temp3);
1637 __ And(out, out, temp3);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001638 } else {
Anton Kirilovb88c4842016-11-14 14:37:00 +00001639 __ Bic(temp1, temp1, 0xf);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001640 __ Lsr(temp2, temp2, temp1);
1641 __ Lsr(out, temp_reg, temp1);
Anton Kirilovb88c4842016-11-14 14:37:00 +00001642 __ Movt(temp2, 0);
1643 __ Movt(out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001644 }
Anton Kirilov5ec62182016-10-13 20:16:02 +01001645
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001646 __ Sub(out, out, temp2);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001647 temps.Release(temp_reg);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001648
1649 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001650 __ B(&end);
1651 __ Bind(&different_compression);
1652
1653 // Comparison for different compression style.
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001654 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001655 DCHECK_EQ(c_char_size, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001656
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001657    // We want to free up temp3, currently holding `str.count`, for comparison.
1658    // So, we move it to the bottom bit of the iteration count `temp0` which we then
1659 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1660 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1661 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001662 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001663 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001664 __ Mov(temp1, str);
1665 __ Mov(temp2, arg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001666 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1667 {
Artem Serov0fb37192016-12-06 18:13:40 +00001668 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1669 3 * kMaxInstructionSizeInBytes,
1670 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001671 __ itt(cs); // Interleave with selection of temp1 and temp2.
1672 __ mov(cs, temp1, arg); // Preserves flags.
1673 __ mov(cs, temp2, str); // Preserves flags.
1674 }
Anton Kirilovb88c4842016-11-14 14:37:00 +00001675 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001676
1677 // Adjust temp1 and temp2 from string pointers to data pointers.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001678 __ Add(temp1, temp1, value_offset);
1679 __ Add(temp2, temp2, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001680
1681 vixl32::Label different_compression_loop;
1682 vixl32::Label different_compression_diff;
1683
1684 // Main loop for different compression.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001685 temp_reg = temps.Acquire();
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001686 __ Bind(&different_compression_loop);
1687 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1688 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
Anton Kirilovb88c4842016-11-14 14:37:00 +00001689 __ Cmp(temp_reg, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00001690 __ B(ne, &different_compression_diff, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001691 __ Subs(temp0, temp0, 2);
Artem Serov517d9f62016-12-12 15:51:15 +00001692 __ B(hi, &different_compression_loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001693 __ B(&end);
1694
1695 // Calculate the difference.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001696 __ Bind(&different_compression_diff);
1697 __ Sub(out, temp_reg, temp3);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001698 temps.Release(temp_reg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001699 // Flip the difference if the `arg` is compressed.
1700 // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
1701 __ Lsrs(temp0, temp0, 1u);
1702 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1703 "Expecting 0=compressed, 1=uncompressed");
1704
Artem Serov0fb37192016-12-06 18:13:40 +00001705 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1706 2 * kMaxInstructionSizeInBytes,
1707 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001708 __ it(cc);
1709 __ rsb(cc, out, out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001710 }
1711
1712 __ Bind(&end);
1713
1714 if (can_slow_path) {
1715 __ Bind(slow_path->GetExitLabel());
1716 }
1717}
1718
Vladimir Marko984519c2017-08-23 10:45:29 +01001719// The cut off for unrolling the loop in String.equals() intrinsic for const strings.
1720// The normal loop plus the pre-header is 9 instructions (18-26 bytes) without string compression
1721// and 12 instructions (24-32 bytes) with string compression. We can compare up to 4 bytes in 4
1722// instructions (LDR+LDR+CMP+BNE) and up to 8 bytes in 6 instructions (LDRD+LDRD+CMP+BNE+CMP+BNE).
1723// Allow up to 12 instructions (32 bytes) for the unrolled loop.
1724constexpr size_t kShortConstStringEqualsCutoffInBytes = 16;
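// Worked example for the cut-off above (assuming string compression is enabled): a
// 16-character all-ASCII literal is stored in 16 bytes, i.e. two 8-byte blocks, and each
// block costs LDRD+LDRD+CMP+BNE+CMP+BNE = 6 instructions, so the fully unrolled compare is
// 12 instructions, exactly the allowed budget. Longer constants use the generic loop instead.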
1725
1726static const char* GetConstString(HInstruction* candidate, uint32_t* utf16_length) {
1727 if (candidate->IsLoadString()) {
1728 HLoadString* load_string = candidate->AsLoadString();
1729 const DexFile& dex_file = load_string->GetDexFile();
1730 return dex_file.StringDataAndUtf16LengthByIdx(load_string->GetStringIndex(), utf16_length);
1731 }
1732 return nullptr;
1733}
1734
Anton Kirilov5ec62182016-10-13 20:16:02 +01001735void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1736 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1737 LocationSummary::kNoCall,
1738 kIntrinsified);
1739 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1740 locations->SetInAt(0, Location::RequiresRegister());
1741 locations->SetInAt(1, Location::RequiresRegister());
Vladimir Marko984519c2017-08-23 10:45:29 +01001742
Anton Kirilov5ec62182016-10-13 20:16:02 +01001743 // Temporary registers to store lengths of strings and for calculations.
1744 // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
1745 locations->AddTemp(LocationFrom(r0));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001746
Vladimir Marko984519c2017-08-23 10:45:29 +01001747 // For the generic implementation and for long const strings we need an extra temporary.
1748 // We do not need it for short const strings, up to 4 bytes, see code generation below.
1749 uint32_t const_string_length = 0u;
1750 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1751 if (const_string == nullptr) {
1752 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1753 }
1754 bool is_compressed =
1755 mirror::kUseStringCompression &&
1756 const_string != nullptr &&
1757 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1758 if (const_string == nullptr || const_string_length > (is_compressed ? 4u : 2u)) {
1759 locations->AddTemp(Location::RequiresRegister());
1760 }
1761
1762 // TODO: If the String.equals() is used only for an immediately following HIf, we can
1763 // mark it as emitted-at-use-site and emit branches directly to the appropriate blocks.
1764 // Then we shall need an extra temporary register instead of the output register.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001765 locations->SetOut(Location::RequiresRegister());
1766}
1767
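// String.equals() is generated along these lines (illustrative outline of the code below):
// a null check on the argument unless it is known to be non-null, a reference-equality
// check, an instanceof check through the class field unless the argument is known to be a
// String, a `count` comparison (which also covers the compression flag), and finally either
// an unrolled comparison against a short constant string or the generic
// 4-bytes-per-iteration loop.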
1768void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1769 ArmVIXLAssembler* assembler = GetAssembler();
1770 LocationSummary* locations = invoke->GetLocations();
1771
1772 vixl32::Register str = InputRegisterAt(invoke, 0);
1773 vixl32::Register arg = InputRegisterAt(invoke, 1);
1774 vixl32::Register out = OutputRegister(invoke);
1775
1776 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001777
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001778 vixl32::Label loop;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001779 vixl32::Label end;
1780 vixl32::Label return_true;
1781 vixl32::Label return_false;
Anton Kirilov6f644202017-02-27 18:29:45 +00001782 vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001783
1784 // Get offsets of count, value, and class fields within a string object.
1785 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1786 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1787 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1788
1789 // Note that the null check must have been done earlier.
1790 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1791
1792 StringEqualsOptimizations optimizations(invoke);
1793 if (!optimizations.GetArgumentNotNull()) {
1794 // Check if input is null, return false if it is.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001795 __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001796 }
1797
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001798 // Reference equality check, return true if same reference.
1799 __ Cmp(str, arg);
Artem Serov517d9f62016-12-12 15:51:15 +00001800 __ B(eq, &return_true, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001801
Anton Kirilov5ec62182016-10-13 20:16:02 +01001802 if (!optimizations.GetArgumentIsString()) {
1803 // Instanceof check for the argument by comparing class fields.
1804 // All string objects must have the same type since String cannot be subclassed.
1805 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1806 // If the argument is a string object, its class field must be equal to receiver's class field.
1807 __ Ldr(temp, MemOperand(str, class_offset));
Vladimir Marko984519c2017-08-23 10:45:29 +01001808 __ Ldr(out, MemOperand(arg, class_offset));
1809 __ Cmp(temp, out);
Artem Serov517d9f62016-12-12 15:51:15 +00001810 __ B(ne, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001811 }
1812
Vladimir Marko984519c2017-08-23 10:45:29 +01001813 // Check if one of the inputs is a const string. Do not special-case both strings
1814  // being const; such cases should be handled by constant folding if needed.
1815 uint32_t const_string_length = 0u;
1816 const char* const_string = GetConstString(invoke->InputAt(0), &const_string_length);
1817 if (const_string == nullptr) {
1818 const_string = GetConstString(invoke->InputAt(1), &const_string_length);
1819 if (const_string != nullptr) {
1820 std::swap(str, arg); // Make sure the const string is in `str`.
1821 }
1822 }
1823 bool is_compressed =
1824 mirror::kUseStringCompression &&
1825 const_string != nullptr &&
1826 mirror::String::DexFileStringAllASCII(const_string, const_string_length);
1827
1828 if (const_string != nullptr) {
1829 // Load `count` field of the argument string and check if it matches the const string.
1830    // This also compares the compression style; return false if it differs.
1831 __ Ldr(temp, MemOperand(arg, count_offset));
1832 __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
1833 __ B(ne, &return_false, /* far_target */ false);
1834 } else {
1835 // Load `count` fields of this and argument strings.
1836 __ Ldr(temp, MemOperand(str, count_offset));
1837 __ Ldr(out, MemOperand(arg, count_offset));
1838    // Check if the `count` fields are equal; return false if they're not.
1839    // This also compares the compression style; return false if it differs.
1840 __ Cmp(temp, out);
1841 __ B(ne, &return_false, /* far_target */ false);
1842 }
Anton Kirilov5ec62182016-10-13 20:16:02 +01001843
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001844 // Assertions that must hold in order to compare strings 4 bytes at a time.
Vladimir Marko984519c2017-08-23 10:45:29 +01001845 // Ok to do this because strings are zero-padded to kObjectAlignment.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001846 DCHECK_ALIGNED(value_offset, 4);
1847 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1848
Vladimir Marko984519c2017-08-23 10:45:29 +01001849 if (const_string != nullptr &&
1850 const_string_length <= (is_compressed ? kShortConstStringEqualsCutoffInBytes
1851 : kShortConstStringEqualsCutoffInBytes / 2u)) {
1852 // Load and compare the contents. Though we know the contents of the short const string
1853 // at compile time, materializing constants may be more code than loading from memory.
1854 int32_t offset = value_offset;
1855 size_t remaining_bytes =
1856 RoundUp(is_compressed ? const_string_length : const_string_length * 2u, 4u);
1857 while (remaining_bytes > sizeof(uint32_t)) {
1858 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1859 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1860 vixl32::Register temp2 = scratch_scope.Acquire();
1861 __ Ldrd(temp, temp1, MemOperand(str, offset));
1862 __ Ldrd(temp2, out, MemOperand(arg, offset));
1863 __ Cmp(temp, temp2);
1864      __ B(ne, &return_false, /* far_target */ false);
1865 __ Cmp(temp1, out);
1866      __ B(ne, &return_false, /* far_target */ false);
1867 offset += 2u * sizeof(uint32_t);
1868 remaining_bytes -= 2u * sizeof(uint32_t);
1869 }
1870 if (remaining_bytes != 0u) {
1871 __ Ldr(temp, MemOperand(str, offset));
1872 __ Ldr(out, MemOperand(arg, offset));
1873 __ Cmp(temp, out);
1874      __ B(ne, &return_false, /* far_target */ false);
1875 }
1876 } else {
1877 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1878 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1879 "Expecting 0=compressed, 1=uncompressed");
1880 __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
1881
1882 if (mirror::kUseStringCompression) {
1883 // For string compression, calculate the number of bytes to compare (not chars).
1884 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1885 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1886 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1887 2 * kMaxInstructionSizeInBytes,
1888 CodeBufferCheckScope::kMaximumSize);
1889 __ it(cs); // If uncompressed,
1890 __ add(cs, temp, temp, temp); // double the byte count.
1891 }
1892
1893 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1894 UseScratchRegisterScope scratch_scope(assembler->GetVIXLAssembler());
1895 vixl32::Register temp2 = scratch_scope.Acquire();
1896
1897 // Store offset of string value in preparation for comparison loop.
1898 __ Mov(temp1, value_offset);
1899
1900 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1901 __ Bind(&loop);
1902 __ Ldr(out, MemOperand(str, temp1));
1903 __ Ldr(temp2, MemOperand(arg, temp1));
1904 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
1905 __ Cmp(out, temp2);
1906 __ B(ne, &return_false, /* far_target */ false);
1907 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1908 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1909 __ B(hi, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001910 }
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001911
Anton Kirilov5ec62182016-10-13 20:16:02 +01001912 // Return true and exit the function.
1913 // If loop does not result in returning false, we return true.
1914 __ Bind(&return_true);
1915 __ Mov(out, 1);
Anton Kirilov6f644202017-02-27 18:29:45 +00001916 __ B(final_label);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001917
1918 // Return false and exit the function.
1919 __ Bind(&return_false);
1920 __ Mov(out, 0);
Anton Kirilov6f644202017-02-27 18:29:45 +00001921
1922 if (end.IsReferenced()) {
1923 __ Bind(&end);
1924 }
Anton Kirilov5ec62182016-10-13 20:16:02 +01001925}
1926
1927static void GenerateVisitStringIndexOf(HInvoke* invoke,
1928 ArmVIXLAssembler* assembler,
1929 CodeGeneratorARMVIXL* codegen,
1930 ArenaAllocator* allocator,
1931 bool start_at_zero) {
1932 LocationSummary* locations = invoke->GetLocations();
1933
1934 // Note that the null check must have been done earlier.
1935 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1936
1937 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1938 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1939 SlowPathCodeARMVIXL* slow_path = nullptr;
1940 HInstruction* code_point = invoke->InputAt(1);
1941 if (code_point->IsIntConstant()) {
Anton Kirilov644032c2016-12-06 17:51:43 +00001942 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
Anton Kirilov5ec62182016-10-13 20:16:02 +01001943 std::numeric_limits<uint16_t>::max()) {
1944 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1945 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1946 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1947 codegen->AddSlowPath(slow_path);
1948 __ B(slow_path->GetEntryLabel());
1949 __ Bind(slow_path->GetExitLabel());
1950 return;
1951 }
Vladimir Marko0ebe0d82017-09-21 22:50:39 +01001952 } else if (code_point->GetType() != DataType::Type::kUint16) {
Anton Kirilov5ec62182016-10-13 20:16:02 +01001953 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1954    // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1955 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1956 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1957 codegen->AddSlowPath(slow_path);
1958 __ B(hs, slow_path->GetEntryLabel());
1959 }
1960
1961 if (start_at_zero) {
1962 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1963 DCHECK(tmp_reg.Is(r2));
1964 // Start-index = 0.
1965 __ Mov(tmp_reg, 0);
1966 }
1967
1968 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1969 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1970
1971 if (slow_path != nullptr) {
1972 __ Bind(slow_path->GetExitLabel());
1973 }
1974}
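// Note (informational): both IndexOf intrinsics below end up calling the kQuickIndexOf
// entrypoint, which takes (string, code point, start index) in r0-r2 following the runtime
// calling convention; the only difference is whether the start index is forced to zero here
// (IndexOf) or passed through from the call site (IndexOfAfter).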
1975
1976void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1977 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1978 LocationSummary::kCallOnMainAndSlowPath,
1979 kIntrinsified);
1980 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1981 // best to align the inputs accordingly.
1982 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1983 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1984 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1985 locations->SetOut(LocationFrom(r0));
1986
1987 // Need to send start-index=0.
1988 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1989}
1990
1991void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1992 GenerateVisitStringIndexOf(
1993 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1994}
1995
1996void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1997 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1998 LocationSummary::kCallOnMainAndSlowPath,
1999 kIntrinsified);
2000 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
2001 // best to align the inputs accordingly.
2002 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2003 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2004 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2005 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2006 locations->SetOut(LocationFrom(r0));
2007}
2008
2009void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
2010 GenerateVisitStringIndexOf(
2011 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
2012}
2013
2014void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
2015 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2016 LocationSummary::kCallOnMainAndSlowPath,
2017 kIntrinsified);
2018 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2019 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2020 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2021 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2022 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
2023 locations->SetOut(LocationFrom(r0));
2024}
2025
2026void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
2027 ArmVIXLAssembler* assembler = GetAssembler();
2028 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
2029 __ Cmp(byte_array, 0);
2030 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2031 codegen_->AddSlowPath(slow_path);
2032 __ B(eq, slow_path->GetEntryLabel());
2033
2034 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
2035 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
2036 __ Bind(slow_path->GetExitLabel());
2037}
2038
2039void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
2040 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2041 LocationSummary::kCallOnMainOnly,
2042 kIntrinsified);
2043 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2044 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2045 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
2046 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
2047 locations->SetOut(LocationFrom(r0));
2048}
2049
2050void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
2051 // No need to emit code checking whether `locations->InAt(2)` is a null
2052 // pointer, as callers of the native method
2053 //
2054 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
2055 //
2056 // all include a null check on `data` before calling that method.
2057 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
2058 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
2059}
2060
2061void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
2062 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2063 LocationSummary::kCallOnMainAndSlowPath,
2064 kIntrinsified);
2065 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2066 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
2067 locations->SetOut(LocationFrom(r0));
2068}
2069
2070void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
2071 ArmVIXLAssembler* assembler = GetAssembler();
2072 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
2073 __ Cmp(string_to_copy, 0);
2074 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2075 codegen_->AddSlowPath(slow_path);
2076 __ B(eq, slow_path->GetEntryLabel());
2077
2078 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
2079 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
2080
2081 __ Bind(slow_path->GetExitLabel());
2082}
2083
2084void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2085 // The only read barrier implementation supporting the
2086 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2087 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2088 return;
2089 }
2090
2091 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2092 LocationSummary* locations = invoke->GetLocations();
2093 if (locations == nullptr) {
2094 return;
2095 }
2096
2097 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
2098 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
2099 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
2100
2101 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
2102 locations->SetInAt(1, Location::RequiresRegister());
2103 }
2104 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
2105 locations->SetInAt(3, Location::RequiresRegister());
2106 }
2107 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
2108 locations->SetInAt(4, Location::RequiresRegister());
2109 }
2110 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2111 // Temporary register IP cannot be used in
2112 // ReadBarrierSystemArrayCopySlowPathARM (because that register
2113 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
2114 // temporary register from the register allocator.
2115 locations->AddTemp(Location::RequiresRegister());
Vladimir Markoeee1c0e2017-04-21 17:58:41 +01002116 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen_);
2117 arm_codegen->MaybeAddBakerCcEntrypointTempForFields(locations);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002118 }
2119}
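// Note (informational): the ShifterOperandCanAlwaysHold() checks above keep constant
// positions/lengths as immediates when they have an ARM/Thumb modified-immediate encoding
// (e.g. 0 or 255) and force them into registers otherwise (e.g. 0x12345678), so that the
// comparisons in CheckPosition() below can always be encoded.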
2120
2121static void CheckPosition(ArmVIXLAssembler* assembler,
2122 Location pos,
2123 vixl32::Register input,
2124 Location length,
2125 SlowPathCodeARMVIXL* slow_path,
2126 vixl32::Register temp,
2127 bool length_is_input_length = false) {
2128 // Where is the length in the Array?
2129 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
2130
2131 if (pos.IsConstant()) {
2132 int32_t pos_const = Int32ConstantFrom(pos);
2133 if (pos_const == 0) {
2134 if (!length_is_input_length) {
2135 // Check that length(input) >= length.
2136 __ Ldr(temp, MemOperand(input, length_offset));
2137 if (length.IsConstant()) {
2138 __ Cmp(temp, Int32ConstantFrom(length));
2139 } else {
2140 __ Cmp(temp, RegisterFrom(length));
2141 }
2142 __ B(lt, slow_path->GetEntryLabel());
2143 }
2144 } else {
2145 // Check that length(input) >= pos.
2146 __ Ldr(temp, MemOperand(input, length_offset));
2147 __ Subs(temp, temp, pos_const);
2148 __ B(lt, slow_path->GetEntryLabel());
2149
2150 // Check that (length(input) - pos) >= length.
2151 if (length.IsConstant()) {
2152 __ Cmp(temp, Int32ConstantFrom(length));
2153 } else {
2154 __ Cmp(temp, RegisterFrom(length));
2155 }
2156 __ B(lt, slow_path->GetEntryLabel());
2157 }
2158 } else if (length_is_input_length) {
2159 // The only way the copy can succeed is if pos is zero.
2160 vixl32::Register pos_reg = RegisterFrom(pos);
xueliang.zhongf51bc622016-11-04 09:23:32 +00002161 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002162 } else {
2163 // Check that pos >= 0.
2164 vixl32::Register pos_reg = RegisterFrom(pos);
2165 __ Cmp(pos_reg, 0);
2166 __ B(lt, slow_path->GetEntryLabel());
2167
2168 // Check that pos <= length(input).
2169 __ Ldr(temp, MemOperand(input, length_offset));
2170 __ Subs(temp, temp, pos_reg);
2171 __ B(lt, slow_path->GetEntryLabel());
2172
2173 // Check that (length(input) - pos) >= length.
2174 if (length.IsConstant()) {
2175 __ Cmp(temp, Int32ConstantFrom(length));
2176 } else {
2177 __ Cmp(temp, RegisterFrom(length));
2178 }
2179 __ B(lt, slow_path->GetEntryLabel());
2180 }
2181}
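// In pseudocode, CheckPosition() above verifies, branching to the slow path on failure:
//   if (length_is_input_length) {
//     pos == 0;                    // copying length(input) elements can only start at 0
//   } else {
//     pos >= 0 && pos <= length(input) && length(input) - pos >= length;
//   }
// with the obvious simplifications when `pos` and/or `length` are constants.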
2182
2183void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
2184 // The only read barrier implementation supporting the
2185 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2186 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2187
2188 ArmVIXLAssembler* assembler = GetAssembler();
2189 LocationSummary* locations = invoke->GetLocations();
2190
2191 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2192 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2193 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2194 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2195 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2196
2197 vixl32::Register src = InputRegisterAt(invoke, 0);
2198 Location src_pos = locations->InAt(1);
2199 vixl32::Register dest = InputRegisterAt(invoke, 2);
2200 Location dest_pos = locations->InAt(3);
2201 Location length = locations->InAt(4);
2202 Location temp1_loc = locations->GetTemp(0);
2203 vixl32::Register temp1 = RegisterFrom(temp1_loc);
2204 Location temp2_loc = locations->GetTemp(1);
2205 vixl32::Register temp2 = RegisterFrom(temp2_loc);
2206 Location temp3_loc = locations->GetTemp(2);
2207 vixl32::Register temp3 = RegisterFrom(temp3_loc);
2208
2209 SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2210 codegen_->AddSlowPath(intrinsic_slow_path);
2211
2212 vixl32::Label conditions_on_positions_validated;
2213 SystemArrayCopyOptimizations optimizations(invoke);
2214
2215  // If source and destination are the same, we go to the slow path when the data would have
2216  // to move forward in memory (dest_pos > src_pos), as the regions may then overlap unsafely.
2217 if (src_pos.IsConstant()) {
2218 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
2219 if (dest_pos.IsConstant()) {
2220 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2221 if (optimizations.GetDestinationIsSource()) {
2222 // Checked when building locations.
2223 DCHECK_GE(src_pos_constant, dest_pos_constant);
2224 } else if (src_pos_constant < dest_pos_constant) {
2225 __ Cmp(src, dest);
2226 __ B(eq, intrinsic_slow_path->GetEntryLabel());
2227 }
2228
2229 // Checked when building locations.
2230 DCHECK(!optimizations.GetDestinationIsSource()
2231 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
2232 } else {
2233 if (!optimizations.GetDestinationIsSource()) {
2234 __ Cmp(src, dest);
Artem Serov517d9f62016-12-12 15:51:15 +00002235 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002236 }
2237 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
2238 __ B(gt, intrinsic_slow_path->GetEntryLabel());
2239 }
2240 } else {
2241 if (!optimizations.GetDestinationIsSource()) {
2242 __ Cmp(src, dest);
Artem Serov517d9f62016-12-12 15:51:15 +00002243 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002244 }
2245 if (dest_pos.IsConstant()) {
2246 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2247 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
2248 } else {
2249 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
2250 }
2251 __ B(lt, intrinsic_slow_path->GetEntryLabel());
2252 }
2253
2254 __ Bind(&conditions_on_positions_validated);
2255
2256 if (!optimizations.GetSourceIsNotNull()) {
2257 // Bail out if the source is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00002258 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002259 }
2260
2261 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2262 // Bail out if the destination is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00002263 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002264 }
2265
2266 // If the length is negative, bail out.
2267 // We have already checked in the LocationsBuilder for the constant case.
2268 if (!length.IsConstant() &&
2269 !optimizations.GetCountIsSourceLength() &&
2270 !optimizations.GetCountIsDestinationLength()) {
2271 __ Cmp(RegisterFrom(length), 0);
2272 __ B(lt, intrinsic_slow_path->GetEntryLabel());
2273 }
2274
2275 // Validity checks: source.
2276 CheckPosition(assembler,
2277 src_pos,
2278 src,
2279 length,
2280 intrinsic_slow_path,
2281 temp1,
2282 optimizations.GetCountIsSourceLength());
2283
2284 // Validity checks: dest.
2285 CheckPosition(assembler,
2286 dest_pos,
2287 dest,
2288 length,
2289 intrinsic_slow_path,
2290 temp1,
2291 optimizations.GetCountIsDestinationLength());
2292
2293 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2294 // Check whether all elements of the source array are assignable to the component
2295 // type of the destination array. We do two checks: the classes are the same,
2296 // or the destination is Object[]. If none of these checks succeed, we go to the
2297 // slow path.
2298
2299 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2300 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2301 // /* HeapReference<Class> */ temp1 = src->klass_
2302 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2303 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2304        // Bail out if the source is not a non-primitive array.
2305 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2306 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2307 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2308        __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
2309        // If heap poisoning is enabled, `temp1` has been unpoisoned
2310        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2311        // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2312        __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2313        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2314        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2315      }
2316
2317 // /* HeapReference<Class> */ temp1 = dest->klass_
2318 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2319 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
2320
2321 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2322 // Bail out if the destination is not a non primitive array.
2323 //
2324 // Register `temp1` is not trashed by the read barrier emitted
2325 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2326 // method produces a call to a ReadBarrierMarkRegX entry point,
2327 // which saves all potentially live registers, including
2328        // temporaries such as `temp1`.
2329 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2330 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2331 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
2332        __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
2333        // If heap poisoning is enabled, `temp2` has been unpoisoned
2334        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2335        // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2336        __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2337        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2338        __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
2339      }
2340
2341 // For the same reason given earlier, `temp1` is not trashed by the
2342 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2343 // /* HeapReference<Class> */ temp2 = src->klass_
2344 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2345 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
2346 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2347 __ Cmp(temp1, temp2);
2348
2349 if (optimizations.GetDestinationIsTypedObjectArray()) {
2350 vixl32::Label do_copy;
2351        __ B(eq, &do_copy, /* far_target */ false);
2352        // /* HeapReference<Class> */ temp1 = temp1->component_type_
2353 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2354 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2355 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2356 // We do not need to emit a read barrier for the following
2357 // heap reference load, as `temp1` is only used in a
2358 // comparison with null below, and this reference is not
2359 // kept afterwards.
2360 __ Ldr(temp1, MemOperand(temp1, super_offset));
2361        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2362        __ Bind(&do_copy);
2363 } else {
2364 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2365 }
2366 } else {
2367 // Non read barrier code.
2368
2369 // /* HeapReference<Class> */ temp1 = dest->klass_
2370 __ Ldr(temp1, MemOperand(dest, class_offset));
2371 // /* HeapReference<Class> */ temp2 = src->klass_
2372 __ Ldr(temp2, MemOperand(src, class_offset));
2373 bool did_unpoison = false;
2374 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2375 !optimizations.GetSourceIsNonPrimitiveArray()) {
2376 // One or two of the references need to be unpoisoned. Unpoison them
2377 // both to make the identity check valid.
2378 assembler->MaybeUnpoisonHeapReference(temp1);
2379 assembler->MaybeUnpoisonHeapReference(temp2);
2380 did_unpoison = true;
2381 }
2382
2383 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
2384 // Bail out if the destination is not a non primitive array.
2385 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2386 __ Ldr(temp3, MemOperand(temp1, component_offset));
2387        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2388        assembler->MaybeUnpoisonHeapReference(temp3);
2389 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2390 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2391 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2392        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2393      }
2394
2395 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2396 // Bail out if the source is not a non primitive array.
2397 // /* HeapReference<Class> */ temp3 = temp2->component_type_
2398 __ Ldr(temp3, MemOperand(temp2, component_offset));
2399        __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2400        assembler->MaybeUnpoisonHeapReference(temp3);
2401 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2402 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2403 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2404        __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2405      }
2406
2407 __ Cmp(temp1, temp2);
2408
2409 if (optimizations.GetDestinationIsTypedObjectArray()) {
2410 vixl32::Label do_copy;
2411        __ B(eq, &do_copy, /* far_target */ false);
2412        if (!did_unpoison) {
2413 assembler->MaybeUnpoisonHeapReference(temp1);
2414 }
2415 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2416 __ Ldr(temp1, MemOperand(temp1, component_offset));
2417 assembler->MaybeUnpoisonHeapReference(temp1);
2418 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2419 __ Ldr(temp1, MemOperand(temp1, super_offset));
2420 // No need to unpoison the result, we're comparing against null.
2421        __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
2422        __ Bind(&do_copy);
2423 } else {
2424 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2425 }
2426 }
2427 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2428 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2429 // Bail out if the source is not a non primitive array.
2430 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2431 // /* HeapReference<Class> */ temp1 = src->klass_
2432 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2433 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2434 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2435 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2436 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2437      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2438      // If heap poisoning is enabled, `temp3` has been unpoisoned
2439      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2440 } else {
2441 // /* HeapReference<Class> */ temp1 = src->klass_
2442 __ Ldr(temp1, MemOperand(src, class_offset));
2443 assembler->MaybeUnpoisonHeapReference(temp1);
2444 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2445 __ Ldr(temp3, MemOperand(temp1, component_offset));
2446      __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
2447      assembler->MaybeUnpoisonHeapReference(temp3);
2448 }
2449 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2450 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2451 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
2452    __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
2453  }
2454
2455  if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
2456    // Zero constant length: no need to emit the loop code at all.
2457  } else {
2458    vixl32::Label done;
2459    const DataType::Type type = DataType::Type::kReference;
2460    const int32_t element_size = DataType::Size(type);
2461
2462 if (length.IsRegister()) {
2463      // Don't enter the copy loop if the length is zero.
2464      __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
2465    }
2466
2467 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2468 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2469
2470 // SystemArrayCopy implementation for Baker read barriers (see
2471      // also CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier):
2472      //
2473      // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2474 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2475 // bool is_gray = (rb_state == ReadBarrier::GrayState());
2476 // if (is_gray) {
2477 // // Slow-path copy.
2478 // do {
2479 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2480 // } while (src_ptr != end_ptr)
2481 // } else {
2482 // // Fast-path copy.
2483 // do {
2484 // *dest_ptr++ = *src_ptr++;
2485 // } while (src_ptr != end_ptr)
2486 // }
2487
2488 // /* int32_t */ monitor = src->monitor_
2489 __ Ldr(temp2, MemOperand(src, monitor_offset));
2490 // /* LockWord */ lock_word = LockWord(monitor)
2491 static_assert(sizeof(LockWord) == sizeof(int32_t),
2492 "art::LockWord and int32_t have different sizes.");
2493
2494 // Introduce a dependency on the lock_word including the rb_state,
2495 // which shall prevent load-load reordering without using
2496 // a memory barrier (which would be more expensive).
2497 // `src` is unchanged by this operation, but its value now depends
2498 // on `temp2`.
2499 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
2500
2501 // Compute the base source address in `temp1`.
2502 // Note that `temp1` (the base source address) is computed from
2503 // `src` (and `src_pos`) here, and thus honors the artificial
2504 // dependency of `src` on `temp2`.
2505 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2506 // Compute the end source address in `temp3`.
2507 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2508 // The base destination address is computed later, as `temp2` is
2509 // used for intermediate computations.
2510
2511 // Slow path used to copy array when `src` is gray.
2512 // Note that the base destination address is computed in `temp2`
2513 // by the slow path code.
2514 SlowPathCodeARMVIXL* read_barrier_slow_path =
2515 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2516 codegen_->AddSlowPath(read_barrier_slow_path);
2517
2518 // Given the numeric representation, it's enough to check the low bit of the
2519 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2520 // which can be a 16-bit instruction unlike the TST immediate.
2521 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2522 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
2523 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2524 // Carry flag is the last bit shifted out by LSRS.
2525 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2526
2527 // Fast-path copy.
2528 // Compute the base destination address in `temp2`.
2529 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2530 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2531 // poison/unpoison.
2532 vixl32::Label loop;
2533 __ Bind(&loop);
2534 {
2535 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2536 const vixl32::Register temp_reg = temps.Acquire();
2537 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2538 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2539 }
2540 __ Cmp(temp1, temp3);
2541 __ B(ne, &loop, /* far_target */ false);
2542
2543 __ Bind(read_barrier_slow_path->GetExitLabel());
2544 } else {
2545 // Non read barrier code.
2546 // Compute the base source address in `temp1`.
2547 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
2548 // Compute the base destination address in `temp2`.
2549 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
2550 // Compute the end source address in `temp3`.
2551 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
2552 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2553 // poison/unpoison.
2554 vixl32::Label loop;
2555 __ Bind(&loop);
2556 {
2557 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2558 const vixl32::Register temp_reg = temps.Acquire();
2559 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2560 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2561 }
2562 __ Cmp(temp1, temp3);
2563 __ B(ne, &loop, /* far_target */ false);
2564 }
2565    __ Bind(&done);
2566 }
2567
2568 // We only need one card marking on the destination array.
2569 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2570
2571 __ Bind(intrinsic_slow_path->GetExitLabel());
2572}
2573
2574static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2575 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2576 // the code generator. Furthermore, the register allocator creates fixed live intervals
2577 // for all caller-saved registers because we are doing a function call. As a result, if
2578 // the input and output locations are unallocated, the register allocator runs out of
2579 // registers and fails; however, a debuggable graph is not the common case.
2580 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2581 return;
2582 }
2583
2584 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2585  DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2586  DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2587
2588 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2589 LocationSummary::kCallOnMainOnly,
2590 kIntrinsified);
2591 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2592
2593 locations->SetInAt(0, Location::RequiresFpuRegister());
2594 locations->SetOut(Location::RequiresFpuRegister());
2595 // Native code uses the soft float ABI.
2596 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2597 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2598}
2599
2600static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2601 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2602 // the code generator. Furthermore, the register allocator creates fixed live intervals
2603 // for all caller-saved registers because we are doing a function call. As a result, if
2604 // the input and output locations are unallocated, the register allocator runs out of
2605 // registers and fails; however, a debuggable graph is not the common case.
2606 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2607 return;
2608 }
2609
2610 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2611  DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kFloat64);
2612  DCHECK_EQ(invoke->InputAt(1)->GetType(), DataType::Type::kFloat64);
2613  DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
2614
2615 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2616 LocationSummary::kCallOnMainOnly,
2617 kIntrinsified);
2618 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2619
2620 locations->SetInAt(0, Location::RequiresFpuRegister());
2621 locations->SetInAt(1, Location::RequiresFpuRegister());
2622 locations->SetOut(Location::RequiresFpuRegister());
2623 // Native code uses the soft float ABI.
2624 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2625 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2626 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2627 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2628}
2629
2630static void GenFPToFPCall(HInvoke* invoke,
2631 ArmVIXLAssembler* assembler,
2632 CodeGeneratorARMVIXL* codegen,
2633 QuickEntrypointEnum entry) {
2634 LocationSummary* const locations = invoke->GetLocations();
2635
2636 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2637 DCHECK(locations->WillCall() && locations->Intrinsified());
2638
2639 // Native code uses the soft float ABI.
2640 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2641 RegisterFrom(locations->GetTemp(1)),
2642 InputDRegisterAt(invoke, 0));
2643 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2644 __ Vmov(OutputDRegister(invoke),
2645 RegisterFrom(locations->GetTemp(0)),
2646 RegisterFrom(locations->GetTemp(1)));
2647}
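// For example, with the runtime calling convention temps set up above, a call such as
// Math.cos(d) is lowered to roughly the following sequence (a sketch; the register
// numbers assume the double argument lives in d0):
//
//   vmov r0, r1, d0        // pass the double in a core-register pair (soft-float ABI)
//   bl   <quick cos entrypoint>
//   vmov d0, r0, r1        // move the double result back into a VFP register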
2648
2649static void GenFPFPToFPCall(HInvoke* invoke,
2650 ArmVIXLAssembler* assembler,
2651 CodeGeneratorARMVIXL* codegen,
2652 QuickEntrypointEnum entry) {
2653 LocationSummary* const locations = invoke->GetLocations();
2654
2655 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2656 DCHECK(locations->WillCall() && locations->Intrinsified());
2657
2658 // Native code uses the soft float ABI.
2659 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2660 RegisterFrom(locations->GetTemp(1)),
2661 InputDRegisterAt(invoke, 0));
2662 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2663 RegisterFrom(locations->GetTemp(3)),
2664 InputDRegisterAt(invoke, 1));
2665 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2666 __ Vmov(OutputDRegister(invoke),
2667 RegisterFrom(locations->GetTemp(0)),
2668 RegisterFrom(locations->GetTemp(1)));
2669}
2670
2671void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2672 CreateFPToFPCallLocations(arena_, invoke);
2673}
2674
2675void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2676 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2677}
2678
2679void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2680 CreateFPToFPCallLocations(arena_, invoke);
2681}
2682
2683void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2684 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2685}
2686
2687void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2688 CreateFPToFPCallLocations(arena_, invoke);
2689}
2690
2691void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2692 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2693}
2694
2695void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2696 CreateFPToFPCallLocations(arena_, invoke);
2697}
2698
2699void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2700 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2701}
2702
2703void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2704 CreateFPToFPCallLocations(arena_, invoke);
2705}
2706
2707void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2708 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2709}
2710
2711void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2712 CreateFPToFPCallLocations(arena_, invoke);
2713}
2714
2715void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2716 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2717}
2718
2719void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2720 CreateFPToFPCallLocations(arena_, invoke);
2721}
2722
2723void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2724 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2725}
2726
2727void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2728 CreateFPToFPCallLocations(arena_, invoke);
2729}
2730
2731void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2732 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2733}
2734
2735void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2736 CreateFPToFPCallLocations(arena_, invoke);
2737}
2738
2739void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2740 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2741}
2742
2743void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2744 CreateFPToFPCallLocations(arena_, invoke);
2745}
2746
2747void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2748 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2749}
2750
2751void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2752 CreateFPToFPCallLocations(arena_, invoke);
2753}
2754
2755void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2756 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2757}
2758
2759void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2760 CreateFPToFPCallLocations(arena_, invoke);
2761}
2762
2763void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2764 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2765}
2766
2767void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2768 CreateFPToFPCallLocations(arena_, invoke);
2769}
2770
2771void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2772 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2773}
2774
2775void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2776 CreateFPToFPCallLocations(arena_, invoke);
2777}
2778
2779void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2780 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2781}
2782
2783void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2784 CreateFPFPToFPCallLocations(arena_, invoke);
2785}
2786
2787void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2788 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2789}
2790
2791void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2792 CreateFPFPToFPCallLocations(arena_, invoke);
2793}
2794
2795void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2796 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2797}
2798
2799void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2800 CreateFPFPToFPCallLocations(arena_, invoke);
2801}
2802
2803void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2804 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2805}
2806
2807void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2808 CreateIntToIntLocations(arena_, invoke);
2809}
2810
2811void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2812 ArmVIXLAssembler* assembler = GetAssembler();
2813 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2814}
2815
2816void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2817  CreateLongToLongLocationsWithOverlap(arena_, invoke);
2818}
2819
2820void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2821 ArmVIXLAssembler* assembler = GetAssembler();
2822 LocationSummary* locations = invoke->GetLocations();
2823
2824 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2825 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2826 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2827 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2828
2829 __ Rbit(out_reg_lo, in_reg_hi);
2830 __ Rbit(out_reg_hi, in_reg_lo);
2831}
2832
2833void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2834 CreateIntToIntLocations(arena_, invoke);
2835}
2836
2837void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2838 ArmVIXLAssembler* assembler = GetAssembler();
2839 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2840}
2841
2842void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2843  CreateLongToLongLocationsWithOverlap(arena_, invoke);
2844}
2845
2846void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2847 ArmVIXLAssembler* assembler = GetAssembler();
2848 LocationSummary* locations = invoke->GetLocations();
2849
2850 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2851 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2852 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2853 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2854
2855 __ Rev(out_reg_lo, in_reg_hi);
2856 __ Rev(out_reg_hi, in_reg_lo);
2857}
2858
2859void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2860 CreateIntToIntLocations(arena_, invoke);
2861}
2862
2863void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2864 ArmVIXLAssembler* assembler = GetAssembler();
2865 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2866}
2867
2868static void GenBitCount(HInvoke* instr, DataType::Type type, ArmVIXLAssembler* assembler) {
2869 DCHECK(DataType::IsIntOrLongType(type)) << type;
2870 DCHECK_EQ(instr->GetType(), DataType::Type::kInt32);
2871 DCHECK_EQ(DataType::Kind(instr->InputAt(0)->GetType()), type);
2872
2873  bool is_long = type == DataType::Type::kInt64;
2874  LocationSummary* locations = instr->GetLocations();
2875 Location in = locations->InAt(0);
2876 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2877 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2878 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2879 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2880 vixl32::Register out_r = OutputRegister(instr);
2881
2882 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2883  // According to the Cortex-A57 and A72 optimization guides, transferring data from a core
2884  // register to the upper or lower half of a VFP D-reg incurs extra latency compared to
2885  // transferring to the full D-reg. That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
2886 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2887 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2888 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2889 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2890 if (is_long) {
2891 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2892 }
2893 __ Vmov(out_r, tmp_s);
2894}
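// Worked example (a sketch) for the 32-bit case with input 0x80000001:
//   vmov  d0, r0, r0   -> D-reg bytes: 80 00 00 01 80 00 00 01
//   vcnt.8             -> per-byte bit counts: 1 0 0 1 1 0 0 1
//   vpaddl.u8          -> per-halfword sums:   1   1   1   1
//   vpaddl.u16         -> per-word sums:         2       2
//   vmov  r0, s0       -> 2, i.e. Integer.bitCount(0x80000001)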
2895
2896void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2897 CreateIntToIntLocations(arena_, invoke);
2898 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2899}
2900
2901void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2902  GenBitCount(invoke, DataType::Type::kInt32, GetAssembler());
2903}
2904
2905void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2906 VisitIntegerBitCount(invoke);
2907}
2908
2909void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2910  GenBitCount(invoke, DataType::Type::kInt64, GetAssembler());
2911}
2912
2913static void GenHighestOneBit(HInvoke* invoke,
2914                             DataType::Type type,
2915                             CodeGeneratorARMVIXL* codegen) {
2916  DCHECK(DataType::IsIntOrLongType(type));
2917
2918 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2919 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2920 const vixl32::Register temp = temps.Acquire();
2921
2922  if (type == DataType::Type::kInt64) {
2923    LocationSummary* locations = invoke->GetLocations();
2924 Location in = locations->InAt(0);
2925 Location out = locations->Out();
2926
2927 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2928 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2929 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2930 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2931
2932 __ Mov(temp, 0x80000000); // Modified immediate.
2933 __ Clz(out_reg_lo, in_reg_lo);
2934 __ Clz(out_reg_hi, in_reg_hi);
2935 __ Lsr(out_reg_lo, temp, out_reg_lo);
2936 __ Lsrs(out_reg_hi, temp, out_reg_hi);
2937
2938 // Discard result for lowest 32 bits if highest 32 bits are not zero.
2939 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
2940 // we check that the output is in a low register, so that a 16-bit MOV
2941 // encoding can be used. If output is in a high register, then we generate
2942 // 4 more bytes of code to avoid a branch.
2943 Operand mov_src(0);
2944 if (!out_reg_lo.IsLow()) {
2945 __ Mov(LeaveFlags, temp, 0);
2946 mov_src = Operand(temp);
2947 }
2948 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
2949 2 * vixl32::k16BitT32InstructionSizeInBytes,
2950 CodeBufferCheckScope::kExactSize);
2951 __ it(ne);
2952 __ mov(ne, out_reg_lo, mov_src);
2953 } else {
2954 vixl32::Register out = OutputRegister(invoke);
2955 vixl32::Register in = InputRegisterAt(invoke, 0);
2956
2957 __ Mov(temp, 0x80000000); // Modified immediate.
2958 __ Clz(out, in);
2959 __ Lsr(out, temp, out);
2960 }
2961}
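// Worked example (a sketch) for the 32-bit case with input 0x00012345:
//   clz  -> 15 leading zeros
//   lsr  -> 0x80000000 >> 15 == 0x00010000, i.e. Integer.highestOneBit(0x00012345)
// (A zero input gives clz == 32, and a register-specified LSR by 32 yields 0, as required.)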
2962
2963void IntrinsicLocationsBuilderARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2964 CreateIntToIntLocations(arena_, invoke);
2965}
2966
2967void IntrinsicCodeGeneratorARMVIXL::VisitIntegerHighestOneBit(HInvoke* invoke) {
2968  GenHighestOneBit(invoke, DataType::Type::kInt32, codegen_);
2969}
2970
2971void IntrinsicLocationsBuilderARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2972  CreateLongToLongLocationsWithOverlap(arena_, invoke);
2973}
2974
2975void IntrinsicCodeGeneratorARMVIXL::VisitLongHighestOneBit(HInvoke* invoke) {
2976  GenHighestOneBit(invoke, DataType::Type::kInt64, codegen_);
2977}
2978
2979static void GenLowestOneBit(HInvoke* invoke,
2980                            DataType::Type type,
2981                            CodeGeneratorARMVIXL* codegen) {
2982  DCHECK(DataType::IsIntOrLongType(type));
2983
2984 ArmVIXLAssembler* assembler = codegen->GetAssembler();
2985 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2986 const vixl32::Register temp = temps.Acquire();
2987
2988  if (type == DataType::Type::kInt64) {
2989    LocationSummary* locations = invoke->GetLocations();
2990 Location in = locations->InAt(0);
2991 Location out = locations->Out();
2992
2993 vixl32::Register in_reg_lo = LowRegisterFrom(in);
2994 vixl32::Register in_reg_hi = HighRegisterFrom(in);
2995 vixl32::Register out_reg_lo = LowRegisterFrom(out);
2996 vixl32::Register out_reg_hi = HighRegisterFrom(out);
2997
2998 __ Rsb(out_reg_hi, in_reg_hi, 0);
2999 __ Rsb(out_reg_lo, in_reg_lo, 0);
3000 __ And(out_reg_hi, out_reg_hi, in_reg_hi);
3001 // The result of this operation is 0 iff in_reg_lo is 0
3002 __ Ands(out_reg_lo, out_reg_lo, in_reg_lo);
3003
3004 // Discard result for highest 32 bits if lowest 32 bits are not zero.
3005 // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
3006 // we check that the output is in a low register, so that a 16-bit MOV
3007 // encoding can be used. If output is in a high register, then we generate
3008 // 4 more bytes of code to avoid a branch.
3009 Operand mov_src(0);
3010 if (!out_reg_lo.IsLow()) {
3011 __ Mov(LeaveFlags, temp, 0);
3012 mov_src = Operand(temp);
3013 }
3014 ExactAssemblyScope it_scope(codegen->GetVIXLAssembler(),
3015 2 * vixl32::k16BitT32InstructionSizeInBytes,
3016 CodeBufferCheckScope::kExactSize);
3017 __ it(ne);
3018 __ mov(ne, out_reg_hi, mov_src);
3019 } else {
3020 vixl32::Register out = OutputRegister(invoke);
3021 vixl32::Register in = InputRegisterAt(invoke, 0);
3022
3023 __ Rsb(temp, in, 0);
3024 __ And(out, temp, in);
3025 }
3026}
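// Worked example (a sketch) for the 32-bit case with input 0x00012340:
//   rsb  -> -0x00012340 == 0xFFFEDCC0
//   and  -> 0xFFFEDCC0 & 0x00012340 == 0x00000040, i.e. Integer.lowestOneBit(0x00012340)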
3027
3028void IntrinsicLocationsBuilderARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
3029 CreateIntToIntLocations(arena_, invoke);
3030}
3031
3032void IntrinsicCodeGeneratorARMVIXL::VisitIntegerLowestOneBit(HInvoke* invoke) {
3033  GenLowestOneBit(invoke, DataType::Type::kInt32, codegen_);
3034}
3035
3036void IntrinsicLocationsBuilderARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
3037  CreateLongToLongLocationsWithOverlap(arena_, invoke);
3038}
3039
3040void IntrinsicCodeGeneratorARMVIXL::VisitLongLowestOneBit(HInvoke* invoke) {
3041  GenLowestOneBit(invoke, DataType::Type::kInt64, codegen_);
3042}
3043
3044void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
3045 LocationSummary* locations = new (arena_) LocationSummary(invoke,
3046 LocationSummary::kNoCall,
3047 kIntrinsified);
3048 locations->SetInAt(0, Location::RequiresRegister());
3049 locations->SetInAt(1, Location::RequiresRegister());
3050 locations->SetInAt(2, Location::RequiresRegister());
3051 locations->SetInAt(3, Location::RequiresRegister());
3052 locations->SetInAt(4, Location::RequiresRegister());
3053
3054 // Temporary registers to store lengths of strings and for calculations.
3055 locations->AddTemp(Location::RequiresRegister());
3056 locations->AddTemp(Location::RequiresRegister());
3057 locations->AddTemp(Location::RequiresRegister());
3058}
3059
3060void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
3061 ArmVIXLAssembler* assembler = GetAssembler();
3062 LocationSummary* locations = invoke->GetLocations();
3063
3064 // Check assumption that sizeof(Char) is 2 (used in scaling below).
3065  const size_t char_size = DataType::Size(DataType::Type::kUint16);
3066  DCHECK_EQ(char_size, 2u);
3067
3068 // Location of data in char array buffer.
3069 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
3070
3071 // Location of char array data in string.
3072 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
3073
3074 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
3075 // Since getChars() calls getCharsNoCheck() - we use registers rather than constants.
3076 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
3077 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
3078 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
3079 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
3080 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
3081
3082 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
3083 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
3084 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
3085
3086 vixl32::Label done, compressed_string_loop;
3087  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
3088  // dst to be copied.
3089 __ Add(dst_ptr, dstObj, data_offset);
3090 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
3091
3092 __ Subs(num_chr, srcEnd, srcBegin);
3093 // Early out for valid zero-length retrievals.
3094  __ B(eq, final_label, /* far_target */ false);
3095
3096 // src range to copy.
3097 __ Add(src_ptr, srcObj, value_offset);
3098
3099 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3100 vixl32::Register temp;
3101 vixl32::Label compressed_string_preloop;
3102 if (mirror::kUseStringCompression) {
3103 // Location of count in string.
3104 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
3105 temp = temps.Acquire();
3106 // String's length.
3107 __ Ldr(temp, MemOperand(srcObj, count_offset));
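    // The low bit of `count` is the string-compression flag; the `eq` branch
    // below (flag bit clear) takes the compressed path, which inflates one
    // 8-bit character to one 16-bit char per iteration.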
3108    __ Tst(temp, 1);
3109    temps.Release(temp);
3110    __ B(eq, &compressed_string_preloop, /* far_target */ false);
3111  }
3112 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
3113
3114 // Do the copy.
3115 vixl32::Label loop, remainder;
3116
3117 temp = temps.Acquire();
3118 // Save repairing the value of num_chr on the < 4 character path.
3119 __ Subs(temp, num_chr, 4);
3120  __ B(lt, &remainder, /* far_target */ false);
3121
3122 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
3123 __ Mov(num_chr, temp);
3124
3125 // Main loop used for longer fetches loads and stores 4x16-bit characters at a time.
3126 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
3127 // to rectify these everywhere this intrinsic applies.)
3128 __ Bind(&loop);
3129 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
3130 __ Subs(num_chr, num_chr, 4);
3131 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
3132 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
3133 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
3134 temps.Release(temp);
3135  __ B(ge, &loop, /* far_target */ false);
3136
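  // On exit from the 4-character loop `num_chr` has gone negative; adding 4
  // back yields the number of trailing characters (0-3) still to copy in the
  // one-character loop below.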
3137 __ Adds(num_chr, num_chr, 4);
3138  __ B(eq, final_label, /* far_target */ false);
3139
3140 // Main loop for < 4 character case and remainder handling. Loads and stores one
3141 // 16-bit Java character at a time.
3142 __ Bind(&remainder);
3143 temp = temps.Acquire();
3144 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
3145 __ Subs(num_chr, num_chr, 1);
3146 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
3147 temps.Release(temp);
3148  __ B(gt, &remainder, /* far_target */ false);
3149
3150 if (mirror::kUseStringCompression) {
3151    __ B(final_label);
3152
3153    const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
3154    DCHECK_EQ(c_char_size, 1u);
3155 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
3156 __ Bind(&compressed_string_preloop);
3157 __ Add(src_ptr, src_ptr, srcBegin);
3158 __ Bind(&compressed_string_loop);
3159 temp = temps.Acquire();
3160 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
3161 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
3162 temps.Release(temp);
3163 __ Subs(num_chr, num_chr, 1);
3164    __ B(gt, &compressed_string_loop, /* far_target */ false);
3165  }
3166
3167  if (done.IsReferenced()) {
3168    __ Bind(&done);
3169  }
3170}
3171
3172void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
3173 CreateFPToIntLocations(arena_, invoke);
3174}
3175
3176void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
3177 ArmVIXLAssembler* const assembler = GetAssembler();
3178 const vixl32::Register out = OutputRegister(invoke);
3179 // Shifting left by 1 bit makes the value encodable as an immediate operand;
3180 // we don't care about the sign bit anyway.
3181 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
3182
3183 __ Vmov(out, InputSRegisterAt(invoke, 0));
3184 // We don't care about the sign bit, so shift left.
3185 __ Lsl(out, out, 1);
3186 __ Eor(out, out, infinity);
3187  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
3188}
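// Worked example (a sketch): Float.floatToRawIntBits(Float.POSITIVE_INFINITY)
// is 0x7f800000; shifting left by one gives 0xff000000, which is exactly the
// `infinity` constant above, so the XOR (and hence `out`) is zero only for
// +/- infinity.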
3189
3190void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
3191 CreateFPToIntLocations(arena_, invoke);
3192}
3193
3194void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
3195 ArmVIXLAssembler* const assembler = GetAssembler();
3196 const vixl32::Register out = OutputRegister(invoke);
3197 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3198 const vixl32::Register temp = temps.Acquire();
3199 // The highest 32 bits of double precision positive infinity separated into
3200 // two constants encodable as immediate operands.
3201 constexpr uint32_t infinity_high = 0x7f000000U;
3202 constexpr uint32_t infinity_high2 = 0x00f00000U;
3203
3204 static_assert((infinity_high | infinity_high2) ==
3205 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
3206 "The constants do not add up to the high 32 bits of double "
3207 "precision positive infinity.");
3208 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
3209 __ Eor(out, out, infinity_high);
3210 __ Eor(out, out, infinity_high2);
3211 // We don't care about the sign bit, so shift left.
3212 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
3213  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
3214}
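// In other words (a sketch), with `hi`/`lo` the two halves of the input bits:
//   out = lo | ((hi ^ 0x7ff00000) << 1)
// which is zero exactly when lo == 0 and hi, ignoring the sign bit, matches
// the high word of positive infinity, i.e. the input is +/- infinity.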
3215
3216void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
3217 if (features_.HasARMv8AInstructions()) {
3218 CreateFPToFPLocations(arena_, invoke);
3219 }
3220}
3221
3222void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
3223 ArmVIXLAssembler* assembler = GetAssembler();
3224 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
3225 __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3226}
3227
3228void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
3229 if (features_.HasARMv8AInstructions()) {
3230 CreateFPToFPLocations(arena_, invoke);
3231 }
3232}
3233
3234void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
3235 ArmVIXLAssembler* assembler = GetAssembler();
3236 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
3237 __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
3238}
3239
3240void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3241 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3242 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3243 invoke,
3244 codegen_,
3245 LocationFrom(r0),
3246 LocationFrom(calling_convention.GetRegisterAt(0)));
3247}
3248
3249void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3250 IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
3251 LocationSummary* locations = invoke->GetLocations();
3252 ArmVIXLAssembler* const assembler = GetAssembler();
3253
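  // The code below mirrors Integer.valueOf() (a sketch of the intent):
  //
  //   if (info.low <= value && value <= info.high)
  //     return boot_image_integer_cache[value - info.low];   // cache hit
  //   Integer boxed = new Integer();   // kQuickAllocObjectInitialized
  //   boxed.value = value;
  //   return boxed;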
3254 vixl32::Register out = RegisterFrom(locations->Out());
3255 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3256 vixl32::Register temp = temps.Acquire();
3257 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3258 vixl32::Register argument = calling_convention.GetRegisterAt(0);
3259 if (invoke->InputAt(0)->IsConstant()) {
3260 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3261 if (value >= info.low && value <= info.high) {
3262 // Just embed the j.l.Integer in the code.
3263 ScopedObjectAccess soa(Thread::Current());
3264 mirror::Object* boxed = info.cache->Get(value + (-info.low));
3265 DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
3266 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
3267 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
3268 } else {
3269 // Allocate and initialize a new j.l.Integer.
3270 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3271 // JIT object table.
3272 uint32_t address =
3273 dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3274 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3275 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3276 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3277 __ Mov(temp, value);
3278 assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
3279 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3280 // one.
3281 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3282 }
3283 } else {
3284 vixl32::Register in = RegisterFrom(locations->InAt(0));
3285 // Check bounds of our cache.
3286 __ Add(out, in, -info.low);
3287 __ Cmp(out, info.high - info.low + 1);
3288 vixl32::Label allocate, done;
3289    __ B(hs, &allocate, /* is_far_target */ false);
3290    // If the value is within the bounds, load the j.l.Integer directly from the array.
3291 uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3292 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3293 __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3294    codegen_->LoadFromShiftedRegOffset(DataType::Type::kReference, locations->Out(), temp, out);
3295    assembler->MaybeUnpoisonHeapReference(out);
3296 __ B(&done);
3297 __ Bind(&allocate);
3298 // Otherwise allocate and initialize a new j.l.Integer.
3299 address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3300 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3301 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3302 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3303 assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
3304 // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
3305 // one.
3306 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3307 __ Bind(&done);
3308 }
3309}
3310
3311void IntrinsicLocationsBuilderARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3312 LocationSummary* locations = new (arena_) LocationSummary(invoke,
3313 LocationSummary::kNoCall,
3314 kIntrinsified);
3315 locations->SetOut(Location::RequiresRegister());
3316}
3317
3318void IntrinsicCodeGeneratorARMVIXL::VisitThreadInterrupted(HInvoke* invoke) {
3319 ArmVIXLAssembler* assembler = GetAssembler();
3320 vixl32::Register out = RegisterFrom(invoke->GetLocations()->Out());
3321 int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
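  // Load the thread's interrupted flag; if it is already clear there is
  // nothing more to do. Otherwise reset it to zero, with DMB ISH barriers on
  // both sides of the store to order the flag update with surrounding accesses.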
3322 __ Ldr(out, MemOperand(tr, offset));
3323 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3324 vixl32::Register temp = temps.Acquire();
3325 vixl32::Label done;
3326  vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
3327  __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
3328  __ Dmb(vixl32::ISH);
3329 __ Mov(temp, 0);
3330 assembler->StoreToOffset(kStoreWord, temp, tr, offset);
3331 __ Dmb(vixl32::ISH);
3332  if (done.IsReferenced()) {
3333    __ Bind(&done);
3334  }
3335}
3336
3337UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
3338UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
3339UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
3340UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
3341
3342UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3343UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
3344UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3345UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3346UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3347UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
3348UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3349UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
3350
3351// 1.8.
3352UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3353UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3354UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3355UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3356UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3357
3358UNREACHABLE_INTRINSICS(ARMVIXL)
3359
3360#undef __
3361
3362} // namespace arm
3363} // namespace art