/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "lock_word.h"
#include "mirror/array-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
    }
    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    if (dest_pos.IsConstant()) {
      int32_t constant = Int32ConstantFrom(dest_pos);
      __ Add(dst_curr_addr, dest, element_size * constant + offset);
    } else {
      __ Add(dst_curr_addr,
             dest,
             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
      __ Add(dst_curr_addr, dst_curr_addr, offset);
    }

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* far_target */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    Primitive::Type type,
                                    ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     ArmVIXLAssembler* assembler) {
  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  vixl32::Register out = RegisterFrom(locations->Out());

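  // ARM has no count-trailing-zeros instruction, so CTZ(x) is computed as CLZ(RBIT(x)).
  // For longs, the low word is used first and the high word (plus 32) only when the low
  // word is zero.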
  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

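    // Branchless abs: mask = in >> 31 is all ones for a negative input and zero otherwise,
    // so abs(in) = (in + mask) ^ mask; ADDS/ADC propagate the carry across the register pair.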
    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

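  // Select the result with a predicated IT block; the ExactAssemblyScope reserves space for
  // exactly these three raw (lower-case) instructions so the macro assembler emits nothing
  // in between.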
  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case Primitive::kPrimNot: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case Primitive::kPrimLong: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
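        // Without single-copy atomic LDRD/STRD, a single LDREXD still gives an atomic 64-bit
        // load; the exclusive monitor it sets is simply never paired with a STREXD.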
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     Primitive::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == Primitive::kPrimLong) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == Primitive::kPrimNot) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimLong) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

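      // The values loaded by LDREXD are discarded; the load only takes the exclusive monitor
      // so that STREXD performs an atomic 64-bit store, retrying while the store fails.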
      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head, /* far_target */ false);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimNot) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                HInvoke* invoke,
                                                Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
          ? Location::kOutputOverlap
          : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, Primitive::kPrimLong);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  vixl32::Register out = OutputRegister(invoke);  // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);   // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);  // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);     // Value.

  Location tmp_ptr_loc = locations->GetTemp(0);
  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);        // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));  // Value in memory.

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* offset */ 0u,
          /* index */ offset_loc,
          ScaleFactor::TIMES_1,
          tmp_ptr_loc,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &tmp);
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STXR
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp == 0;

  vixl32::Label loop_head;
  __ Bind(&loop_head);

  __ Ldrex(tmp, MemOperand(tmp_ptr));

  __ Subs(tmp, tmp, expected);

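  // If the loaded value matched `expected` (eq), the predicated STREX attempts the store and
  // writes its status to `tmp` (0 on success, 1 on failure); the predicated CMP then makes the
  // branch below retry only when the store itself failed.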
  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ itt(eq);
    __ strex(eq, tmp, value, MemOperand(tmp_ptr));
    __ cmp(eq, tmp, 1);
  }

  __ B(eq, &loop_head, /* far_target */ false);

  __ Dmb(vixl32::ISH);

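  // Materialize the boolean result: RSBS computes out = 1 - tmp, so tmp == 0 (success) gives 1;
  // any tmp >= 2 borrows (carry clear) and the predicated MOV below forces out to 0.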
  __ Rsbs(out, tmp, 1);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(cc);
    __ mov(cc, out, 0);
  }

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->UnpoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      codegen->GetAssembler()->UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, Primitive::kPrimNot, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            invoke->InputAt(1)->CanBeNull()
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need temporary registers for String compression's feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register str = InputRegisterAt(invoke, 0);
  vixl32::Register arg = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label loop;
  vixl32::Label find_char_diff;
  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, MemOperand(str, count_offset));
    __ Ldr(temp2, MemOperand(arg, count_offset));
    // Extract lengths from the `count` fields.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, MemOperand(str, count_offset));
    __ Ldr(temp1, MemOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(gt);
    __ mov(gt, temp0, temp1);
  }

  // Shorter string is empty?
  // Note that mirror::kUseStringCompression==true introduces lots of instructions,
  // which moves the &end label far away from this branch and makes it not 'CBZ-encodable'.
  __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);

  if (mirror::kUseStringCompression) {
    // Check that both strings use the same compression style before using this comparison loop.
    __ Eors(temp2, temp2, temp3);
    __ Lsrs(temp2, temp2, 1u);
    __ B(cs, &different_compression);
    // For string compression, calculate the number of bytes to compare (not chars).
    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.

    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(ne);
    __ add(ne, temp0, temp0, temp0);
  }

  // Store offset of string value in preparation for comparison loop.
  __ Mov(temp1, value_offset);

  // Assertions that must hold in order to compare multiple characters at a time.
  CHECK_ALIGNED(value_offset, 8);
  static_assert(IsAligned<8>(kObjectAlignment),
                "String data must be 8-byte aligned for unrolled CompareTo loop.");

  const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());

  vixl32::Label find_char_diff_2nd_cmp;
  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
  __ Bind(&loop);
  vixl32::Register temp_reg = temps.Acquire();
  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff, /* far_target */ false);
  __ Add(temp1, temp1, char_size * 2);

  __ Ldr(temp_reg, MemOperand(str, temp1));
  __ Ldr(temp2, MemOperand(arg, temp1));
  __ Cmp(temp_reg, temp2);
  __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
  __ Add(temp1, temp1, char_size * 2);
  // With string compression, we have compared 8 bytes, otherwise 4 chars.
  __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
  __ B(hi, &loop, /* far_target */ false);
  __ B(&end);

  __ Bind(&find_char_diff_2nd_cmp);
  if (mirror::kUseStringCompression) {
    __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
    __ B(ls, &end, /* far_target */ false);  // Was the second comparison fully beyond the end?
  } else {
    // Without string compression, we can start treating temp0 as signed
    // and rely on the signed comparison below.
    __ Sub(temp0, temp0, 2);
  }

  // Find the single character difference.
  __ Bind(&find_char_diff);
  // Get the bit position of the first character that differs.
  __ Eor(temp1, temp2, temp_reg);
  __ Rbit(temp1, temp1);
  __ Clz(temp1, temp1);

  // temp0 = number of characters remaining to compare.
  // (Without string compression, it could be < 1 if a difference is found by the second CMP
  // in the comparison loop, and after the end of the shorter string data).

  // Without string compression (temp1 >> 4) = character where difference occurs between the last
  // two words compared, in the interval [0,1].
  // (0 for low half-word different, 1 for high half-word different).
  // With string compression, (temp1 << 3) = byte where the difference occurs,
  // in the interval [0,3].

  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
  // the remaining string data, so just return length diff (out).
  // The comparison is unsigned for string compression, otherwise signed.
  __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
  __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false);

  // Extract the characters and calculate the difference.
  if (mirror::kUseStringCompression) {
    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
    __ Orr(temp3, temp3, 0xffu << 23);                          // uncompressed ? 0xff800000u : 0x7f800000u
1305 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001306 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1307 __ Lsr(temp2, temp2, temp1); // Extract second character.
1308 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1309 __ Lsr(out, temp_reg, temp1); // Extract first character.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001310 __ And(temp2, temp2, temp3);
1311 __ And(out, out, temp3);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001312 } else {
Anton Kirilovb88c4842016-11-14 14:37:00 +00001313 __ Bic(temp1, temp1, 0xf);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001314 __ Lsr(temp2, temp2, temp1);
1315 __ Lsr(out, temp_reg, temp1);
Anton Kirilovb88c4842016-11-14 14:37:00 +00001316 __ Movt(temp2, 0);
1317 __ Movt(out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001318 }
Anton Kirilov5ec62182016-10-13 20:16:02 +01001319
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001320 __ Sub(out, out, temp2);
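// (Continuing the uncompressed sketch above: BIC clears temp1 from 21 down to 16, the shifts
// isolate 0x0061 and 0x0041, and `out` becomes 0x20, a positive result as expected for "Aa" > "AA".)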
Anton Kirilov5ec62182016-10-13 20:16:02 +01001321 temps.Release(temp_reg);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001322
1323 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001324 __ B(&end);
1325 __ Bind(&different_compression);
1326
1327 // Comparison for different compression style.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001328 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1329 DCHECK_EQ(c_char_size, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001330
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001331 // We want to free up temp3, currently holding `str.count`, for comparison.
1332 // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1333 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1334 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1335 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001336 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001337 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001338 __ Mov(temp1, str);
1339 __ Mov(temp2, arg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001340 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1341 {
Artem Serov0fb37192016-12-06 18:13:40 +00001342 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1343 3 * kMaxInstructionSizeInBytes,
1344 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001345 __ itt(cs); // Interleave with selection of temp1 and temp2.
1346 __ mov(cs, temp1, arg); // Preserves flags.
1347 __ mov(cs, temp2, str); // Preserves flags.
1348 }
Anton Kirilovb88c4842016-11-14 14:37:00 +00001349 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
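// (Sketch of the resulting state, assuming 0=compressed/1=uncompressed as asserted below: temp1
// and temp2 now refer to the compressed and uncompressed string respectively, and temp0 holds
// 2 * <characters left> when `str` is uncompressed, or that value minus 1 when `str` is
// compressed, so its low bit matches the `arg` compression flag used after the loop.)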
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001350
1351 // Adjust temp1 and temp2 from string pointers to data pointers.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001352 __ Add(temp1, temp1, value_offset);
1353 __ Add(temp2, temp2, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001354
1355 vixl32::Label different_compression_loop;
1356 vixl32::Label different_compression_diff;
1357
1358 // Main loop for different compression.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001359 temp_reg = temps.Acquire();
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001360 __ Bind(&different_compression_loop);
1361 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1362 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
Anton Kirilovb88c4842016-11-14 14:37:00 +00001363 __ Cmp(temp_reg, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00001364 __ B(ne, &different_compression_diff, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001365 __ Subs(temp0, temp0, 2);
Artem Serov517d9f62016-12-12 15:51:15 +00001366 __ B(hi, &different_compression_loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001367 __ B(&end);
1368
1369 // Calculate the difference.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001370 __ Bind(&different_compression_diff);
1371 __ Sub(out, temp_reg, temp3);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001372 temps.Release(temp_reg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001373 // Flip the difference if the `arg` is compressed.
1374 // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
1375 __ Lsrs(temp0, temp0, 1u);
1376 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1377 "Expecting 0=compressed, 1=uncompressed");
1378
Artem Serov0fb37192016-12-06 18:13:40 +00001379 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1380 2 * kMaxInstructionSizeInBytes,
1381 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001382 __ it(cc);
1383 __ rsb(cc, out, out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001384 }
1385
1386 __ Bind(&end);
1387
1388 if (can_slow_path) {
1389 __ Bind(slow_path->GetExitLabel());
1390 }
1391}
1392
1393void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1394 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1395 LocationSummary::kNoCall,
1396 kIntrinsified);
1397 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1398 locations->SetInAt(0, Location::RequiresRegister());
1399 locations->SetInAt(1, Location::RequiresRegister());
1400 // Temporary registers to store lengths of strings and for calculations.
1401 // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
1402 locations->AddTemp(LocationFrom(r0));
1403 locations->AddTemp(Location::RequiresRegister());
1404 locations->AddTemp(Location::RequiresRegister());
1405
1406 locations->SetOut(Location::RequiresRegister());
1407}
1408
1409void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1410 ArmVIXLAssembler* assembler = GetAssembler();
1411 LocationSummary* locations = invoke->GetLocations();
1412
1413 vixl32::Register str = InputRegisterAt(invoke, 0);
1414 vixl32::Register arg = InputRegisterAt(invoke, 1);
1415 vixl32::Register out = OutputRegister(invoke);
1416
1417 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1418 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1419 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1420
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001421 vixl32::Label loop;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001422 vixl32::Label end;
1423 vixl32::Label return_true;
1424 vixl32::Label return_false;
1425
1426 // Get offsets of count, value, and class fields within a string object.
1427 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1428 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1429 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1430
1431 // Note that the null check must have been done earlier.
1432 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1433
1434 StringEqualsOptimizations optimizations(invoke);
1435 if (!optimizations.GetArgumentNotNull()) {
1436 // Check if input is null, return false if it is.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001437 __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001438 }
1439
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001440 // Reference equality check, return true if same reference.
1441 __ Cmp(str, arg);
Artem Serov517d9f62016-12-12 15:51:15 +00001442 __ B(eq, &return_true, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001443
Anton Kirilov5ec62182016-10-13 20:16:02 +01001444 if (!optimizations.GetArgumentIsString()) {
1445 // Instanceof check for the argument by comparing class fields.
1446 // All string objects must have the same type since String cannot be subclassed.
1447 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1448 // If the argument is a string object, its class field must be equal to receiver's class field.
1449 __ Ldr(temp, MemOperand(str, class_offset));
1450 __ Ldr(temp1, MemOperand(arg, class_offset));
1451 __ Cmp(temp, temp1);
Artem Serov517d9f62016-12-12 15:51:15 +00001452 __ B(ne, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001453 }
1454
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001455 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001456 __ Ldr(temp, MemOperand(str, count_offset));
1457 __ Ldr(temp1, MemOperand(arg, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001458 // Check if `count` fields are equal, return false if they're not.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001459 // This also compares the compression style; if it differs, return false.
1460 __ Cmp(temp, temp1);
Artem Serov517d9f62016-12-12 15:51:15 +00001461 __ B(ne, &return_false, /* far_target */ false);
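// (With compression enabled, the `count` field packs (length << 1) | compression_flag, so the
// single word comparison above checks the lengths and the compression styles at the same time.)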
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001462 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1463 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1464 "Expecting 0=compressed, 1=uncompressed");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001465 __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001466
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001467 // Assertions that must hold in order to compare strings 4 bytes at a time.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001468 DCHECK_ALIGNED(value_offset, 4);
1469 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1470
1471 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001472 // For string compression, calculate the number of bytes to compare (not chars).
1473 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1474 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
Artem Serov0fb37192016-12-06 18:13:40 +00001475 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1476 2 * kMaxInstructionSizeInBytes,
1477 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001478 __ it(cs); // If uncompressed,
1479 __ add(cs, temp, temp, temp); // double the byte count.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001480 }
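// (With compression enabled, `temp` now holds the number of bytes left to compare: the length
// for a compressed string, or twice the length for an uncompressed one.)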
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001481
1482 // Store offset of string value in preparation for comparison loop.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001483 __ Mov(temp1, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001484
1485 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1486 // Ok to do this because strings are zero-padded to kObjectAlignment.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001487 __ Bind(&loop);
1488 __ Ldr(out, MemOperand(str, temp1));
1489 __ Ldr(temp2, MemOperand(arg, temp1));
Scott Wakelingb77051e2016-11-21 19:46:00 +00001490 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001491 __ Cmp(out, temp2);
Artem Serov517d9f62016-12-12 15:51:15 +00001492 __ B(ne, &return_false, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001493 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1494 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
Artem Serov517d9f62016-12-12 15:51:15 +00001495 __ B(hi, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001496
1497 // Return true and exit the function.
1498 // If loop does not result in returning false, we return true.
1499 __ Bind(&return_true);
1500 __ Mov(out, 1);
1501 __ B(&end);
1502
1503 // Return false and exit the function.
1504 __ Bind(&return_false);
1505 __ Mov(out, 0);
1506 __ Bind(&end);
1507}
1508
1509static void GenerateVisitStringIndexOf(HInvoke* invoke,
1510 ArmVIXLAssembler* assembler,
1511 CodeGeneratorARMVIXL* codegen,
1512 ArenaAllocator* allocator,
1513 bool start_at_zero) {
1514 LocationSummary* locations = invoke->GetLocations();
1515
1516 // Note that the null check must have been done earlier.
1517 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1518
1519 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1520 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1521 SlowPathCodeARMVIXL* slow_path = nullptr;
1522 HInstruction* code_point = invoke->InputAt(1);
1523 if (code_point->IsIntConstant()) {
Anton Kirilov644032c2016-12-06 17:51:43 +00001524 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
Anton Kirilov5ec62182016-10-13 20:16:02 +01001525 std::numeric_limits<uint16_t>::max()) {
1526 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1527 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1528 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1529 codegen->AddSlowPath(slow_path);
1530 __ B(slow_path->GetEntryLabel());
1531 __ Bind(slow_path->GetExitLabel());
1532 return;
1533 }
1534 } else if (code_point->GetType() != Primitive::kPrimChar) {
1535 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1536 // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
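// (A note on the encoding: Thumb-2 modified immediates are built from an 8-bit pattern that can
// be rotated or replicated, which covers 0x10000 but not 0xffff, so the `>=` form avoids
// materializing the constant in a register first.)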
1537 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1538 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1539 codegen->AddSlowPath(slow_path);
1540 __ B(hs, slow_path->GetEntryLabel());
1541 }
1542
1543 if (start_at_zero) {
1544 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1545 DCHECK(tmp_reg.Is(r2));
1546 // Start-index = 0.
1547 __ Mov(tmp_reg, 0);
1548 }
1549
1550 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1551 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1552
1553 if (slow_path != nullptr) {
1554 __ Bind(slow_path->GetExitLabel());
1555 }
1556}
1557
1558void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1559 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1560 LocationSummary::kCallOnMainAndSlowPath,
1561 kIntrinsified);
1562 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1563 // best to align the inputs accordingly.
1564 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1565 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1566 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1567 locations->SetOut(LocationFrom(r0));
1568
1569 // Need to send start-index=0.
1570 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1571}
1572
1573void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1574 GenerateVisitStringIndexOf(
1575 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1576}
1577
1578void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1579 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1580 LocationSummary::kCallOnMainAndSlowPath,
1581 kIntrinsified);
1582 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1583 // best to align the inputs accordingly.
1584 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1585 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1586 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1587 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1588 locations->SetOut(LocationFrom(r0));
1589}
1590
1591void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1592 GenerateVisitStringIndexOf(
1593 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1594}
1595
1596void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1597 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1598 LocationSummary::kCallOnMainAndSlowPath,
1599 kIntrinsified);
1600 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1601 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1602 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1603 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1604 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1605 locations->SetOut(LocationFrom(r0));
1606}
1607
1608void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1609 ArmVIXLAssembler* assembler = GetAssembler();
1610 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1611 __ Cmp(byte_array, 0);
1612 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1613 codegen_->AddSlowPath(slow_path);
1614 __ B(eq, slow_path->GetEntryLabel());
1615
1616 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1617 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1618 __ Bind(slow_path->GetExitLabel());
1619}
1620
1621void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1622 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1623 LocationSummary::kCallOnMainOnly,
1624 kIntrinsified);
1625 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1626 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1627 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1628 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1629 locations->SetOut(LocationFrom(r0));
1630}
1631
1632void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1633 // No need to emit code checking whether `locations->InAt(2)` is a null
1634 // pointer, as callers of the native method
1635 //
1636 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1637 //
1638 // all include a null check on `data` before calling that method.
1639 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1640 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1641}
1642
1643void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1644 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1645 LocationSummary::kCallOnMainAndSlowPath,
1646 kIntrinsified);
1647 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1648 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1649 locations->SetOut(LocationFrom(r0));
1650}
1651
1652void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1653 ArmVIXLAssembler* assembler = GetAssembler();
1654 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1655 __ Cmp(string_to_copy, 0);
1656 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1657 codegen_->AddSlowPath(slow_path);
1658 __ B(eq, slow_path->GetEntryLabel());
1659
1660 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1661 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1662
1663 __ Bind(slow_path->GetExitLabel());
1664}
1665
1666void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1667 // The only read barrier implementation supporting the
1668 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1669 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1670 return;
1671 }
1672
1673 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1674 LocationSummary* locations = invoke->GetLocations();
1675 if (locations == nullptr) {
1676 return;
1677 }
1678
1679 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1680 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1681 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1682
1683 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1684 locations->SetInAt(1, Location::RequiresRegister());
1685 }
1686 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1687 locations->SetInAt(3, Location::RequiresRegister());
1688 }
1689 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1690 locations->SetInAt(4, Location::RequiresRegister());
1691 }
1692 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1693 // Temporary register IP cannot be used in
1694 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1695 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1696 // temporary register from the register allocator.
1697 locations->AddTemp(Location::RequiresRegister());
1698 }
1699}
1700
1701static void CheckPosition(ArmVIXLAssembler* assembler,
1702 Location pos,
1703 vixl32::Register input,
1704 Location length,
1705 SlowPathCodeARMVIXL* slow_path,
1706 vixl32::Register temp,
1707 bool length_is_input_length = false) {
1708 // Where is the length in the Array?
1709 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1710
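// In outline (a sketch of the checks emitted below): bail to the slow path unless 0 <= pos,
// pos <= length(input) and length(input) - pos >= length; when `length_is_input_length` is set,
// only a zero pos can pass.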
1711 if (pos.IsConstant()) {
1712 int32_t pos_const = Int32ConstantFrom(pos);
1713 if (pos_const == 0) {
1714 if (!length_is_input_length) {
1715 // Check that length(input) >= length.
1716 __ Ldr(temp, MemOperand(input, length_offset));
1717 if (length.IsConstant()) {
1718 __ Cmp(temp, Int32ConstantFrom(length));
1719 } else {
1720 __ Cmp(temp, RegisterFrom(length));
1721 }
1722 __ B(lt, slow_path->GetEntryLabel());
1723 }
1724 } else {
1725 // Check that length(input) >= pos.
1726 __ Ldr(temp, MemOperand(input, length_offset));
1727 __ Subs(temp, temp, pos_const);
1728 __ B(lt, slow_path->GetEntryLabel());
1729
1730 // Check that (length(input) - pos) >= length.
1731 if (length.IsConstant()) {
1732 __ Cmp(temp, Int32ConstantFrom(length));
1733 } else {
1734 __ Cmp(temp, RegisterFrom(length));
1735 }
1736 __ B(lt, slow_path->GetEntryLabel());
1737 }
1738 } else if (length_is_input_length) {
1739 // The only way the copy can succeed is if pos is zero.
1740 vixl32::Register pos_reg = RegisterFrom(pos);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001741 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001742 } else {
1743 // Check that pos >= 0.
1744 vixl32::Register pos_reg = RegisterFrom(pos);
1745 __ Cmp(pos_reg, 0);
1746 __ B(lt, slow_path->GetEntryLabel());
1747
1748 // Check that pos <= length(input).
1749 __ Ldr(temp, MemOperand(input, length_offset));
1750 __ Subs(temp, temp, pos_reg);
1751 __ B(lt, slow_path->GetEntryLabel());
1752
1753 // Check that (length(input) - pos) >= length.
1754 if (length.IsConstant()) {
1755 __ Cmp(temp, Int32ConstantFrom(length));
1756 } else {
1757 __ Cmp(temp, RegisterFrom(length));
1758 }
1759 __ B(lt, slow_path->GetEntryLabel());
1760 }
1761}
1762
1763void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1764 // The only read barrier implementation supporting the
1765 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1766 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1767
1768 ArmVIXLAssembler* assembler = GetAssembler();
1769 LocationSummary* locations = invoke->GetLocations();
1770
1771 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1772 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1773 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1774 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1775 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1776
1777 vixl32::Register src = InputRegisterAt(invoke, 0);
1778 Location src_pos = locations->InAt(1);
1779 vixl32::Register dest = InputRegisterAt(invoke, 2);
1780 Location dest_pos = locations->InAt(3);
1781 Location length = locations->InAt(4);
1782 Location temp1_loc = locations->GetTemp(0);
1783 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1784 Location temp2_loc = locations->GetTemp(1);
1785 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1786 Location temp3_loc = locations->GetTemp(2);
1787 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1788
1789 SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1790 codegen_->AddSlowPath(intrinsic_slow_path);
1791
1792 vixl32::Label conditions_on_positions_validated;
1793 SystemArrayCopyOptimizations optimizations(invoke);
1794
1795 // If source and destination are the same, we go to the slow path when dest_pos > src_pos:
1796 // the copy loop below runs forward and would overwrite overlapping source elements.
1797 if (src_pos.IsConstant()) {
1798 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1799 if (dest_pos.IsConstant()) {
1800 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1801 if (optimizations.GetDestinationIsSource()) {
1802 // Checked when building locations.
1803 DCHECK_GE(src_pos_constant, dest_pos_constant);
1804 } else if (src_pos_constant < dest_pos_constant) {
1805 __ Cmp(src, dest);
1806 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1807 }
1808
1809 // Checked when building locations.
1810 DCHECK(!optimizations.GetDestinationIsSource()
1811 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1812 } else {
1813 if (!optimizations.GetDestinationIsSource()) {
1814 __ Cmp(src, dest);
Artem Serov517d9f62016-12-12 15:51:15 +00001815 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001816 }
1817 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1818 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1819 }
1820 } else {
1821 if (!optimizations.GetDestinationIsSource()) {
1822 __ Cmp(src, dest);
Artem Serov517d9f62016-12-12 15:51:15 +00001823 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001824 }
1825 if (dest_pos.IsConstant()) {
1826 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1827 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1828 } else {
1829 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1830 }
1831 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1832 }
1833
1834 __ Bind(&conditions_on_positions_validated);
1835
1836 if (!optimizations.GetSourceIsNotNull()) {
1837 // Bail out if the source is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001838 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001839 }
1840
1841 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1842 // Bail out if the destination is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001843 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001844 }
1845
1846 // If the length is negative, bail out.
1847 // We have already checked in the LocationsBuilder for the constant case.
1848 if (!length.IsConstant() &&
1849 !optimizations.GetCountIsSourceLength() &&
1850 !optimizations.GetCountIsDestinationLength()) {
1851 __ Cmp(RegisterFrom(length), 0);
1852 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1853 }
1854
1855 // Validity checks: source.
1856 CheckPosition(assembler,
1857 src_pos,
1858 src,
1859 length,
1860 intrinsic_slow_path,
1861 temp1,
1862 optimizations.GetCountIsSourceLength());
1863
1864 // Validity checks: dest.
1865 CheckPosition(assembler,
1866 dest_pos,
1867 dest,
1868 length,
1869 intrinsic_slow_path,
1870 temp1,
1871 optimizations.GetCountIsDestinationLength());
1872
1873 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1874 // Check whether all elements of the source array are assignable to the component
1875 // type of the destination array. We do two checks: the classes are the same,
1876 // or the destination is Object[]. If none of these checks succeed, we go to the
1877 // slow path.
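// (For instance, copying a String[] into an Object[] passes the second check, whereas copying an
// Object[] into a String[] falls back to the slow path, which performs the per-element checks.)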
1878
1879 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1880 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1881 // /* HeapReference<Class> */ temp1 = src->klass_
1882 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1883 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1884 // Bail out if the source is not a non primitive array.
1885 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1886 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1887 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001888 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001889 // If heap poisoning is enabled, `temp1` has been unpoisoned
1890 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1891 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1892 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1893 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001894 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001895 }
1896
1897 // /* HeapReference<Class> */ temp1 = dest->klass_
1898 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1899 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
1900
1901 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1902 // Bail out if the destination is not a non primitive array.
1903 //
1904 // Register `temp1` is not trashed by the read barrier emitted
1905 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1906 // method produces a call to a ReadBarrierMarkRegX entry point,
1907 // which saves all potentially live registers, including
1908 // temporaries such as `temp1`.
1909 // /* HeapReference<Class> */ temp2 = temp1->component_type_
1910 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1911 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001912 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001913 // If heap poisoning is enabled, `temp2` has been unpoisoned
1914 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1915 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1916 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1917 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001918 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001919 }
1920
1921 // For the same reason given earlier, `temp1` is not trashed by the
1922 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1923 // /* HeapReference<Class> */ temp2 = src->klass_
1924 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1925 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
1926 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1927 __ Cmp(temp1, temp2);
1928
1929 if (optimizations.GetDestinationIsTypedObjectArray()) {
1930 vixl32::Label do_copy;
Artem Serov517d9f62016-12-12 15:51:15 +00001931 __ B(eq, &do_copy, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001932 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1933 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1934 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1935 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1936 // We do not need to emit a read barrier for the following
1937 // heap reference load, as `temp1` is only used in a
1938 // comparison with null below, and this reference is not
1939 // kept afterwards.
1940 __ Ldr(temp1, MemOperand(temp1, super_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001941 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001942 __ Bind(&do_copy);
1943 } else {
1944 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1945 }
1946 } else {
1947 // Non read barrier code.
1948
1949 // /* HeapReference<Class> */ temp1 = dest->klass_
1950 __ Ldr(temp1, MemOperand(dest, class_offset));
1951 // /* HeapReference<Class> */ temp2 = src->klass_
1952 __ Ldr(temp2, MemOperand(src, class_offset));
1953 bool did_unpoison = false;
1954 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1955 !optimizations.GetSourceIsNonPrimitiveArray()) {
1956 // One or two of the references need to be unpoisoned. Unpoison them
1957 // both to make the identity check valid.
1958 assembler->MaybeUnpoisonHeapReference(temp1);
1959 assembler->MaybeUnpoisonHeapReference(temp2);
1960 did_unpoison = true;
1961 }
1962
1963 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1964 // Bail out if the destination is not a non primitive array.
1965 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1966 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001967 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001968 assembler->MaybeUnpoisonHeapReference(temp3);
1969 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1970 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1971 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001972 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001973 }
1974
1975 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1976 // Bail out if the source is not a non primitive array.
1977 // /* HeapReference<Class> */ temp3 = temp2->component_type_
1978 __ Ldr(temp3, MemOperand(temp2, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001979 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001980 assembler->MaybeUnpoisonHeapReference(temp3);
1981 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1982 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1983 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001984 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001985 }
1986
1987 __ Cmp(temp1, temp2);
1988
1989 if (optimizations.GetDestinationIsTypedObjectArray()) {
1990 vixl32::Label do_copy;
Artem Serov517d9f62016-12-12 15:51:15 +00001991 __ B(eq, &do_copy, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001992 if (!did_unpoison) {
1993 assembler->MaybeUnpoisonHeapReference(temp1);
1994 }
1995 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1996 __ Ldr(temp1, MemOperand(temp1, component_offset));
1997 assembler->MaybeUnpoisonHeapReference(temp1);
1998 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1999 __ Ldr(temp1, MemOperand(temp1, super_offset));
2000 // No need to unpoison the result, we're comparing against null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00002001 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002002 __ Bind(&do_copy);
2003 } else {
2004 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2005 }
2006 }
2007 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2008 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
2009 // Bail out if the source is not a non primitive array.
2010 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2011 // /* HeapReference<Class> */ temp1 = src->klass_
2012 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2013 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2014 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2015 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2016 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00002017 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002018 // If heap poisoning is enabled, `temp3` has been unpoisoned
2019 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2020 } else {
2021 // /* HeapReference<Class> */ temp1 = src->klass_
2022 __ Ldr(temp1, MemOperand(src, class_offset));
2023 assembler->MaybeUnpoisonHeapReference(temp1);
2024 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2025 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00002026 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002027 assembler->MaybeUnpoisonHeapReference(temp3);
2028 }
2029 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2030 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2031 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002032 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002033 }
2034
2035 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
2036 uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
2037 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
2038
2039 // Compute the base source address in `temp1`.
2040 if (src_pos.IsConstant()) {
2041 int32_t constant = Int32ConstantFrom(src_pos);
2042 __ Add(temp1, src, element_size * constant + offset);
2043 } else {
2044 __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
2045 __ Add(temp1, temp1, offset);
2046 }
2047
2048 // Compute the end source address in `temp3`.
2049 if (length.IsConstant()) {
2050 int32_t constant = Int32ConstantFrom(length);
2051 __ Add(temp3, temp1, element_size * constant);
2052 } else {
2053 __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
2054 }
2055
2056 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2057 // The base destination address is computed later, as `temp2` is
2058 // used for intermediate computations.
2059
2060 // SystemArrayCopy implementation for Baker read barriers (see
2061 // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2062 //
2063 // if (src_ptr != end_ptr) {
2064 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
2065 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002066 // bool is_gray = (rb_state == ReadBarrier::GrayState());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002067 // if (is_gray) {
2068 // // Slow-path copy.
2069 // do {
2070 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2071 // } while (src_ptr != end_ptr)
2072 // } else {
2073 // // Fast-path copy.
2074 // do {
2075 // *dest_ptr++ = *src_ptr++;
2076 // } while (src_ptr != end_ptr)
2077 // }
2078 // }
2079
2080 vixl32::Label loop, done;
2081
2082 // Don't enter copy loop if `length == 0`.
2083 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002084 __ B(eq, &done, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002085
2086 // /* int32_t */ monitor = src->monitor_
2087 __ Ldr(temp2, MemOperand(src, monitor_offset));
2088 // /* LockWord */ lock_word = LockWord(monitor)
2089 static_assert(sizeof(LockWord) == sizeof(int32_t),
2090 "art::LockWord and int32_t have different sizes.");
2091
2092 // Introduce a dependency on the lock_word including the rb_state,
2093 // which shall prevent load-load reordering without using
2094 // a memory barrier (which would be more expensive).
2095 // `src` is unchanged by this operation, but its value now depends
2096 // on `temp2`.
2097 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
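// (LSR #32 produces zero on ARM, so the ADD leaves `src` numerically intact while still
// creating the register dependency on temp2 described above.)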
2098
2099 // Slow path used to copy array when `src` is gray.
2100 SlowPathCodeARMVIXL* read_barrier_slow_path =
2101 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2102 codegen_->AddSlowPath(read_barrier_slow_path);
2103
2104 // Given the numeric representation, it's enough to check the low bit of the
2105 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2106 // which can be a 16-bit instruction, unlike the TST immediate.
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002107 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2108 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
Anton Kirilov5ec62182016-10-13 20:16:02 +01002109 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2110 // Carry flag is the last bit shifted out by LSRS.
2111 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2112
2113 // Fast-path copy.
2114
2115 // Compute the base destination address in `temp2`.
2116 if (dest_pos.IsConstant()) {
2117 int32_t constant = Int32ConstantFrom(dest_pos);
2118 __ Add(temp2, dest, element_size * constant + offset);
2119 } else {
2120 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2121 __ Add(temp2, temp2, offset);
2122 }
2123
2124 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2125 // poison/unpoison.
2126 __ Bind(&loop);
2127
2128 {
2129 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2130 const vixl32::Register temp_reg = temps.Acquire();
2131
2132 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2133 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2134 }
2135
2136 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002137 __ B(ne, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002138
2139 __ Bind(read_barrier_slow_path->GetExitLabel());
2140 __ Bind(&done);
2141 } else {
2142 // Non read barrier code.
2143
2144 // Compute the base destination address in `temp2`.
2145 if (dest_pos.IsConstant()) {
2146 int32_t constant = Int32ConstantFrom(dest_pos);
2147 __ Add(temp2, dest, element_size * constant + offset);
2148 } else {
2149 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2150 __ Add(temp2, temp2, offset);
2151 }
2152
2153 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2154 // poison/unpoison.
2155 vixl32::Label loop, done;
2156 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002157 __ B(eq, &done, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002158 __ Bind(&loop);
2159
2160 {
2161 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2162 const vixl32::Register temp_reg = temps.Acquire();
2163
2164 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2165 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2166 }
2167
2168 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002169 __ B(ne, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002170 __ Bind(&done);
2171 }
2172
2173 // We only need one card marking on the destination array.
2174 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2175
2176 __ Bind(intrinsic_slow_path->GetExitLabel());
2177}
2178
2179static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2180 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2181 // the code generator. Furthermore, the register allocator creates fixed live intervals
2182 // for all caller-saved registers because we are doing a function call. As a result, if
2183 // the input and output locations are unallocated, the register allocator runs out of
2184 // registers and fails; however, a debuggable graph is not the common case.
2185 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2186 return;
2187 }
2188
2189 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2190 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2191 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2192
2193 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2194 LocationSummary::kCallOnMainOnly,
2195 kIntrinsified);
2196 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2197
2198 locations->SetInAt(0, Location::RequiresFpuRegister());
2199 locations->SetOut(Location::RequiresFpuRegister());
2200 // Native code uses the soft float ABI.
2201 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2202 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2203}
2204
2205static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2206 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2207 // the code generator. Furthermore, the register allocator creates fixed live intervals
2208 // for all caller-saved registers because we are doing a function call. As a result, if
2209 // the input and output locations are unallocated, the register allocator runs out of
2210 // registers and fails; however, a debuggable graph is not the common case.
2211 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2212 return;
2213 }
2214
2215 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2216 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2217 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2218 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2219
2220 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2221 LocationSummary::kCallOnMainOnly,
2222 kIntrinsified);
2223 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2224
2225 locations->SetInAt(0, Location::RequiresFpuRegister());
2226 locations->SetInAt(1, Location::RequiresFpuRegister());
2227 locations->SetOut(Location::RequiresFpuRegister());
2228 // Native code uses the soft float ABI.
2229 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2230 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2231 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2232 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2233}
2234
2235static void GenFPToFPCall(HInvoke* invoke,
2236 ArmVIXLAssembler* assembler,
2237 CodeGeneratorARMVIXL* codegen,
2238 QuickEntrypointEnum entry) {
2239 LocationSummary* const locations = invoke->GetLocations();
2240
2241 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2242 DCHECK(locations->WillCall() && locations->Intrinsified());
2243
2244 // Native code uses the soft float ABI.
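// (Under that ABI a double is passed in a pair of core registers, so the VMOVs below shuttle the
// value between the D register and the temp core-register pair around the runtime call.)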
2245 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2246 RegisterFrom(locations->GetTemp(1)),
2247 InputDRegisterAt(invoke, 0));
2248 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2249 __ Vmov(OutputDRegister(invoke),
2250 RegisterFrom(locations->GetTemp(0)),
2251 RegisterFrom(locations->GetTemp(1)));
2252}
2253
2254static void GenFPFPToFPCall(HInvoke* invoke,
2255 ArmVIXLAssembler* assembler,
2256 CodeGeneratorARMVIXL* codegen,
2257 QuickEntrypointEnum entry) {
2258 LocationSummary* const locations = invoke->GetLocations();
2259
2260 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2261 DCHECK(locations->WillCall() && locations->Intrinsified());
2262
2263 // Native code uses the soft float ABI.
2264 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2265 RegisterFrom(locations->GetTemp(1)),
2266 InputDRegisterAt(invoke, 0));
2267 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2268 RegisterFrom(locations->GetTemp(3)),
2269 InputDRegisterAt(invoke, 1));
2270 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2271 __ Vmov(OutputDRegister(invoke),
2272 RegisterFrom(locations->GetTemp(0)),
2273 RegisterFrom(locations->GetTemp(1)));
2274}
2275
2276void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2277 CreateFPToFPCallLocations(arena_, invoke);
2278}
2279
2280void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2281 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2282}
2283
2284void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2285 CreateFPToFPCallLocations(arena_, invoke);
2286}
2287
2288void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2289 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2290}
2291
2292void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2293 CreateFPToFPCallLocations(arena_, invoke);
2294}
2295
2296void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2297 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2298}
2299
2300void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2301 CreateFPToFPCallLocations(arena_, invoke);
2302}
2303
2304void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2305 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2306}
2307
2308void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2309 CreateFPToFPCallLocations(arena_, invoke);
2310}
2311
2312void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2313 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2314}
2315
2316void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2317 CreateFPToFPCallLocations(arena_, invoke);
2318}
2319
2320void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2321 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2322}
2323
2324void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2325 CreateFPToFPCallLocations(arena_, invoke);
2326}
2327
2328void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2329 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2330}
2331
2332void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2333 CreateFPToFPCallLocations(arena_, invoke);
2334}
2335
2336void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2337 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2338}
2339
2340void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2341 CreateFPToFPCallLocations(arena_, invoke);
2342}
2343
2344void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2345 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2346}
2347
2348void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2349 CreateFPToFPCallLocations(arena_, invoke);
2350}
2351
2352void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2353 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2354}
2355
2356void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2357 CreateFPToFPCallLocations(arena_, invoke);
2358}
2359
2360void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2361 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2362}
2363
2364void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2365 CreateFPToFPCallLocations(arena_, invoke);
2366}
2367
2368void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2369 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2370}
2371
2372void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2373 CreateFPToFPCallLocations(arena_, invoke);
2374}
2375
2376void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2377 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2378}
2379
2380void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2381 CreateFPToFPCallLocations(arena_, invoke);
2382}
2383
2384void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2385 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2386}
2387
2388void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2389 CreateFPFPToFPCallLocations(arena_, invoke);
2390}
2391
2392void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2393 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2394}
2395
2396void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2397 CreateFPFPToFPCallLocations(arena_, invoke);
2398}
2399
2400void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2401 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2402}
2403
2404void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2405 CreateFPFPToFPCallLocations(arena_, invoke);
2406}
2407
2408void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2409 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2410}
2411
2412void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2413 CreateIntToIntLocations(arena_, invoke);
2414}
2415
2416void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2417 ArmVIXLAssembler* assembler = GetAssembler();
2418 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2419}
2420
2421void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2422 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2423 LocationSummary::kNoCall,
2424 kIntrinsified);
2425 locations->SetInAt(0, Location::RequiresRegister());
2426 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2427}
2428
2429void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2430 ArmVIXLAssembler* assembler = GetAssembler();
2431 LocationSummary* locations = invoke->GetLocations();
2432
2433 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2434 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2435 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2436 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2437
2438 __ Rbit(out_reg_lo, in_reg_hi);
2439 __ Rbit(out_reg_hi, in_reg_lo);
2440}
2441
2442void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2443 CreateIntToIntLocations(arena_, invoke);
2444}
2445
2446void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2447 ArmVIXLAssembler* assembler = GetAssembler();
2448 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2449}
2450
2451void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2452 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2453 LocationSummary::kNoCall,
2454 kIntrinsified);
2455 locations->SetInAt(0, Location::RequiresRegister());
2456 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2457}
2458
2459void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2460 ArmVIXLAssembler* assembler = GetAssembler();
2461 LocationSummary* locations = invoke->GetLocations();
2462
2463 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2464 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2465 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2466 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2467
2468 __ Rev(out_reg_lo, in_reg_hi);
2469 __ Rev(out_reg_hi, in_reg_lo);
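// (For example, input 0x0011223344556677 comes out as 0x7766554433221100: REV byte-reverses
// each word while the low and high words swap places.)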
2470}
2471
2472void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2473 CreateIntToIntLocations(arena_, invoke);
2474}
2475
2476void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2477 ArmVIXLAssembler* assembler = GetAssembler();
  __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
}

static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
  DCHECK(Primitive::IsIntOrLongType(type)) << type;
  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);

  bool is_long = type == Primitive::kPrimLong;
  LocationSummary* locations = instr->GetLocations();
  Location in = locations->InAt(0);
  vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
  vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
  vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
  vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
  vixl32::Register out_r = OutputRegister(instr);

  // Move data from core register(s) to a temp D-reg for the bit count calculation, then move back.
  // According to the Cortex-A57 and Cortex-A72 optimization guides, compared to transferring to a
  // full D-reg, transferring data from a core reg to the upper or lower half of a VFP D-reg incurs
  // extra latency. That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of
  // 'vmov d0[0], r0'.
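  // Illustrative example (not generated code) for Integer.bitCount(0x000000FF),
  // where the int value is duplicated into both halves of the D-reg:
  //   after Vmov:        |00 00 00 FF|00 00 00 FF|
  //   after Vcnt  (U8):  | 0| 0| 0| 8| 0| 0| 0| 8|   per-byte popcounts
  //   after Vpaddl(U8):  |   0|   8|   0|   8|       pairwise sums into 16-bit lanes
  //   after Vpaddl(U16): |       8|       8|         pairwise sums into 32-bit lanes
  // and the low 32-bit lane (8) is moved back to the core output register.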
  __ Vmov(tmp_d, src_1, src_0);      // Temp DReg |--src_1|--src_0|
  __ Vcnt(Untyped8, tmp_d, tmp_d);   // Temp DReg |c|c|c|c|c|c|c|c|
  __ Vpaddl(U8, tmp_d, tmp_d);       // Temp DReg |--c|--c|--c|--c|
  __ Vpaddl(U16, tmp_d, tmp_d);      // Temp DReg |------c|------c|
  if (is_long) {
    __ Vpaddl(U32, tmp_d, tmp_d);    // Temp DReg |--------------c|
  }
  __ Vmov(out_r, tmp_s);
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
  VisitIntegerBitCount(invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // Temporary registers to store lengths of strings and for calculations.
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  // Location of data in char array buffer.
  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  // Location of char array data in string.
  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();

  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
  vixl32::Register srcObj = InputRegisterAt(invoke, 0);
  vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
  vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
  vixl32::Register dstObj = InputRegisterAt(invoke, 3);
  vixl32::Register dstBegin = InputRegisterAt(invoke, 4);

  vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
  vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
  vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
  vixl32::Label done, compressed_string_loop;
  // dst address to start copying to.
  __ Add(dst_ptr, dstObj, data_offset);
  __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));

  __ Subs(num_chr, srcEnd, srcBegin);
  // Early out for valid zero-length retrievals.
  __ B(eq, &done, /* far_target */ false);

  // src range to copy.
  __ Add(src_ptr, srcObj, value_offset);

  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register temp;
  vixl32::Label compressed_string_preloop;
  if (mirror::kUseStringCompression) {
    // Location of count in string.
    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
    temp = temps.Acquire();
    // String's length.
    __ Ldr(temp, MemOperand(srcObj, count_offset));
    __ Tst(temp, 1);
    temps.Release(temp);
    __ B(eq, &compressed_string_preloop, /* far_target */ false);
  }
  __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));

  // Do the copy.
  vixl32::Label loop, remainder;

  temp = temps.Acquire();
  // Save repairing the value of num_chr on the < 4 character path.
  __ Subs(temp, num_chr, 4);
  __ B(lt, &remainder, /* far_target */ false);

  // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
  __ Mov(num_chr, temp);

  // Main loop used for longer fetches loads and stores 4x16-bit characters at a time.
  // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
  // to rectify these everywhere this intrinsic applies.)
  __ Bind(&loop);
  __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
  __ Subs(num_chr, num_chr, 4);
  __ Str(temp, MemOperand(dst_ptr, char_size * 2));
  __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
  __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
  temps.Release(temp);
  __ B(ge, &loop, /* far_target */ false);

  __ Adds(num_chr, num_chr, 4);
  __ B(eq, &done, /* far_target */ false);

  // Main loop for < 4 character case and remainder handling. Loads and stores one
  // 16-bit Java character at a time.
  __ Bind(&remainder);
  temp = temps.Acquire();
  __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
  __ Subs(num_chr, num_chr, 1);
  __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
  temps.Release(temp);
  __ B(gt, &remainder, /* far_target */ false);

  if (mirror::kUseStringCompression) {
    __ B(&done);

    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
    DCHECK_EQ(c_char_size, 1u);
    // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
    __ Bind(&compressed_string_preloop);
    __ Add(src_ptr, src_ptr, srcBegin);
    __ Bind(&compressed_string_loop);
    temp = temps.Acquire();
    __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
    __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
    temps.Release(temp);
    __ Subs(num_chr, num_chr, 1);
    __ B(gt, &compressed_string_loop, /* far_target */ false);
  }

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
  ArmVIXLAssembler* const assembler = GetAssembler();
  const vixl32::Register out = OutputRegister(invoke);
  // Shifting left by 1 bit makes the value encodable as an immediate operand;
  // we don't care about the sign bit anyway.
  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;

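  // In effect (a sketch, not generated code):
  //   out = ((float_bits << 1) == (0x7f800000u << 1)) ? 1 : 0;
  // i.e. the input is +/-Infinity exactly when, ignoring the sign bit, its bit
  // pattern matches that of positive infinity.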
  __ Vmov(out, InputSRegisterAt(invoke, 0));
  // We don't care about the sign bit, so shift left.
  __ Lsl(out, out, 1);
  __ Eor(out, out, infinity);
  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
  __ Clz(out, out);
  // Any number less than 32 logically shifted right by 5 bits results in 0;
  // the same operation on 32 yields 1.
  __ Lsr(out, out, 5);
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
  ArmVIXLAssembler* const assembler = GetAssembler();
  const vixl32::Register out = OutputRegister(invoke);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp = temps.Acquire();
  // The highest 32 bits of double precision positive infinity separated into
  // two constants encodable as immediate operands.
  constexpr uint32_t infinity_high  = 0x7f000000U;
  constexpr uint32_t infinity_high2 = 0x00f00000U;

  static_assert((infinity_high | infinity_high2) ==
                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
                "The constants do not add up to the high 32 bits of double "
                "precision positive infinity.");
  __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
  __ Eor(out, out, infinity_high);
  __ Eor(out, out, infinity_high2);
  // We don't care about the sign bit, so shift left.
  __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
  __ Clz(out, out);
  // Any number less than 32 logically shifted right by 5 bits results in 0;
  // the same operation on 32 yields 1.
  __ Lsr(out, out, 5);
}

void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // Do not intrinsify this call with the read barrier configuration.
    return;
  }
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register obj = InputRegisterAt(invoke, 0);
  vixl32::Register out = OutputRegister(invoke);

  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
  codegen_->AddSlowPath(slow_path);

  // Load ArtMethod first.
  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(invoke_direct != nullptr);
  vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
      invoke_direct, locations->GetTemp(0)));

  // Now get declaring class.
  __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));

  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
  DCHECK_NE(slow_path_flag_offset, 0u);
  DCHECK_NE(disable_flag_offset, 0u);
  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);

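  // In effect (sketch only): if either of the declaring class's flags is set,
  // branch to the generic intrinsic slow path (which performs the regular call);
  // otherwise load obj.referent directly on the fast path below.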
  // Check static flags that prevent using intrinsic.
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  vixl32::Register temp1 = temps.Acquire();
  __ Ldr(temp1, MemOperand(temp0, disable_flag_offset));
  __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset));
  __ Orr(temp0, temp1, temp0);
  __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel());

  // Fast path.
  __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  assembler->MaybeUnpoisonHeapReference(out);
  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
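  // VRINTP rounds toward +infinity, which is what Math.ceil requires for doubles.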
  __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
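  // VRINTM rounds toward -infinity, which is what Math.floor requires for doubles.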
  __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)

UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString)

// 1.8.
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(ARMVIXL)

#undef __

}  // namespace arm
}  // namespace art