1/*
2 * Copyright (C) 2016 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "intrinsics_arm_vixl.h"
18
19#include "arch/arm/instruction_set_features_arm.h"
20#include "code_generator_arm_vixl.h"
21#include "common_arm.h"
22#include "lock_word.h"
23#include "mirror/array-inl.h"
24
25#include "aarch32/constants-aarch32.h"
26
27namespace art {
28namespace arm {
29
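// Convenience shorthand used throughout this file: `__` routes assembly mnemonics to the
// underlying VIXL macro assembler.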
30#define __ assembler->GetVIXLAssembler()->
31
32using helpers::DRegisterFrom;
33using helpers::HighRegisterFrom;
34using helpers::InputDRegisterAt;
35using helpers::InputRegisterAt;
36using helpers::InputSRegisterAt;
37using helpers::InputVRegisterAt;
38using helpers::Int32ConstantFrom;
39using helpers::LocationFrom;
40using helpers::LowRegisterFrom;
41using helpers::LowSRegisterFrom;
42using helpers::OutputDRegister;
43using helpers::OutputRegister;
44using helpers::OutputVRegister;
45using helpers::RegisterFrom;
46using helpers::SRegisterFrom;
47
48using namespace vixl::aarch32; // NOLINT(build/namespaces)
49
50ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
51 return codegen_->GetAssembler();
52}
53
54ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
55 return codegen_->GetGraph()->GetArena();
56}
57
58// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
59// intrinsified call. This will copy the arguments into the positions for a regular call.
60//
61// Note: The actual parameters are required to be in the locations given by the invoke's location
62// summary. If an intrinsic modifies those locations before a slowpath call, they must be
63// restored!
64//
65// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
66// sub-optimal (compared to a direct pointer call), but this is a slow-path.
67
68class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
69 public:
70 explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
71 : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}
72
73 Location MoveArguments(CodeGenerator* codegen) {
74 InvokeDexCallingConventionVisitorARM calling_convention_visitor;
75 IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
76 return calling_convention_visitor.GetMethodLocation();
77 }
78
79 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
80 ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
81 __ Bind(GetEntryLabel());
82
83 SaveLiveRegisters(codegen, invoke_->GetLocations());
84
85 Location method_loc = MoveArguments(codegen);
86
87 if (invoke_->IsInvokeStaticOrDirect()) {
88 codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
89 } else {
90 codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
91 }
92 codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
93
94 // Copy the result back to the expected output.
95 Location out = invoke_->GetLocations()->Out();
96 if (out.IsValid()) {
97 DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
98 DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
99 codegen->MoveFromReturnRegister(out, invoke_->GetType());
100 }
101
102 RestoreLiveRegisters(codegen, invoke_->GetLocations());
103 __ B(GetExitLabel());
104 }
105
106 const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
107
108 private:
109 // The instruction where this slow path is happening.
110 HInvoke* const invoke_;
111
112 DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
113};
114
115// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
116class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
117 public:
118 explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
119 : SlowPathCodeARMVIXL(instruction) {
120 DCHECK(kEmitCompilerReadBarrier);
121 DCHECK(kUseBakerReadBarrier);
122 }
123
124 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
125 CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
126 ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
127 LocationSummary* locations = instruction_->GetLocations();
128 DCHECK(locations->CanCall());
129 DCHECK(instruction_->IsInvokeStaticOrDirect())
130 << "Unexpected instruction in read barrier arraycopy slow path: "
131 << instruction_->DebugName();
132 DCHECK(instruction_->GetLocations()->Intrinsified());
133 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
134
135 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
136 uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
137 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
138
139 vixl32::Register dest = InputRegisterAt(instruction_, 2);
140 Location dest_pos = locations->InAt(3);
141 vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
142 vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
143 vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
144 vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
145
146 __ Bind(GetEntryLabel());
147 // Compute the base destination address in `dst_curr_addr`.
148 if (dest_pos.IsConstant()) {
149 int32_t constant = Int32ConstantFrom(dest_pos);
150 __ Add(dst_curr_addr, dest, element_size * constant + offset);
151 } else {
152 __ Add(dst_curr_addr,
153 dest,
154 Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
155 __ Add(dst_curr_addr, dst_curr_addr, offset);
156 }
157
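    // Copy loop: load one reference from the source, mark it through the read barrier entry
    // point, store it to the destination, and repeat until src_curr_addr reaches src_stop_addr.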
158 vixl32::Label loop;
159 __ Bind(&loop);
160 __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
161 assembler->MaybeUnpoisonHeapReference(tmp);
162 // TODO: Inline the mark bit check before calling the runtime?
163 // tmp = ReadBarrier::Mark(tmp);
164 // No need to save live registers; it's taken care of by the
165 // entrypoint. Also, there is no need to update the stack mask,
166 // as this runtime call will not trigger a garbage collection.
167 // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
168 // explanations.)
169 DCHECK(!tmp.IsSP());
170 DCHECK(!tmp.IsLR());
171 DCHECK(!tmp.IsPC());
172 // IP is used internally by the ReadBarrierMarkRegX entry point
173 // as a temporary (and not preserved). It thus cannot be used by
174 // any live register in this slow path.
175 DCHECK(!src_curr_addr.Is(ip));
176 DCHECK(!dst_curr_addr.Is(ip));
177 DCHECK(!src_stop_addr.Is(ip));
178 DCHECK(!tmp.Is(ip));
179 DCHECK(tmp.IsRegister()) << tmp;
180 int32_t entry_point_offset =
181 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
182 // This runtime call does not require a stack map.
183 arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
184 assembler->MaybePoisonHeapReference(tmp);
185 __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
186 __ Cmp(src_curr_addr, src_stop_addr);
187 __ B(ne, &loop);
188 __ B(GetExitLabel());
189 }
190
191 const char* GetDescription() const OVERRIDE {
192 return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
193 }
194
195 private:
196 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
197};
198
199IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
200 : arena_(codegen->GetGraph()->GetArena()),
201 assembler_(codegen->GetAssembler()),
202 features_(codegen->GetInstructionSetFeatures()) {}
203
204bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
205 Dispatch(invoke);
206 LocationSummary* res = invoke->GetLocations();
207 if (res == nullptr) {
208 return false;
209 }
210 return res->Intrinsified();
211}
212
213static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
214 LocationSummary* locations = new (arena) LocationSummary(invoke,
215 LocationSummary::kNoCall,
216 kIntrinsified);
217 locations->SetInAt(0, Location::RequiresFpuRegister());
218 locations->SetOut(Location::RequiresRegister());
219}
220
221static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
222 LocationSummary* locations = new (arena) LocationSummary(invoke,
223 LocationSummary::kNoCall,
224 kIntrinsified);
225 locations->SetInAt(0, Location::RequiresRegister());
226 locations->SetOut(Location::RequiresFpuRegister());
227}
228
229static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
230 Location input = locations->InAt(0);
231 Location output = locations->Out();
232 if (is64bit) {
233 __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
234 } else {
235 __ Vmov(RegisterFrom(output), SRegisterFrom(input));
236 }
237}
238
239static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
240 Location input = locations->InAt(0);
241 Location output = locations->Out();
242 if (is64bit) {
243 __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
244 } else {
245 __ Vmov(SRegisterFrom(output), RegisterFrom(input));
246 }
247}
248
249void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
250 CreateFPToIntLocations(arena_, invoke);
251}
252void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
253 CreateIntToFPLocations(arena_, invoke);
254}
255
256void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
257 MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
258}
259void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
260 MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
261}
262
263void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
264 CreateFPToIntLocations(arena_, invoke);
265}
266void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
267 CreateIntToFPLocations(arena_, invoke);
268}
269
270void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
271 MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
272}
273void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
274 MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
275}
276
277static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
278 LocationSummary* locations = new (arena) LocationSummary(invoke,
279 LocationSummary::kNoCall,
280 kIntrinsified);
281 locations->SetInAt(0, Location::RequiresRegister());
282 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
283}
284
285static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
286 LocationSummary* locations = new (arena) LocationSummary(invoke,
287 LocationSummary::kNoCall,
288 kIntrinsified);
289 locations->SetInAt(0, Location::RequiresFpuRegister());
290 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
291}
292
293static void GenNumberOfLeadingZeros(LocationSummary* locations,
294 Primitive::Type type,
295 ArmVIXLAssembler* assembler) {
296 Location in = locations->InAt(0);
297 vixl32::Register out = RegisterFrom(locations->Out());
298
299 DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
300
301 if (type == Primitive::kPrimLong) {
302 vixl32::Register in_reg_lo = LowRegisterFrom(in);
303 vixl32::Register in_reg_hi = HighRegisterFrom(in);
304 vixl32::Label end;
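    // CLZ of a 64-bit value: CLZ(hi) when the high word is non-zero, otherwise 32 + CLZ(lo).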
305 __ Clz(out, in_reg_hi);
306 __ Cbnz(in_reg_hi, &end);
307 __ Clz(out, in_reg_lo);
308 __ Add(out, out, 32);
309 __ Bind(&end);
310 } else {
311 __ Clz(out, RegisterFrom(in));
312 }
313}
314
315void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
316 CreateIntToIntLocations(arena_, invoke);
317}
318
319void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
320 GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
321}
322
323void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
324 LocationSummary* locations = new (arena_) LocationSummary(invoke,
325 LocationSummary::kNoCall,
326 kIntrinsified);
327 locations->SetInAt(0, Location::RequiresRegister());
328 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
329}
330
331void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
332 GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
333}
334
335static void GenNumberOfTrailingZeros(LocationSummary* locations,
336 Primitive::Type type,
337 ArmVIXLAssembler* assembler) {
338 DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
339
340 vixl32::Register out = RegisterFrom(locations->Out());
341
342 if (type == Primitive::kPrimLong) {
343 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
344 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
345 vixl32::Label end;
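    // CTZ via RBIT + CLZ: reverse the bits, then count leading zeros. Start with the low word;
    // if it is zero, the result is 32 + CTZ of the high word.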
346 __ Rbit(out, in_reg_lo);
347 __ Clz(out, out);
348 __ Cbnz(in_reg_lo, &end);
349 __ Rbit(out, in_reg_hi);
350 __ Clz(out, out);
351 __ Add(out, out, 32);
352 __ Bind(&end);
353 } else {
354 vixl32::Register in = RegisterFrom(locations->InAt(0));
355 __ Rbit(out, in);
356 __ Clz(out, out);
357 }
358}
359
360void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
361 LocationSummary* locations = new (arena_) LocationSummary(invoke,
362 LocationSummary::kNoCall,
363 kIntrinsified);
364 locations->SetInAt(0, Location::RequiresRegister());
365 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
366}
367
368void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
369 GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
370}
371
372void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
373 LocationSummary* locations = new (arena_) LocationSummary(invoke,
374 LocationSummary::kNoCall,
375 kIntrinsified);
376 locations->SetInAt(0, Location::RequiresRegister());
377 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
378}
379
380void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
381 GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
382}
383
384static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
385 __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
386}
387
388void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
389 CreateFPToFPLocations(arena_, invoke);
390}
391
392void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
393 MathAbsFP(invoke, GetAssembler());
394}
395
396void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
397 CreateFPToFPLocations(arena_, invoke);
398}
399
400void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
401 MathAbsFP(invoke, GetAssembler());
402}
403
404static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
405 LocationSummary* locations = new (arena) LocationSummary(invoke,
406 LocationSummary::kNoCall,
407 kIntrinsified);
408 locations->SetInAt(0, Location::RequiresRegister());
409 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
410
411 locations->AddTemp(Location::RequiresRegister());
412}
413
414static void GenAbsInteger(LocationSummary* locations,
415 bool is64bit,
416 ArmVIXLAssembler* assembler) {
417 Location in = locations->InAt(0);
418 Location output = locations->Out();
419
420 vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
421
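  // Branchless abs: mask = value >> 31 (all ones when negative, zero otherwise), and
  // abs(value) = (value + mask) ^ mask. The 64-bit case uses an ADDS/ADC pair for the addition.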
422 if (is64bit) {
423 vixl32::Register in_reg_lo = LowRegisterFrom(in);
424 vixl32::Register in_reg_hi = HighRegisterFrom(in);
425 vixl32::Register out_reg_lo = LowRegisterFrom(output);
426 vixl32::Register out_reg_hi = HighRegisterFrom(output);
427
428 DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
429
430 __ Asr(mask, in_reg_hi, 31);
431 __ Adds(out_reg_lo, in_reg_lo, mask);
432 __ Adc(out_reg_hi, in_reg_hi, mask);
433 __ Eor(out_reg_lo, mask, out_reg_lo);
434 __ Eor(out_reg_hi, mask, out_reg_hi);
435 } else {
436 vixl32::Register in_reg = RegisterFrom(in);
437 vixl32::Register out_reg = RegisterFrom(output);
438
439 __ Asr(mask, in_reg, 31);
440 __ Add(out_reg, in_reg, mask);
441 __ Eor(out_reg, mask, out_reg);
442 }
443}
444
445void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
446 CreateIntToIntPlusTemp(arena_, invoke);
447}
448
449void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
450 GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
451}
452
453
454void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
455 CreateIntToIntPlusTemp(arena_, invoke);
456}
457
458void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
459 GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
460}
461
462static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
463 vixl32::Register op1 = InputRegisterAt(invoke, 0);
464 vixl32::Register op2 = InputRegisterAt(invoke, 1);
465 vixl32::Register out = OutputRegister(invoke);
466
467 __ Cmp(op1, op2);
468
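  // Select the result with an IT block: out = op1 when the condition (LT for min, GT for max)
  // holds, otherwise out = op2.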
469 {
470 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
471 3 * kMaxInstructionSizeInBytes,
472 CodeBufferCheckScope::kMaximumSize);
473
474 __ ite(is_min ? lt : gt);
475 __ mov(is_min ? lt : gt, out, op1);
476 __ mov(is_min ? ge : le, out, op2);
477 }
478}
479
480static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
481 LocationSummary* locations = new (arena) LocationSummary(invoke,
482 LocationSummary::kNoCall,
483 kIntrinsified);
484 locations->SetInAt(0, Location::RequiresRegister());
485 locations->SetInAt(1, Location::RequiresRegister());
486 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
487}
488
489void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
490 CreateIntIntToIntLocations(arena_, invoke);
491}
492
493void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
494 GenMinMax(invoke, /* is_min */ true, GetAssembler());
495}
496
497void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
498 CreateIntIntToIntLocations(arena_, invoke);
499}
500
501void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
502 GenMinMax(invoke, /* is_min */ false, GetAssembler());
503}
504
505void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
506 CreateFPToFPLocations(arena_, invoke);
507}
508
509void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
510 ArmVIXLAssembler* assembler = GetAssembler();
511 __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
512}
513
514void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
515 CreateIntToIntLocations(arena_, invoke);
516}
517
518void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
519 ArmVIXLAssembler* assembler = GetAssembler();
520 // Ignore upper 4B of long address.
521 __ Ldrsb(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
522}
523
524void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
525 CreateIntToIntLocations(arena_, invoke);
526}
527
528void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
529 ArmVIXLAssembler* assembler = GetAssembler();
530 // Ignore upper 4B of long address.
531 __ Ldr(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
532}
533
534void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
535 CreateIntToIntLocations(arena_, invoke);
536}
537
538void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
539 ArmVIXLAssembler* assembler = GetAssembler();
540 // Ignore upper 4B of long address.
541 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
542 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
543 // exception. So we can't use ldrd as addr may be unaligned.
544 vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
545 vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
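  // If the address register aliases the low output register, load the high word first so the
  // address is still intact for the second load.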
546 if (addr.Is(lo)) {
547 __ Ldr(hi, MemOperand(addr, 4));
548 __ Ldr(lo, addr);
549 } else {
550 __ Ldr(lo, addr);
551 __ Ldr(hi, MemOperand(addr, 4));
552 }
553}
554
555void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
556 CreateIntToIntLocations(arena_, invoke);
557}
558
559void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
560 ArmVIXLAssembler* assembler = GetAssembler();
561 // Ignore upper 4B of long address.
562 __ Ldrsh(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
563}
564
565static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
566 LocationSummary* locations = new (arena) LocationSummary(invoke,
567 LocationSummary::kNoCall,
568 kIntrinsified);
569 locations->SetInAt(0, Location::RequiresRegister());
570 locations->SetInAt(1, Location::RequiresRegister());
571}
572
573void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
574 CreateIntIntToVoidLocations(arena_, invoke);
575}
576
577void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
578 ArmVIXLAssembler* assembler = GetAssembler();
579 __ Strb(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
580}
581
582void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
583 CreateIntIntToVoidLocations(arena_, invoke);
584}
585
586void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
587 ArmVIXLAssembler* assembler = GetAssembler();
588 __ Str(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
589}
590
591void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
592 CreateIntIntToVoidLocations(arena_, invoke);
593}
594
595void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
596 ArmVIXLAssembler* assembler = GetAssembler();
597 // Ignore upper 4B of long address.
598 vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
599 // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
600 // exception. So we can't use ldrd as addr may be unaligned.
601 __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), addr);
602 __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
603}
604
605void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
606 CreateIntIntToVoidLocations(arena_, invoke);
607}
608
609void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
610 ArmVIXLAssembler* assembler = GetAssembler();
611 __ Strh(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
612}
613
614void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
615 LocationSummary* locations = new (arena_) LocationSummary(invoke,
616 LocationSummary::kNoCall,
617 kIntrinsified);
618 locations->SetOut(Location::RequiresRegister());
619}
620
621void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
622 ArmVIXLAssembler* assembler = GetAssembler();
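  // Thread.currentThread() is the java.lang.Thread peer object stored in the Thread* held in
  // the reserved thread register (tr).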
623 __ Ldr(OutputRegister(invoke),
624 MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
625}
626
627static void GenUnsafeGet(HInvoke* invoke,
628 Primitive::Type type,
629 bool is_volatile,
630 CodeGeneratorARMVIXL* codegen) {
631 LocationSummary* locations = invoke->GetLocations();
632 ArmVIXLAssembler* assembler = codegen->GetAssembler();
633 Location base_loc = locations->InAt(1);
634 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
635 Location offset_loc = locations->InAt(2);
636 vixl32::Register offset = LowRegisterFrom(offset_loc); // Long offset, lo part only.
637 Location trg_loc = locations->Out();
638
639 switch (type) {
640 case Primitive::kPrimInt: {
641 vixl32::Register trg = RegisterFrom(trg_loc);
642 __ Ldr(trg, MemOperand(base, offset));
643 if (is_volatile) {
644 __ Dmb(vixl32::ISH);
645 }
646 break;
647 }
648
649 case Primitive::kPrimNot: {
650 vixl32::Register trg = RegisterFrom(trg_loc);
651 if (kEmitCompilerReadBarrier) {
652 if (kUseBakerReadBarrier) {
653 Location temp = locations->GetTemp(0);
654 codegen->GenerateReferenceLoadWithBakerReadBarrier(
655 invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
656 if (is_volatile) {
657 __ Dmb(vixl32::ISH);
658 }
659 } else {
660 __ Ldr(trg, MemOperand(base, offset));
661 if (is_volatile) {
662 __ Dmb(vixl32::ISH);
663 }
664 codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
665 }
666 } else {
667 __ Ldr(trg, MemOperand(base, offset));
668 if (is_volatile) {
669 __ Dmb(vixl32::ISH);
670 }
671 assembler->MaybeUnpoisonHeapReference(trg);
672 }
673 break;
674 }
675
676 case Primitive::kPrimLong: {
677 vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
678 vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
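      // A volatile 64-bit load must be single-copy atomic: use LDREXD when the CPU does not
      // provide atomic LDRD/STRD.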
679 if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
680 __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset));
681 } else {
682 __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
683 }
684 if (is_volatile) {
685 __ Dmb(vixl32::ISH);
686 }
687 break;
688 }
689
690 default:
691 LOG(FATAL) << "Unexpected type " << type;
692 UNREACHABLE();
693 }
694}
695
696static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
697 HInvoke* invoke,
698 Primitive::Type type) {
699 bool can_call = kEmitCompilerReadBarrier &&
700 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
701 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
702 LocationSummary* locations = new (arena) LocationSummary(invoke,
703 (can_call
704 ? LocationSummary::kCallOnSlowPath
705 : LocationSummary::kNoCall),
706 kIntrinsified);
707 if (can_call && kUseBakerReadBarrier) {
708 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
709 }
710 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
711 locations->SetInAt(1, Location::RequiresRegister());
712 locations->SetInAt(2, Location::RequiresRegister());
713 locations->SetOut(Location::RequiresRegister(),
714 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
715 if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
716 // We need a temporary register for the read barrier marking slow
717 // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
718 locations->AddTemp(Location::RequiresRegister());
719 }
720}
721
722void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
723 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
724}
725void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
726 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
727}
728void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
729 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
730}
731void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
732 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
733}
734void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
735 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
736}
737void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
738 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
739}
740
741void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
742 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
743}
744void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
745 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
746}
747void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
748 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
749}
750void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
751 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
752}
753void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
754 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
755}
756void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
757 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
758}
759
760static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
761 const ArmInstructionSetFeatures& features,
762 Primitive::Type type,
763 bool is_volatile,
764 HInvoke* invoke) {
765 LocationSummary* locations = new (arena) LocationSummary(invoke,
766 LocationSummary::kNoCall,
767 kIntrinsified);
768 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
769 locations->SetInAt(1, Location::RequiresRegister());
770 locations->SetInAt(2, Location::RequiresRegister());
771 locations->SetInAt(3, Location::RequiresRegister());
772
773 if (type == Primitive::kPrimLong) {
774 // Potentially need temps for ldrexd-strexd loop.
775 if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
776 locations->AddTemp(Location::RequiresRegister()); // Temp_lo.
777 locations->AddTemp(Location::RequiresRegister()); // Temp_hi.
778 }
779 } else if (type == Primitive::kPrimNot) {
780 // Temps for card-marking.
781 locations->AddTemp(Location::RequiresRegister()); // Temp.
782 locations->AddTemp(Location::RequiresRegister()); // Card.
783 }
784}
785
786void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
787 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
788}
789void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
790 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
791}
792void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
793 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
794}
795void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
796 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
797}
798void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
799 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
800}
801void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
802 CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
803}
804void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
805 CreateIntIntIntIntToVoid(
806 arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
807}
808void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
809 CreateIntIntIntIntToVoid(
810 arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
811}
812void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
813 CreateIntIntIntIntToVoid(
814 arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
815}
816
817static void GenUnsafePut(LocationSummary* locations,
818 Primitive::Type type,
819 bool is_volatile,
820 bool is_ordered,
821 CodeGeneratorARMVIXL* codegen) {
822 ArmVIXLAssembler* assembler = codegen->GetAssembler();
823
824 vixl32::Register base = RegisterFrom(locations->InAt(1)); // Object pointer.
825 vixl32::Register offset = LowRegisterFrom(locations->InAt(2)); // Long offset, lo part only.
826 vixl32::Register value;
827
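  // Ordered and volatile puts need a memory barrier before the store; volatile puts get a
  // second barrier after the store (see below).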
828 if (is_volatile || is_ordered) {
829 __ Dmb(vixl32::ISH);
830 }
831
832 if (type == Primitive::kPrimLong) {
833 vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
834 vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
835 value = value_lo;
836 if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
837 vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
838 vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
839 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
840 const vixl32::Register temp_reg = temps.Acquire();
841
842 __ Add(temp_reg, base, offset);
843 vixl32::Label loop_head;
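      // Store the 64-bit value atomically with an exclusive load/store pair, retrying until the
      // store-exclusive succeeds (Strexd writes 0 to temp_lo on success):
      //   do {
      //     ignored = [base + offset];  // Claim exclusive access.
      //   } while (failure([base + offset] <- value_lo:value_hi));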
844 __ Bind(&loop_head);
845 __ Ldrexd(temp_lo, temp_hi, temp_reg);
846 __ Strexd(temp_lo, value_lo, value_hi, temp_reg);
847 __ Cmp(temp_lo, 0);
848 __ B(ne, &loop_head);
849 } else {
850 __ Strd(value_lo, value_hi, MemOperand(base, offset));
851 }
852 } else {
853 value = RegisterFrom(locations->InAt(3));
854 vixl32::Register source = value;
855 if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
856 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
857 __ Mov(temp, value);
858 assembler->PoisonHeapReference(temp);
859 source = temp;
860 }
861 __ Str(source, MemOperand(base, offset));
862 }
863
864 if (is_volatile) {
865 __ Dmb(vixl32::ISH);
866 }
867
868 if (type == Primitive::kPrimNot) {
869 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
870 vixl32::Register card = RegisterFrom(locations->GetTemp(1));
871 bool value_can_be_null = true; // TODO: Worth finding out this information?
872 codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
873 }
874}
875
876void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
877 GenUnsafePut(invoke->GetLocations(),
878 Primitive::kPrimInt,
879 /* is_volatile */ false,
880 /* is_ordered */ false,
881 codegen_);
882}
883void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
884 GenUnsafePut(invoke->GetLocations(),
885 Primitive::kPrimInt,
886 /* is_volatile */ false,
887 /* is_ordered */ true,
888 codegen_);
889}
890void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
891 GenUnsafePut(invoke->GetLocations(),
892 Primitive::kPrimInt,
893 /* is_volatile */ true,
894 /* is_ordered */ false,
895 codegen_);
896}
897void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
898 GenUnsafePut(invoke->GetLocations(),
899 Primitive::kPrimNot,
900 /* is_volatile */ false,
901 /* is_ordered */ false,
902 codegen_);
903}
904void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
905 GenUnsafePut(invoke->GetLocations(),
906 Primitive::kPrimNot,
907 /* is_volatile */ false,
908 /* is_ordered */ true,
909 codegen_);
910}
911void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
912 GenUnsafePut(invoke->GetLocations(),
913 Primitive::kPrimNot,
914 /* is_volatile */ true,
915 /* is_ordered */ false,
916 codegen_);
917}
918void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
919 GenUnsafePut(invoke->GetLocations(),
920 Primitive::kPrimLong,
921 /* is_volatile */ false,
922 /* is_ordered */ false,
923 codegen_);
924}
925void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
926 GenUnsafePut(invoke->GetLocations(),
927 Primitive::kPrimLong,
928 /* is_volatile */ false,
929 /* is_ordered */ true,
930 codegen_);
931}
932void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
933 GenUnsafePut(invoke->GetLocations(),
934 Primitive::kPrimLong,
935 /* is_volatile */ true,
936 /* is_ordered */ false,
937 codegen_);
938}
939
940static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
941 HInvoke* invoke,
942 Primitive::Type type) {
943 bool can_call = kEmitCompilerReadBarrier &&
944 kUseBakerReadBarrier &&
945 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
946 LocationSummary* locations = new (arena) LocationSummary(invoke,
947 (can_call
948 ? LocationSummary::kCallOnSlowPath
949 : LocationSummary::kNoCall),
950 kIntrinsified);
951 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
952 locations->SetInAt(1, Location::RequiresRegister());
953 locations->SetInAt(2, Location::RequiresRegister());
954 locations->SetInAt(3, Location::RequiresRegister());
955 locations->SetInAt(4, Location::RequiresRegister());
956
957 // If heap poisoning is enabled, we don't want the unpoisoning
958 // operations to potentially clobber the output. Likewise when
959 // emitting a (Baker) read barrier, which may call.
960 Location::OutputOverlap overlaps =
961 ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
962 ? Location::kOutputOverlap
963 : Location::kNoOutputOverlap;
964 locations->SetOut(Location::RequiresRegister(), overlaps);
965
966 // Temporary registers used in CAS. In the object case
967 // (UnsafeCASObject intrinsic), these are also used for
968 // card-marking, and possibly for (Baker) read barrier.
969 locations->AddTemp(Location::RequiresRegister()); // Pointer.
970 locations->AddTemp(Location::RequiresRegister()); // Temp 1.
971}
972
973static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
974 DCHECK_NE(type, Primitive::kPrimLong);
975
976 ArmVIXLAssembler* assembler = codegen->GetAssembler();
977 LocationSummary* locations = invoke->GetLocations();
978
979 Location out_loc = locations->Out();
980 vixl32::Register out = OutputRegister(invoke); // Boolean result.
981
982 vixl32::Register base = InputRegisterAt(invoke, 1); // Object pointer.
983 Location offset_loc = locations->InAt(2);
984 vixl32::Register offset = LowRegisterFrom(offset_loc); // Offset (discard high 4B).
985 vixl32::Register expected = InputRegisterAt(invoke, 3); // Expected.
986 vixl32::Register value = InputRegisterAt(invoke, 4); // Value.
987
988 Location tmp_ptr_loc = locations->GetTemp(0);
989 vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc); // Pointer to actual memory.
990 vixl32::Register tmp = RegisterFrom(locations->GetTemp(1)); // Value in memory.
991
992 if (type == Primitive::kPrimNot) {
993 // The only read barrier implementation supporting the
994 // UnsafeCASObject intrinsic is the Baker-style read barriers.
995 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
996
997 // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
998 // object and scan the receiver at the next GC for nothing.
999 bool value_can_be_null = true; // TODO: Worth finding out this information?
1000 codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
1001
1002 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1003 // Need to make sure the reference stored in the field is a to-space
1004 // one before attempting the CAS or the CAS could fail incorrectly.
1005 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1006 invoke,
1007 out_loc, // Unused, used only as a "temporary" within the read barrier.
1008 base,
1009 /* offset */ 0u,
1010 /* index */ offset_loc,
1011 ScaleFactor::TIMES_1,
1012 tmp_ptr_loc,
1013 /* needs_null_check */ false,
1014 /* always_update_field */ true,
1015 &tmp);
1016 }
1017 }
1018
1019 // Prevent reordering with prior memory operations.
1020 // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
1021 // latter allows a preceding load to be delayed past the STXR
1022 // instruction below.
1023 __ Dmb(vixl32::ISH);
1024
1025 __ Add(tmp_ptr, base, offset);
1026
1027 if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1028 codegen->GetAssembler()->PoisonHeapReference(expected);
1029 if (value.Is(expected)) {
1030 // Do not poison `value`, as it is the same register as
1031 // `expected`, which has just been poisoned.
1032 } else {
1033 codegen->GetAssembler()->PoisonHeapReference(value);
1034 }
1035 }
1036
1037 // do {
1038 // tmp = [r_ptr] - expected;
1039 // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
1040 // result = tmp != 0;
1041
1042 vixl32::Label loop_head;
1043 __ Bind(&loop_head);
1044
1045 __ Ldrex(tmp, tmp_ptr);
1046
1047 __ Subs(tmp, tmp, expected);
1048
1049 {
1050 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1051 3 * kMaxInstructionSizeInBytes,
1052 CodeBufferCheckScope::kMaximumSize);
1053
1054 __ itt(eq);
1055 __ strex(eq, tmp, value, tmp_ptr);
1056 __ cmp(eq, tmp, 1);
1057 }
1058
1059 __ B(eq, &loop_head);
1060
1061 __ Dmb(vixl32::ISH);
1062
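  // Compute out = (tmp == 0): RSBS sets out = 1 - tmp, and the conditional MOV below clears
  // out when that subtraction borrows (tmp > 1), so out is 1 exactly when the CAS succeeded.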
1063 __ Rsbs(out, tmp, 1);
1064
1065 {
1066 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1067 2 * kMaxInstructionSizeInBytes,
1068 CodeBufferCheckScope::kMaximumSize);
1069
1070 __ it(cc);
1071 __ mov(cc, out, 0);
1072 }
1073
1074 if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1075 codegen->GetAssembler()->UnpoisonHeapReference(expected);
1076 if (value.Is(expected)) {
1077 // Do not unpoison `value`, as it is the same register as
1078 // `expected`, which has just been unpoisoned.
1079 } else {
1080 codegen->GetAssembler()->UnpoisonHeapReference(value);
1081 }
1082 }
1083}
1084
1085void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1086 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
1087}
1088void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1089 // The only read barrier implementation supporting the
1090 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1091 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1092 return;
1093 }
1094
1095 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
1096}
1097void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1098 GenCas(invoke, Primitive::kPrimInt, codegen_);
1099}
1100void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1101 // The only read barrier implementation supporting the
1102 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1103 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1104
1105 GenCas(invoke, Primitive::kPrimNot, codegen_);
1106}
1107
1108void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1109 // The inputs plus one temp.
1110 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1111 invoke->InputAt(1)->CanBeNull()
1112 ? LocationSummary::kCallOnSlowPath
1113 : LocationSummary::kNoCall,
1114 kIntrinsified);
1115 locations->SetInAt(0, Location::RequiresRegister());
1116 locations->SetInAt(1, Location::RequiresRegister());
1117 locations->AddTemp(Location::RequiresRegister());
1118 locations->AddTemp(Location::RequiresRegister());
1119 locations->AddTemp(Location::RequiresRegister());
1120 // Need temporary registers for the String compression feature.
1121 if (mirror::kUseStringCompression) {
1122 locations->AddTemp(Location::RequiresRegister());
1123 locations->AddTemp(Location::RequiresRegister());
1124 }
1125 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1126}
1127
1128void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1129 ArmVIXLAssembler* assembler = GetAssembler();
1130 LocationSummary* locations = invoke->GetLocations();
1131
1132 vixl32::Register str = InputRegisterAt(invoke, 0);
1133 vixl32::Register arg = InputRegisterAt(invoke, 1);
1134 vixl32::Register out = OutputRegister(invoke);
1135
1136 vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1137 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1138 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1139 vixl32::Register temp3, temp4;
1140 if (mirror::kUseStringCompression) {
1141 temp3 = RegisterFrom(locations->GetTemp(3));
1142 temp4 = RegisterFrom(locations->GetTemp(4));
1143 }
1144
1145 vixl32::Label loop;
1146 vixl32::Label find_char_diff;
1147 vixl32::Label end;
1148 vixl32::Label different_compression;
1149
1150 // Get offsets of count and value fields within a string object.
1151 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1152 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1153
1154 // Note that the null check must have been done earlier.
1155 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1156
1157 // Take slow path and throw if input can be and is null.
1158 SlowPathCodeARMVIXL* slow_path = nullptr;
1159 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1160 if (can_slow_path) {
1161 slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1162 codegen_->AddSlowPath(slow_path);
1163 __ Cbz(arg, slow_path->GetEntryLabel());
1164 }
1165
1166 // Reference equality check, return 0 if same reference.
1167 __ Subs(out, str, arg);
1168 __ B(eq, &end);
1169
1170 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1171 vixl32::Register temp_reg = temps.Acquire();
1172
1173 if (mirror::kUseStringCompression) {
1174 // Load lengths of this and argument strings.
1175 __ Ldr(temp3, MemOperand(str, count_offset));
1176 __ Ldr(temp4, MemOperand(arg, count_offset));
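    // The sign bit of the count field holds the compression flag (a set bit here means the
    // string is stored as compressed 8-bit chars).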
1177 // Clean out compression flag from lengths.
1178 __ Bic(temp0, temp3, 0x80000000);
1179 __ Bic(temp_reg, temp4, 0x80000000);
1180 } else {
1181 // Load lengths of this and argument strings.
1182 __ Ldr(temp0, MemOperand(str, count_offset));
1183 __ Ldr(temp_reg, MemOperand(arg, count_offset));
1184 }
1185 // out = length diff.
1186 __ Subs(out, temp0, temp_reg);
1187 // temp0 = min(len(str), len(arg)).
1188
1189 {
1190 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1191 2 * kMaxInstructionSizeInBytes,
1192 CodeBufferCheckScope::kMaximumSize);
1193
1194 __ it(gt);
1195 __ mov(gt, temp0, temp_reg);
1196 }
1197
1198 temps.Release(temp_reg);
1199 // Shorter string is empty?
1200 __ Cbz(temp0, &end);
1201
1202 if (mirror::kUseStringCompression) {
1203 // Check that both strings use the same compression style before using this comparison loop.
1204 __ Eors(temp3, temp3, temp4);
1205 __ B(mi, &different_compression);
1206 }
1207 // Store offset of string value in preparation for comparison loop.
1208 __ Mov(temp1, value_offset);
1209 if (mirror::kUseStringCompression) {
1210 // For string compression, calculate the number of bytes to compare (not chars).
1211 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
1212 __ Cmp(temp4, 0);
1213
1214 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1215 2 * kMaxInstructionSizeInBytes,
1216 CodeBufferCheckScope::kMaximumSize);
1217
1218 __ it(ge);
1219 __ add(ge, temp0, temp0, temp0);
1220 }
1221
1222 // Assertions that must hold in order to compare multiple characters at a time.
1223 CHECK_ALIGNED(value_offset, 8);
1224 static_assert(IsAligned<8>(kObjectAlignment),
1225 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1226
1227 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1228 DCHECK_EQ(char_size, 2u);
1229
1230 vixl32::Label find_char_diff_2nd_cmp;
1231 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1232 __ Bind(&loop);
1233 temp_reg = temps.Acquire();
1234 __ Ldr(temp_reg, MemOperand(str, temp1));
1235 __ Ldr(temp2, MemOperand(arg, temp1));
1236 __ Cmp(temp_reg, temp2);
1237 __ B(ne, &find_char_diff);
1238 __ Add(temp1, temp1, char_size * 2);
1239
1240 __ Ldr(temp_reg, MemOperand(str, temp1));
1241 __ Ldr(temp2, MemOperand(arg, temp1));
1242 __ Cmp(temp_reg, temp2);
1243 __ B(ne, &find_char_diff_2nd_cmp);
1244 __ Add(temp1, temp1, char_size * 2);
1245 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1246 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1247 __ B(hi, &loop);
1248 __ B(&end);
1249
1250 __ Bind(&find_char_diff_2nd_cmp);
1251 if (mirror::kUseStringCompression) {
1252 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
1253 __ B(ls, &end); // Was the second comparison fully beyond the end?
1254 } else {
1255 // Without string compression, we can start treating temp0 as signed
1256 // and rely on the signed comparison below.
1257 __ Sub(temp0, temp0, 2);
1258 }
1259
1260 // Find the single character difference.
1261 __ Bind(&find_char_diff);
1262 // Get the bit position of the first character that differs.
1263 __ Eor(temp1, temp2, temp_reg);
1264 __ Rbit(temp1, temp1);
1265 __ Clz(temp1, temp1);
1266
1267 // temp0 = number of characters remaining to compare.
1268 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1269 // in the comparison loop, and after the end of the shorter string data).
1270
1271 // Without string compression (temp1 >> 4) = character where difference occurs between the last
1272 // two words compared, in the interval [0,1].
1273 // (0 for low half-word different, 1 for high half-word different).
1274 // With string compression, (temp1 << 3) = byte where the difference occurs,
1275 // in the interval [0,3].
1276
1277 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1278 // the remaining string data, so just return length diff (out).
1279 // The comparison is unsigned for string compression, otherwise signed.
1280 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1281 __ B((mirror::kUseStringCompression ? ls : le), &end);
1282 // Extract the characters and calculate the difference.
1283 vixl32::Label uncompressed_string, continue_process;
1284 if (mirror::kUseStringCompression) {
1285 __ Cmp(temp4, 0);
1286 __ B(ge, &uncompressed_string);
1287 __ Bic(temp1, temp1, 0x7);
1288 __ B(&continue_process);
1289 }
1290 __ Bind(&uncompressed_string);
1291 __ Bic(temp1, temp1, 0xf);
1292 __ Bind(&continue_process);
1293
1294 __ Lsr(temp2, temp2, temp1);
1295 __ Lsr(temp_reg, temp_reg, temp1);
1296 vixl32::Label calculate_difference, uncompressed_string_extract_chars;
1297 if (mirror::kUseStringCompression) {
1298 __ Cmp(temp4, 0);
1299 __ B(ge, &uncompressed_string_extract_chars);
1300 __ Ubfx(temp2, temp2, 0, 8);
1301 __ Ubfx(temp_reg, temp_reg, 0, 8);
1302 __ B(&calculate_difference);
1303 }
1304 __ Bind(&uncompressed_string_extract_chars);
1305 __ Movt(temp2, 0);
1306 __ Movt(temp_reg, 0);
1307 __ Bind(&calculate_difference);
1308 __ Sub(out, temp_reg, temp2);
1309 temps.Release(temp_reg);
1310 __ B(&end);
1311
1312 if (mirror::kUseStringCompression) {
1313 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1314 DCHECK_EQ(c_char_size, 1u);
1315 vixl32::Label loop_arg_compressed, loop_this_compressed, find_diff;
1316 // Comparison for different compression style.
1317 // This part is when THIS is compressed and ARG is not.
1318 __ Bind(&different_compression);
1319 __ Add(temp2, str, value_offset);
1320 __ Add(temp3, arg, value_offset);
1321 __ Cmp(temp4, 0);
1322 __ B(lt, &loop_arg_compressed);
1323
1324 __ Bind(&loop_this_compressed);
1325 temp_reg = temps.Acquire();
1326 __ Ldrb(temp_reg, MemOperand(temp2, c_char_size, PostIndex));
1327 __ Ldrh(temp4, MemOperand(temp3, char_size, PostIndex));
1328 __ Cmp(temp_reg, temp4);
1329 __ B(ne, &find_diff);
1330 __ Subs(temp0, temp0, 1);
1331 __ B(gt, &loop_this_compressed);
1332 __ B(&end);
1333
1334 // This part is when THIS is not compressed and ARG is.
1335 __ Bind(&loop_arg_compressed);
1336 __ Ldrh(temp_reg, MemOperand(temp2, char_size, PostIndex));
1337 __ Ldrb(temp4, MemOperand(temp3, c_char_size, PostIndex));
1338 __ Cmp(temp_reg, temp4);
1339 __ B(ne, &find_diff);
1340 __ Subs(temp0, temp0, 1);
1341 __ B(gt, &loop_arg_compressed);
1342 __ B(&end);
1343
1344 // Calculate the difference.
1345 __ Bind(&find_diff);
1346 __ Sub(out, temp_reg, temp4);
1347 temps.Release(temp_reg);
1348 }
1349
1350 __ Bind(&end);
1351
1352 if (can_slow_path) {
1353 __ Bind(slow_path->GetExitLabel());
1354 }
1355}
1356
1357void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1358 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1359 LocationSummary::kNoCall,
1360 kIntrinsified);
1361 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1362 locations->SetInAt(0, Location::RequiresRegister());
1363 locations->SetInAt(1, Location::RequiresRegister());
1364 // Temporary registers to store lengths of strings and for calculations.
1365 // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
1366 locations->AddTemp(LocationFrom(r0));
1367 locations->AddTemp(Location::RequiresRegister());
1368 locations->AddTemp(Location::RequiresRegister());
1369
1370 locations->SetOut(Location::RequiresRegister());
1371}
1372
1373void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1374 ArmVIXLAssembler* assembler = GetAssembler();
1375 LocationSummary* locations = invoke->GetLocations();
1376
1377 vixl32::Register str = InputRegisterAt(invoke, 0);
1378 vixl32::Register arg = InputRegisterAt(invoke, 1);
1379 vixl32::Register out = OutputRegister(invoke);
1380
1381 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1382 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1383 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1384
1385 vixl32::Label loop, preloop;
1386 vixl32::Label end;
1387 vixl32::Label return_true;
1388 vixl32::Label return_false;
1389
1390 // Get offsets of count, value, and class fields within a string object.
1391 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1392 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1393 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1394
1395 // Note that the null check must have been done earlier.
1396 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1397
1398 StringEqualsOptimizations optimizations(invoke);
1399 if (!optimizations.GetArgumentNotNull()) {
1400 // Check if input is null, return false if it is.
1401 __ Cbz(arg, &return_false);
1402 }
1403
1404 if (!optimizations.GetArgumentIsString()) {
1405 // Instanceof check for the argument by comparing class fields.
1406 // All string objects must have the same type since String cannot be subclassed.
1407 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1408 // If the argument is a string object, its class field must be equal to receiver's class field.
1409 __ Ldr(temp, MemOperand(str, class_offset));
1410 __ Ldr(temp1, MemOperand(arg, class_offset));
1411 __ Cmp(temp, temp1);
1412 __ B(ne, &return_false);
1413 }
1414
1415 // Load lengths of this and argument strings.
1416 __ Ldr(temp, MemOperand(str, count_offset));
1417 __ Ldr(temp1, MemOperand(arg, count_offset));
1418 // Check if the lengths are equal; return false if they are not.
1419 // Also compare the compression style; if it differs, return false.
1420 __ Cmp(temp, temp1);
1421 __ B(ne, &return_false);
1422 // Return true if both strings are empty.
1423 if (mirror::kUseStringCompression) {
1424 // Length needs to be masked out first because 0 is treated as compressed.
1425 __ Bic(temp, temp, 0x80000000);
1426 }
1427 __ Cbz(temp, &return_true);
1428 // Reference equality check, return true if same reference.
1429 __ Cmp(str, arg);
1430 __ B(eq, &return_true);
1431
1432 // Assertions that must hold in order to compare strings 2 characters at a time.
1433 DCHECK_ALIGNED(value_offset, 4);
1434 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1435
1436 if (mirror::kUseStringCompression) {
1437 // If not compressed, go directly to the fast compare. Otherwise, preprocess the length first.
1438 __ Cmp(temp1, 0);
1439 __ B(gt, &preloop);
1440 // Mask out the compression flag and adjust the length of a compressed (8-bit) string
1441 // as if it were 16-bit data: new_length = (length + 1) / 2.
1442 __ Add(temp, temp, 1);
1443 __ Lsr(temp, temp, 1);
1444 __ Bind(&preloop);
1445 }
1446 // Loop to compare strings 2 characters at a time starting at the front of the string.
1447 // Ok to do this because strings with an odd length are zero-padded.
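  // For example (illustrative only): an uncompressed string of length 3 (6 bytes of character data
  // plus 2 bytes of zero padding) is compared in two 4-byte steps, and a compressed string of
  // length 5 is treated as (5 + 1) / 2 = 3 halfword units, which likewise takes two 4-byte steps.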
1448 __ Mov(temp1, value_offset);
1449 __ Bind(&loop);
1450 __ Ldr(out, MemOperand(str, temp1));
1451 __ Ldr(temp2, MemOperand(arg, temp1));
1452 __ Cmp(out, temp2);
1453 __ B(ne, &return_false);
1454 __ Add(temp1, temp1, sizeof(uint32_t));
1455 __ Subs(temp, temp, sizeof(uint32_t) / sizeof(uint16_t));
1456 __ B(gt, &loop);
1457
1458 // Return true and exit the function.
1459  // If the loop completed without finding a difference, we return true.
1460 __ Bind(&return_true);
1461 __ Mov(out, 1);
1462 __ B(&end);
1463
1464 // Return false and exit the function.
1465 __ Bind(&return_false);
1466 __ Mov(out, 0);
1467 __ Bind(&end);
1468}
1469
1470static void GenerateVisitStringIndexOf(HInvoke* invoke,
1471 ArmVIXLAssembler* assembler,
1472 CodeGeneratorARMVIXL* codegen,
1473 ArenaAllocator* allocator,
1474 bool start_at_zero) {
1475 LocationSummary* locations = invoke->GetLocations();
1476
1477 // Note that the null check must have been done earlier.
1478 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1479
1480 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1481 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1482 SlowPathCodeARMVIXL* slow_path = nullptr;
1483 HInstruction* code_point = invoke->InputAt(1);
1484 if (code_point->IsIntConstant()) {
1485 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1486 std::numeric_limits<uint16_t>::max()) {
1487 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1488 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1489 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1490 codegen->AddSlowPath(slow_path);
1491 __ B(slow_path->GetEntryLabel());
1492 __ Bind(slow_path->GetExitLabel());
1493 return;
1494 }
1495 } else if (code_point->GetType() != Primitive::kPrimChar) {
1496 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1497    // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
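    // (0x10000 is a single set bit and can be encoded as a rotated 8-bit immediate, whereas 0xffff
    // has 16 significant bits and no such encoding.)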
1498 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1499 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1500 codegen->AddSlowPath(slow_path);
1501 __ B(hs, slow_path->GetEntryLabel());
1502 }
1503
1504 if (start_at_zero) {
1505 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1506 DCHECK(tmp_reg.Is(r2));
1507 // Start-index = 0.
1508 __ Mov(tmp_reg, 0);
1509 }
1510
1511 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1512 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1513
1514 if (slow_path != nullptr) {
1515 __ Bind(slow_path->GetExitLabel());
1516 }
1517}
1518
1519void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1520 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1521 LocationSummary::kCallOnMainAndSlowPath,
1522 kIntrinsified);
1523 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1524 // best to align the inputs accordingly.
1525 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1526 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1527 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1528 locations->SetOut(LocationFrom(r0));
1529
1530 // Need to send start-index=0.
1531 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1532}
1533
1534void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1535 GenerateVisitStringIndexOf(
1536 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1537}
1538
1539void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1540 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1541 LocationSummary::kCallOnMainAndSlowPath,
1542 kIntrinsified);
1543 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1544 // best to align the inputs accordingly.
1545 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1546 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1547 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1548 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1549 locations->SetOut(LocationFrom(r0));
1550}
1551
1552void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1553 GenerateVisitStringIndexOf(
1554 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1555}
1556
1557void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1558 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1559 LocationSummary::kCallOnMainAndSlowPath,
1560 kIntrinsified);
1561 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1562 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1563 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1564 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1565 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1566 locations->SetOut(LocationFrom(r0));
1567}
1568
1569void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1570 ArmVIXLAssembler* assembler = GetAssembler();
1571 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1572 __ Cmp(byte_array, 0);
1573 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1574 codegen_->AddSlowPath(slow_path);
1575 __ B(eq, slow_path->GetEntryLabel());
1576
1577 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1578 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1579 __ Bind(slow_path->GetExitLabel());
1580}
1581
1582void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1583 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1584 LocationSummary::kCallOnMainOnly,
1585 kIntrinsified);
1586 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1587 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1588 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1589 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1590 locations->SetOut(LocationFrom(r0));
1591}
1592
1593void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1594 // No need to emit code checking whether `locations->InAt(2)` is a null
1595 // pointer, as callers of the native method
1596 //
1597 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1598 //
1599 // all include a null check on `data` before calling that method.
1600 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1601 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1602}
1603
1604void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1605 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1606 LocationSummary::kCallOnMainAndSlowPath,
1607 kIntrinsified);
1608 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1609 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1610 locations->SetOut(LocationFrom(r0));
1611}
1612
1613void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1614 ArmVIXLAssembler* assembler = GetAssembler();
1615 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1616 __ Cmp(string_to_copy, 0);
1617 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1618 codegen_->AddSlowPath(slow_path);
1619 __ B(eq, slow_path->GetEntryLabel());
1620
1621 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1622 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1623
1624 __ Bind(slow_path->GetExitLabel());
1625}
1626
1627void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1628 // The only read barrier implementation supporting the
1629  // SystemArrayCopy intrinsic is the Baker-style read barrier.
1630 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1631 return;
1632 }
1633
1634 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1635 LocationSummary* locations = invoke->GetLocations();
1636 if (locations == nullptr) {
1637 return;
1638 }
1639
1640 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1641 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1642 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1643
1644 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1645 locations->SetInAt(1, Location::RequiresRegister());
1646 }
1647 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1648 locations->SetInAt(3, Location::RequiresRegister());
1649 }
1650 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1651 locations->SetInAt(4, Location::RequiresRegister());
1652 }
1653 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1654 // Temporary register IP cannot be used in
1655 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1656 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1657 // temporary register from the register allocator.
1658 locations->AddTemp(Location::RequiresRegister());
1659 }
1660}
1661
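// Summary of the checks emitted below: for one (array, pos) pair of a System.arraycopy call,
// branch to `slow_path` unless pos >= 0, pos <= length(input) and length(input) - pos >= length;
// the constant and length_is_input_length cases below fold some of these checks. For example,
// pos = 2 with length = 5 on an input of length 6 fails the last check (6 - 2 = 4 < 5).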
1662static void CheckPosition(ArmVIXLAssembler* assembler,
1663 Location pos,
1664 vixl32::Register input,
1665 Location length,
1666 SlowPathCodeARMVIXL* slow_path,
1667 vixl32::Register temp,
1668 bool length_is_input_length = false) {
1669 // Where is the length in the Array?
1670 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1671
1672 if (pos.IsConstant()) {
1673 int32_t pos_const = Int32ConstantFrom(pos);
1674 if (pos_const == 0) {
1675 if (!length_is_input_length) {
1676 // Check that length(input) >= length.
1677 __ Ldr(temp, MemOperand(input, length_offset));
1678 if (length.IsConstant()) {
1679 __ Cmp(temp, Int32ConstantFrom(length));
1680 } else {
1681 __ Cmp(temp, RegisterFrom(length));
1682 }
1683 __ B(lt, slow_path->GetEntryLabel());
1684 }
1685 } else {
1686 // Check that length(input) >= pos.
1687 __ Ldr(temp, MemOperand(input, length_offset));
1688 __ Subs(temp, temp, pos_const);
1689 __ B(lt, slow_path->GetEntryLabel());
1690
1691 // Check that (length(input) - pos) >= length.
1692 if (length.IsConstant()) {
1693 __ Cmp(temp, Int32ConstantFrom(length));
1694 } else {
1695 __ Cmp(temp, RegisterFrom(length));
1696 }
1697 __ B(lt, slow_path->GetEntryLabel());
1698 }
1699 } else if (length_is_input_length) {
1700 // The only way the copy can succeed is if pos is zero.
1701 vixl32::Register pos_reg = RegisterFrom(pos);
1702 __ Cbnz(pos_reg, slow_path->GetEntryLabel());
1703 } else {
1704 // Check that pos >= 0.
1705 vixl32::Register pos_reg = RegisterFrom(pos);
1706 __ Cmp(pos_reg, 0);
1707 __ B(lt, slow_path->GetEntryLabel());
1708
1709 // Check that pos <= length(input).
1710 __ Ldr(temp, MemOperand(input, length_offset));
1711 __ Subs(temp, temp, pos_reg);
1712 __ B(lt, slow_path->GetEntryLabel());
1713
1714 // Check that (length(input) - pos) >= length.
1715 if (length.IsConstant()) {
1716 __ Cmp(temp, Int32ConstantFrom(length));
1717 } else {
1718 __ Cmp(temp, RegisterFrom(length));
1719 }
1720 __ B(lt, slow_path->GetEntryLabel());
1721 }
1722}
1723
1724void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1725 // The only read barrier implementation supporting the
1726  // SystemArrayCopy intrinsic is the Baker-style read barrier.
1727 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1728
1729 ArmVIXLAssembler* assembler = GetAssembler();
1730 LocationSummary* locations = invoke->GetLocations();
1731
1732 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1733 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1734 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1735 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1736 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1737
1738 vixl32::Register src = InputRegisterAt(invoke, 0);
1739 Location src_pos = locations->InAt(1);
1740 vixl32::Register dest = InputRegisterAt(invoke, 2);
1741 Location dest_pos = locations->InAt(3);
1742 Location length = locations->InAt(4);
1743 Location temp1_loc = locations->GetTemp(0);
1744 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1745 Location temp2_loc = locations->GetTemp(1);
1746 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1747 Location temp3_loc = locations->GetTemp(2);
1748 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1749
1750 SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1751 codegen_->AddSlowPath(intrinsic_slow_path);
1752
1753 vixl32::Label conditions_on_positions_validated;
1754 SystemArrayCopyOptimizations optimizations(invoke);
1755
1756  // If source and destination are the same, we go to the slow path if the copy would move
1757  // elements forward in the array (dest_pos > src_pos), as the fast path copies in ascending order.
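  // For example, copying elements [0..3] onto positions [1..4] of the same array is handled by
  // the slow path, while copying [1..4] onto [0..3] can safely use the fast path.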
1758 if (src_pos.IsConstant()) {
1759 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1760 if (dest_pos.IsConstant()) {
1761 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1762 if (optimizations.GetDestinationIsSource()) {
1763 // Checked when building locations.
1764 DCHECK_GE(src_pos_constant, dest_pos_constant);
1765 } else if (src_pos_constant < dest_pos_constant) {
1766 __ Cmp(src, dest);
1767 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1768 }
1769
1770 // Checked when building locations.
1771 DCHECK(!optimizations.GetDestinationIsSource()
1772 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1773 } else {
1774 if (!optimizations.GetDestinationIsSource()) {
1775 __ Cmp(src, dest);
1776 __ B(ne, &conditions_on_positions_validated);
1777 }
1778 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1779 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1780 }
1781 } else {
1782 if (!optimizations.GetDestinationIsSource()) {
1783 __ Cmp(src, dest);
1784 __ B(ne, &conditions_on_positions_validated);
1785 }
1786 if (dest_pos.IsConstant()) {
1787 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1788 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1789 } else {
1790 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1791 }
1792 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1793 }
1794
1795 __ Bind(&conditions_on_positions_validated);
1796
1797 if (!optimizations.GetSourceIsNotNull()) {
1798 // Bail out if the source is null.
1799 __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
1800 }
1801
1802 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1803 // Bail out if the destination is null.
1804 __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
1805 }
1806
1807 // If the length is negative, bail out.
1808 // We have already checked in the LocationsBuilder for the constant case.
1809 if (!length.IsConstant() &&
1810 !optimizations.GetCountIsSourceLength() &&
1811 !optimizations.GetCountIsDestinationLength()) {
1812 __ Cmp(RegisterFrom(length), 0);
1813 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1814 }
1815
1816 // Validity checks: source.
1817 CheckPosition(assembler,
1818 src_pos,
1819 src,
1820 length,
1821 intrinsic_slow_path,
1822 temp1,
1823 optimizations.GetCountIsSourceLength());
1824
1825 // Validity checks: dest.
1826 CheckPosition(assembler,
1827 dest_pos,
1828 dest,
1829 length,
1830 intrinsic_slow_path,
1831 temp1,
1832 optimizations.GetCountIsDestinationLength());
1833
1834 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1835 // Check whether all elements of the source array are assignable to the component
1836 // type of the destination array. We do two checks: the classes are the same,
1837    // or the destination is Object[]. If neither check succeeds, we go to the
1838    // slow path.
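    // For example, at the Java level: copying an Integer[] into another Integer[] passes the first
    // check, copying an Integer[] into an Object[] can pass the second, and copying an Integer[]
    // into a Number[] passes neither and is left to the slow path.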
1839
1840 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1841 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1842 // /* HeapReference<Class> */ temp1 = src->klass_
1843 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1844 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1845        // Bail out if the source is not a non-primitive array.
1846 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1847 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1848 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1849 __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
1850 // If heap poisoning is enabled, `temp1` has been unpoisoned
1851        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1852 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1853 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1854 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1855 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
1856 }
1857
1858 // /* HeapReference<Class> */ temp1 = dest->klass_
1859 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1860 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
1861
1862 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1863        // Bail out if the destination is not a non-primitive array.
1864 //
1865 // Register `temp1` is not trashed by the read barrier emitted
1866 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1867 // method produces a call to a ReadBarrierMarkRegX entry point,
1868 // which saves all potentially live registers, including
1869        // temporaries such as `temp1`.
1870 // /* HeapReference<Class> */ temp2 = temp1->component_type_
1871 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1872 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
1873 __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
1874 // If heap poisoning is enabled, `temp2` has been unpoisoned
1875        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1876 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1877 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1878 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1879 __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
1880 }
1881
1882 // For the same reason given earlier, `temp1` is not trashed by the
1883 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1884 // /* HeapReference<Class> */ temp2 = src->klass_
1885 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1886 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
1887 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1888 __ Cmp(temp1, temp2);
1889
1890 if (optimizations.GetDestinationIsTypedObjectArray()) {
1891 vixl32::Label do_copy;
1892 __ B(eq, &do_copy);
1893 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1894 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1895 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1896 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1897 // We do not need to emit a read barrier for the following
1898 // heap reference load, as `temp1` is only used in a
1899 // comparison with null below, and this reference is not
1900 // kept afterwards.
1901 __ Ldr(temp1, MemOperand(temp1, super_offset));
1902 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
1903 __ Bind(&do_copy);
1904 } else {
1905 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1906 }
1907 } else {
1908 // Non read barrier code.
1909
1910 // /* HeapReference<Class> */ temp1 = dest->klass_
1911 __ Ldr(temp1, MemOperand(dest, class_offset));
1912 // /* HeapReference<Class> */ temp2 = src->klass_
1913 __ Ldr(temp2, MemOperand(src, class_offset));
1914 bool did_unpoison = false;
1915 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1916 !optimizations.GetSourceIsNonPrimitiveArray()) {
1917 // One or two of the references need to be unpoisoned. Unpoison them
1918 // both to make the identity check valid.
1919 assembler->MaybeUnpoisonHeapReference(temp1);
1920 assembler->MaybeUnpoisonHeapReference(temp2);
1921 did_unpoison = true;
1922 }
1923
1924 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1925        // Bail out if the destination is not a non-primitive array.
1926 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1927 __ Ldr(temp3, MemOperand(temp1, component_offset));
1928 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
1929 assembler->MaybeUnpoisonHeapReference(temp3);
1930 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1931 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1932 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1933 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
1934 }
1935
1936 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1937        // Bail out if the source is not a non-primitive array.
1938 // /* HeapReference<Class> */ temp3 = temp2->component_type_
1939 __ Ldr(temp3, MemOperand(temp2, component_offset));
1940 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
1941 assembler->MaybeUnpoisonHeapReference(temp3);
1942 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1943 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1944 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1945 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
1946 }
1947
1948 __ Cmp(temp1, temp2);
1949
1950 if (optimizations.GetDestinationIsTypedObjectArray()) {
1951 vixl32::Label do_copy;
1952 __ B(eq, &do_copy);
1953 if (!did_unpoison) {
1954 assembler->MaybeUnpoisonHeapReference(temp1);
1955 }
1956 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1957 __ Ldr(temp1, MemOperand(temp1, component_offset));
1958 assembler->MaybeUnpoisonHeapReference(temp1);
1959 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1960 __ Ldr(temp1, MemOperand(temp1, super_offset));
1961 // No need to unpoison the result, we're comparing against null.
1962 __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
1963 __ Bind(&do_copy);
1964 } else {
1965 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1966 }
1967 }
1968 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1969 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1970    // Bail out if the source is not a non-primitive array.
1971 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1972 // /* HeapReference<Class> */ temp1 = src->klass_
1973 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1974 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1975 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1976 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1977 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1978 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
1979 // If heap poisoning is enabled, `temp3` has been unpoisoned
1980      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1981 } else {
1982 // /* HeapReference<Class> */ temp1 = src->klass_
1983 __ Ldr(temp1, MemOperand(src, class_offset));
1984 assembler->MaybeUnpoisonHeapReference(temp1);
1985 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1986 __ Ldr(temp3, MemOperand(temp1, component_offset));
1987 __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
1988 assembler->MaybeUnpoisonHeapReference(temp3);
1989 }
1990 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1991 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1992 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
1993 __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
1994 }
1995
1996 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
1997 uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
1998 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
1999
2000 // Compute the base source address in `temp1`.
2001 if (src_pos.IsConstant()) {
2002 int32_t constant = Int32ConstantFrom(src_pos);
2003 __ Add(temp1, src, element_size * constant + offset);
2004 } else {
2005 __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
2006 __ Add(temp1, temp1, offset);
2007 }
2008
2009 // Compute the end source address in `temp3`.
2010 if (length.IsConstant()) {
2011 int32_t constant = Int32ConstantFrom(length);
2012 __ Add(temp3, temp1, element_size * constant);
2013 } else {
2014 __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
2015 }
2016
2017 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2018 // The base destination address is computed later, as `temp2` is
2019 // used for intermediate computations.
2020
2021 // SystemArrayCopy implementation for Baker read barriers (see
2022 // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2023 //
2024 // if (src_ptr != end_ptr) {
2025    //   uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
2026 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
2027 // bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
2028 // if (is_gray) {
2029 // // Slow-path copy.
2030 // do {
2031 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2032 // } while (src_ptr != end_ptr)
2033 // } else {
2034 // // Fast-path copy.
2035 // do {
2036 // *dest_ptr++ = *src_ptr++;
2037 // } while (src_ptr != end_ptr)
2038 // }
2039 // }
2040
2041 vixl32::Label loop, done;
2042
2043 // Don't enter copy loop if `length == 0`.
2044 __ Cmp(temp1, temp3);
2045 __ B(eq, &done);
2046
2047 // /* int32_t */ monitor = src->monitor_
2048 __ Ldr(temp2, MemOperand(src, monitor_offset));
2049 // /* LockWord */ lock_word = LockWord(monitor)
2050 static_assert(sizeof(LockWord) == sizeof(int32_t),
2051 "art::LockWord and int32_t have different sizes.");
2052
2053 // Introduce a dependency on the lock_word including the rb_state,
2054 // which shall prevent load-load reordering without using
2055 // a memory barrier (which would be more expensive).
2056 // `src` is unchanged by this operation, but its value now depends
2057 // on `temp2`.
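    // (A logical shift right by 32 produces 0, so the ADD below leaves the value of `src`
    // unchanged while still creating the register dependency on `temp2`.)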
2058 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
2059
2060 // Slow path used to copy array when `src` is gray.
2061 SlowPathCodeARMVIXL* read_barrier_slow_path =
2062 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2063 codegen_->AddSlowPath(read_barrier_slow_path);
2064
2065 // Given the numeric representation, it's enough to check the low bit of the
2066 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2067    // which can be a 16-bit instruction, unlike the TST immediate.
2068 static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
2069 static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
2070 static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
2071 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2072 // Carry flag is the last bit shifted out by LSRS.
2073 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2074
2075 // Fast-path copy.
2076
2077 // Compute the base destination address in `temp2`.
2078 if (dest_pos.IsConstant()) {
2079 int32_t constant = Int32ConstantFrom(dest_pos);
2080 __ Add(temp2, dest, element_size * constant + offset);
2081 } else {
2082 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2083 __ Add(temp2, temp2, offset);
2084 }
2085
2086 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2087 // poison/unpoison.
2088 __ Bind(&loop);
2089
2090 {
2091 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2092 const vixl32::Register temp_reg = temps.Acquire();
2093
2094 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2095 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2096 }
2097
2098 __ Cmp(temp1, temp3);
2099 __ B(ne, &loop);
2100
2101 __ Bind(read_barrier_slow_path->GetExitLabel());
2102 __ Bind(&done);
2103 } else {
2104 // Non read barrier code.
2105
2106 // Compute the base destination address in `temp2`.
2107 if (dest_pos.IsConstant()) {
2108 int32_t constant = Int32ConstantFrom(dest_pos);
2109 __ Add(temp2, dest, element_size * constant + offset);
2110 } else {
2111 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2112 __ Add(temp2, temp2, offset);
2113 }
2114
2115 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2116 // poison/unpoison.
2117 vixl32::Label loop, done;
2118 __ Cmp(temp1, temp3);
2119 __ B(eq, &done);
2120 __ Bind(&loop);
2121
2122 {
2123 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2124 const vixl32::Register temp_reg = temps.Acquire();
2125
2126 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2127 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2128 }
2129
2130 __ Cmp(temp1, temp3);
2131 __ B(ne, &loop);
2132 __ Bind(&done);
2133 }
2134
2135 // We only need one card marking on the destination array.
2136 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2137
2138 __ Bind(intrinsic_slow_path->GetExitLabel());
2139}
2140
2141static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2142 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2143 // the code generator. Furthermore, the register allocator creates fixed live intervals
2144 // for all caller-saved registers because we are doing a function call. As a result, if
2145 // the input and output locations are unallocated, the register allocator runs out of
2146 // registers and fails; however, a debuggable graph is not the common case.
2147 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2148 return;
2149 }
2150
2151 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2152 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2153 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2154
2155 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2156 LocationSummary::kCallOnMainOnly,
2157 kIntrinsified);
2158 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2159
2160 locations->SetInAt(0, Location::RequiresFpuRegister());
2161 locations->SetOut(Location::RequiresFpuRegister());
2162 // Native code uses the soft float ABI.
2163 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2164 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2165}
2166
2167static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2168 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2169 // the code generator. Furthermore, the register allocator creates fixed live intervals
2170 // for all caller-saved registers because we are doing a function call. As a result, if
2171 // the input and output locations are unallocated, the register allocator runs out of
2172 // registers and fails; however, a debuggable graph is not the common case.
2173 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2174 return;
2175 }
2176
2177 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2178 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2179 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2180 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2181
2182 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2183 LocationSummary::kCallOnMainOnly,
2184 kIntrinsified);
2185 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2186
2187 locations->SetInAt(0, Location::RequiresFpuRegister());
2188 locations->SetInAt(1, Location::RequiresFpuRegister());
2189 locations->SetOut(Location::RequiresFpuRegister());
2190 // Native code uses the soft float ABI.
2191 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2192 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2193 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2194 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2195}
2196
2197static void GenFPToFPCall(HInvoke* invoke,
2198 ArmVIXLAssembler* assembler,
2199 CodeGeneratorARMVIXL* codegen,
2200 QuickEntrypointEnum entry) {
2201 LocationSummary* const locations = invoke->GetLocations();
2202
2203 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2204 DCHECK(locations->WillCall() && locations->Intrinsified());
2205
2206 // Native code uses the soft float ABI.
2207 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2208 RegisterFrom(locations->GetTemp(1)),
2209 InputDRegisterAt(invoke, 0));
2210 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2211 __ Vmov(OutputDRegister(invoke),
2212 RegisterFrom(locations->GetTemp(0)),
2213 RegisterFrom(locations->GetTemp(1)));
2214}
2215
2216static void GenFPFPToFPCall(HInvoke* invoke,
2217 ArmVIXLAssembler* assembler,
2218 CodeGeneratorARMVIXL* codegen,
2219 QuickEntrypointEnum entry) {
2220 LocationSummary* const locations = invoke->GetLocations();
2221
2222 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2223 DCHECK(locations->WillCall() && locations->Intrinsified());
2224
2225 // Native code uses the soft float ABI.
2226 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2227 RegisterFrom(locations->GetTemp(1)),
2228 InputDRegisterAt(invoke, 0));
2229 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2230 RegisterFrom(locations->GetTemp(3)),
2231 InputDRegisterAt(invoke, 1));
2232 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2233 __ Vmov(OutputDRegister(invoke),
2234 RegisterFrom(locations->GetTemp(0)),
2235 RegisterFrom(locations->GetTemp(1)));
2236}
2237
2238void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2239 CreateFPToFPCallLocations(arena_, invoke);
2240}
2241
2242void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2243 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2244}
2245
2246void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2247 CreateFPToFPCallLocations(arena_, invoke);
2248}
2249
2250void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2251 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2252}
2253
2254void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2255 CreateFPToFPCallLocations(arena_, invoke);
2256}
2257
2258void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2259 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2260}
2261
2262void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2263 CreateFPToFPCallLocations(arena_, invoke);
2264}
2265
2266void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2267 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2268}
2269
2270void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2271 CreateFPToFPCallLocations(arena_, invoke);
2272}
2273
2274void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2275 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2276}
2277
2278void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2279 CreateFPToFPCallLocations(arena_, invoke);
2280}
2281
2282void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2283 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2284}
2285
2286void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2287 CreateFPToFPCallLocations(arena_, invoke);
2288}
2289
2290void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2291 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2292}
2293
2294void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2295 CreateFPToFPCallLocations(arena_, invoke);
2296}
2297
2298void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2299 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2300}
2301
2302void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2303 CreateFPToFPCallLocations(arena_, invoke);
2304}
2305
2306void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2307 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2308}
2309
2310void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2311 CreateFPToFPCallLocations(arena_, invoke);
2312}
2313
2314void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2315 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2316}
2317
2318void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2319 CreateFPToFPCallLocations(arena_, invoke);
2320}
2321
2322void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2323 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2324}
2325
2326void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2327 CreateFPToFPCallLocations(arena_, invoke);
2328}
2329
2330void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2331 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2332}
2333
2334void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2335 CreateFPToFPCallLocations(arena_, invoke);
2336}
2337
2338void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2339 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2340}
2341
2342void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2343 CreateFPToFPCallLocations(arena_, invoke);
2344}
2345
2346void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2347 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2348}
2349
2350void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2351 CreateFPFPToFPCallLocations(arena_, invoke);
2352}
2353
2354void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2355 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2356}
2357
2358void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2359 CreateFPFPToFPCallLocations(arena_, invoke);
2360}
2361
2362void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2363 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2364}
2365
2366void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2367 CreateFPFPToFPCallLocations(arena_, invoke);
2368}
2369
2370void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2371 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2372}
2373
2374void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2375 CreateIntToIntLocations(arena_, invoke);
2376}
2377
2378void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2379 ArmVIXLAssembler* assembler = GetAssembler();
2380 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2381}
2382
2383void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2384 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2385 LocationSummary::kNoCall,
2386 kIntrinsified);
2387 locations->SetInAt(0, Location::RequiresRegister());
2388 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2389}
2390
2391void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2392 ArmVIXLAssembler* assembler = GetAssembler();
2393 LocationSummary* locations = invoke->GetLocations();
2394
2395 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2396 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2397 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2398 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2399
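  // Bit-reversing a 64-bit value reverses the bits within each 32-bit half and swaps the halves,
  // hence the crossed low/high operands below.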
2400 __ Rbit(out_reg_lo, in_reg_hi);
2401 __ Rbit(out_reg_hi, in_reg_lo);
2402}
2403
2404void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2405 CreateIntToIntLocations(arena_, invoke);
2406}
2407
2408void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2409 ArmVIXLAssembler* assembler = GetAssembler();
2410 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2411}
2412
2413void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2414 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2415 LocationSummary::kNoCall,
2416 kIntrinsified);
2417 locations->SetInAt(0, Location::RequiresRegister());
2418 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2419}
2420
2421void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2422 ArmVIXLAssembler* assembler = GetAssembler();
2423 LocationSummary* locations = invoke->GetLocations();
2424
2425 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2426 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2427 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2428 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2429
2430 __ Rev(out_reg_lo, in_reg_hi);
2431 __ Rev(out_reg_hi, in_reg_lo);
2432}
2433
2434void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2435 CreateIntToIntLocations(arena_, invoke);
2436}
2437
2438void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2439 ArmVIXLAssembler* assembler = GetAssembler();
2440 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2441}
2442
2443static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
2444 DCHECK(Primitive::IsIntOrLongType(type)) << type;
2445 DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2446 DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2447
2448 bool is_long = type == Primitive::kPrimLong;
2449 LocationSummary* locations = instr->GetLocations();
2450 Location in = locations->InAt(0);
2451 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2452 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2453 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2454 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2455 vixl32::Register out_r = OutputRegister(instr);
2456
2457 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2458  // According to the Cortex-A57 and A72 optimization guides, transferring data from a core reg to
2459  // the upper or lower half of a VFP D-reg has extra latency compared to transferring to a full D-reg.
2460  // That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
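  // Illustrative example (not generated code): for a 32-bit input of 0x0000ff01, VCNT produces the
  // per-byte counts 1, 8, 0 and 0 in the low half, the first VPADDL sums adjacent bytes to 9 and 0,
  // and the second sums those to 9 = popcount(0x0000ff01), which is then moved back to a core reg.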
2461 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2462 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2463 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2464 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2465 if (is_long) {
2466 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2467 }
2468 __ Vmov(out_r, tmp_s);
2469}
2470
2471void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2472 CreateIntToIntLocations(arena_, invoke);
2473 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2474}
2475
2476void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2477 GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2478}
2479
2480void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2481 VisitIntegerBitCount(invoke);
2482}
2483
2484void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2485 GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2486}
2487
2488void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2489 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2490 LocationSummary::kNoCall,
2491 kIntrinsified);
2492 locations->SetInAt(0, Location::RequiresRegister());
2493 locations->SetInAt(1, Location::RequiresRegister());
2494 locations->SetInAt(2, Location::RequiresRegister());
2495 locations->SetInAt(3, Location::RequiresRegister());
2496 locations->SetInAt(4, Location::RequiresRegister());
2497
2498 // Temporary registers to store lengths of strings and for calculations.
2499 locations->AddTemp(Location::RequiresRegister());
2500 locations->AddTemp(Location::RequiresRegister());
2501 locations->AddTemp(Location::RequiresRegister());
2502}
2503
2504void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2505 ArmVIXLAssembler* assembler = GetAssembler();
2506 LocationSummary* locations = invoke->GetLocations();
2507
2508 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2509 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2510 DCHECK_EQ(char_size, 2u);
2511
2512 // Location of data in char array buffer.
2513 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2514
2515 // Location of char array data in string.
2516 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2517
2518 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2519  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2520 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2521 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2522 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2523 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2524 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2525
2526 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2527 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2528 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2529
2530 vixl32::Label done, compressed_string_loop;
2531 // dst to be copied.
2532 __ Add(dst_ptr, dstObj, data_offset);
2533 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2534
2535 __ Subs(num_chr, srcEnd, srcBegin);
2536 // Early out for valid zero-length retrievals.
2537 __ B(eq, &done);
2538
2539 // src range to copy.
2540 __ Add(src_ptr, srcObj, value_offset);
2541
2542 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2543 vixl32::Register temp;
2544 vixl32::Label compressed_string_preloop;
2545 if (mirror::kUseStringCompression) {
2546 // Location of count in string.
2547 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2548 temp = temps.Acquire();
2549 // String's length.
2550 __ Ldr(temp, MemOperand(srcObj, count_offset));
2551 __ Cmp(temp, 0);
2552 temps.Release(temp);
2553 __ B(lt, &compressed_string_preloop);
2554 }
2555 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2556
2557 // Do the copy.
2558 vixl32::Label loop, remainder;
2559
2560 temp = temps.Acquire();
2561  // Avoid having to repair the value of num_chr on the < 4 character path.
2562 __ Subs(temp, num_chr, 4);
2563 __ B(lt, &remainder);
2564
2565 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2566 __ Mov(num_chr, temp);
2567
2568  // The main loop, used for longer copies, loads and stores 4 x 16-bit characters at a time.
2569 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2570 // to rectify these everywhere this intrinsic applies.)
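  // Each iteration below copies one 4-character (8-byte) group with two word-sized accesses:
  // chars [2..3] of the group first, then chars [0..1] with a post-indexed access that advances
  // both pointers by 8 bytes.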
2571 __ Bind(&loop);
2572 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2573 __ Subs(num_chr, num_chr, 4);
2574 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2575 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2576 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2577 temps.Release(temp);
2578 __ B(ge, &loop);
2579
2580 __ Adds(num_chr, num_chr, 4);
2581 __ B(eq, &done);
2582
2583 // Main loop for < 4 character case and remainder handling. Loads and stores one
2584 // 16-bit Java character at a time.
2585 __ Bind(&remainder);
2586 temp = temps.Acquire();
2587 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2588 __ Subs(num_chr, num_chr, 1);
2589 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2590 temps.Release(temp);
2591 __ B(gt, &remainder);
2592 __ B(&done);
2593
2594 if (mirror::kUseStringCompression) {
2595 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2596 DCHECK_EQ(c_char_size, 1u);
2597 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2598 __ Bind(&compressed_string_preloop);
2599 __ Add(src_ptr, src_ptr, srcBegin);
2600 __ Bind(&compressed_string_loop);
2601 temp = temps.Acquire();
2602 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2603 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2604 temps.Release(temp);
2605 __ Subs(num_chr, num_chr, 1);
2606 __ B(gt, &compressed_string_loop);
2607 }
2608
2609 __ Bind(&done);
2610}
2611
2612void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2613 CreateFPToIntLocations(arena_, invoke);
2614}
2615
2616void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2617 ArmVIXLAssembler* const assembler = GetAssembler();
2618 const vixl32::Register out = OutputRegister(invoke);
2619 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2620 // we don't care about the sign bit anyway.
2621 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
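  // Worked example (illustrative only): +Inf is 0x7f800000; LSL #1 gives 0xff000000, the EOR then
  // yields 0, CLZ yields 32 and LSR #5 yields 1. For 1.0f (0x3f800000) the same sequence yields
  // 0x7f000000, 0x80000000, 0 and finally 0.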
2622
2623 __ Vmov(out, InputSRegisterAt(invoke, 0));
2624 // We don't care about the sign bit, so shift left.
2625 __ Lsl(out, out, 1);
2626 __ Eor(out, out, infinity);
2627 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2628 __ Clz(out, out);
2629 // Any number less than 32 logically shifted right by 5 bits results in 0;
2630 // the same operation on 32 yields 1.
2631 __ Lsr(out, out, 5);
2632}
2633
2634void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2635 CreateFPToIntLocations(arena_, invoke);
2636}
2637
2638void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2639 ArmVIXLAssembler* const assembler = GetAssembler();
2640 const vixl32::Register out = OutputRegister(invoke);
2641 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2642 const vixl32::Register temp = temps.Acquire();
2643 // The highest 32 bits of double precision positive infinity separated into
2644 // two constants encodable as immediate operands.
2645 constexpr uint32_t infinity_high = 0x7f000000U;
2646 constexpr uint32_t infinity_high2 = 0x00f00000U;
2647
2648 static_assert((infinity_high | infinity_high2) ==
2649 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2650 "The constants do not add up to the high 32 bits of double "
2651 "precision positive infinity.");
2652 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2653 __ Eor(out, out, infinity_high);
2654 __ Eor(out, out, infinity_high2);
2655 // We don't care about the sign bit, so shift left.
2656 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2657 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2658 __ Clz(out, out);
2659 // Any number less than 32 logically shifted right by 5 bits results in 0;
2660 // the same operation on 32 yields 1.
2661 __ Lsr(out, out, 5);
2662}
2663
2664UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
2665UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
2666UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
2667UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
2668UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
2669UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
2670UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil) // Could be done by changing rounding mode, maybe?
2671UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor) // Could be done by changing rounding mode, maybe?
2672UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
2673UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
2674UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
2675UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
2676UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
2677UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
2678UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
2679UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
2680UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
2681UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
2682
2683// 1.8.
2684UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
2685UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
2686UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
2687UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
2688UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
2689
2690UNREACHABLE_INTRINSICS(ARMVIXL)
2691
2692#undef __
2693
2694} // namespace arm
2695} // namespace art