/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "lock_word.h"
#include "mirror/array-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
    }
    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    if (dest_pos.IsConstant()) {
      int32_t constant = Int32ConstantFrom(dest_pos);
      __ Add(dst_curr_addr, dest, element_size * constant + offset);
    } else {
      __ Add(dst_curr_addr,
             dest,
             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
      __ Add(dst_curr_addr, dst_curr_addr, offset);
    }

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

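// Common code for Integer.numberOfLeadingZeros and Long.numberOfLeadingZeros.
// For ints this is a single CLZ. For longs, CLZ of the high word is used when it is
// non-zero; otherwise the result is 32 + CLZ of the low word (e.g. hi = 0, lo = 1
// gives 32 + 31 = 63).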
static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    Primitive::Type type,
                                    ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

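// Common code for Integer.numberOfTrailingZeros and Long.numberOfTrailingZeros.
// RBIT reverses the bit order, so counting trailing zeros reduces to CLZ of the
// reversed value (e.g. 0x00000008 -> RBIT -> 0x10000000 -> CLZ -> 3). The long case
// handles the low word first and falls back to 32 + CLZ(RBIT(hi)) when the low word
// is zero.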
static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     ArmVIXLAssembler* assembler) {
  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  vixl32::Register out = RegisterFrom(locations->Out());

  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

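// Branch-free absolute value: with mask = in >> 31 (arithmetic shift, so 0 or -1),
// abs(in) = (in + mask) ^ mask. For example, in = -5: mask = -1, in + mask = -6,
// -6 ^ -1 = 5. The 64-bit variant applies the same identity using ADDS/ADC and two EORs.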
static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

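// Branchless Math.min/max(int, int) via an IT block: after CMP op1, op2, "out = op1"
// is executed on lt (min) or gt (max), and "out = op2" on the opposite condition.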
static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

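// Common code for the Unsafe get intrinsics and their volatile variants. Volatile loads
// are followed by a DMB ISH so the load has acquire semantics; object loads additionally
// go through the read barrier (Baker or slow path) when read barriers are enabled.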
static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case Primitive::kPrimNot: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case Primitive::kPrimLong: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     Primitive::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == Primitive::kPrimLong) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == Primitive::kPrimNot) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
}

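// Common code for the Unsafe put intrinsics. A DMB ISH is emitted before the store for
// both ordered and volatile puts, and a second one after the store for volatile puts.
// For 64-bit volatile stores on cores without single-copy atomic LDRD/STRD, an
// LDREXD/STREXD loop is used so the store is performed atomically.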
static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimLong) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimNot) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                HInvoke* invoke,
                                                Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
          ? Location::kOutputOverlap
          : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

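// Generates the compare-and-swap loop for UnsafeCASInt/UnsafeCASObject: LDREX loads the
// current field value, SUBS compares it against `expected`, and STREX attempts the store
// only on a match, retrying while the exclusive store fails. Full DMB ISH barriers before
// and after the loop order the CAS with the surrounding memory operations, and the
// boolean result is derived from the final comparison.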
static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, Primitive::kPrimLong);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  vixl32::Register out = OutputRegister(invoke);  // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);  // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);  // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);  // Value.

  Location tmp_ptr_loc = locations->GetTemp(0);
  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);  // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));  // Value in memory.

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* offset */ 0u,
          /* index */ offset_loc,
          ScaleFactor::TIMES_1,
          tmp_ptr_loc,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &tmp);
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STREX
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  vixl32::Label loop_head;
  __ Bind(&loop_head);

  __ Ldrex(tmp, MemOperand(tmp_ptr));

  __ Subs(tmp, tmp, expected);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ itt(eq);
    __ strex(eq, tmp, value, MemOperand(tmp_ptr));
    __ cmp(eq, tmp, 1);
  }

  __ B(eq, &loop_head);

  __ Dmb(vixl32::ISH);

  __ Rsbs(out, tmp, 1);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(cc);
    __ mov(cc, out, 0);
  }

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->UnpoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      codegen->GetAssembler()->UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, Primitive::kPrimNot, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            invoke->InputAt(1)->CanBeNull()
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need temporary registers for String compression's feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

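// String.compareTo() intrinsic. After the reference-equality and null checks, the lengths
// are compared and the common prefix is scanned in an unrolled loop that compares two
// 32-bit words (four chars, or eight compressed bytes) per iteration. The first differing
// character is located with EOR + RBIT + CLZ, and strings with different compression
// styles fall back to a byte-vs-halfword comparison loop.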
void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register str = InputRegisterAt(invoke, 0);
  vixl32::Register arg = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label loop;
  vixl32::Label find_char_diff;
  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

  if (mirror::kUseStringCompression) {
    // Load `count` fields of this and argument strings.
    __ Ldr(temp3, MemOperand(str, count_offset));
    __ Ldr(temp2, MemOperand(arg, count_offset));
    // Extract lengths from the `count` fields.
    __ Lsr(temp0, temp3, 1u);
    __ Lsr(temp1, temp2, 1u);
  } else {
    // Load lengths of this and argument strings.
    __ Ldr(temp0, MemOperand(str, count_offset));
    __ Ldr(temp1, MemOperand(arg, count_offset));
  }
  // out = length diff.
  __ Subs(out, temp0, temp1);
  // temp0 = min(len(str), len(arg)).

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           2 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ it(gt);
    __ mov(gt, temp0, temp1);
  }

  // Shorter string is empty?
  // Note that mirror::kUseStringCompression==true introduces lots of instructions,
  // which makes the &end label far away from this branch and makes it not 'CBZ-encodable'.
  __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);

1204 if (mirror::kUseStringCompression) {
1205 // Check if both strings using same compression style to use this comparison loop.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001206 __ Eors(temp2, temp2, temp3);
1207 __ Lsrs(temp2, temp2, 1u);
1208 __ B(cs, &different_compression);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001209 // For string compression, calculate the number of bytes to compare (not chars).
1210 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001211 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001212
Artem Serov0fb37192016-12-06 18:13:40 +00001213 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1214 2 * kMaxInstructionSizeInBytes,
1215 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001216
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001217 __ it(ne);
1218 __ add(ne, temp0, temp0, temp0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001219 }
1220
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001221 // Store offset of string value in preparation for comparison loop.
1222 __ Mov(temp1, value_offset);
1223
Anton Kirilov5ec62182016-10-13 20:16:02 +01001224 // Assertions that must hold in order to compare multiple characters at a time.
1225 CHECK_ALIGNED(value_offset, 8);
1226 static_assert(IsAligned<8>(kObjectAlignment),
1227 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1228
Scott Wakelingb77051e2016-11-21 19:46:00 +00001229 const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001230 DCHECK_EQ(char_size, 2u);
1231
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001232 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1233
Anton Kirilov5ec62182016-10-13 20:16:02 +01001234 vixl32::Label find_char_diff_2nd_cmp;
1235 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1236 __ Bind(&loop);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001237 vixl32::Register temp_reg = temps.Acquire();
Anton Kirilov5ec62182016-10-13 20:16:02 +01001238 __ Ldr(temp_reg, MemOperand(str, temp1));
1239 __ Ldr(temp2, MemOperand(arg, temp1));
1240 __ Cmp(temp_reg, temp2);
1241 __ B(ne, &find_char_diff);
1242 __ Add(temp1, temp1, char_size * 2);
1243
1244 __ Ldr(temp_reg, MemOperand(str, temp1));
1245 __ Ldr(temp2, MemOperand(arg, temp1));
1246 __ Cmp(temp_reg, temp2);
1247 __ B(ne, &find_char_diff_2nd_cmp);
1248 __ Add(temp1, temp1, char_size * 2);
1249 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1250 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1251 __ B(hi, &loop);
1252 __ B(&end);
1253
1254 __ Bind(&find_char_diff_2nd_cmp);
1255 if (mirror::kUseStringCompression) {
1256 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
1257 __ B(ls, &end); // Was the second comparison fully beyond the end?
1258 } else {
1259 // Without string compression, we can start treating temp0 as signed
1260 // and rely on the signed comparison below.
1261 __ Sub(temp0, temp0, 2);
1262 }
1263
1264 // Find the single character difference.
1265 __ Bind(&find_char_diff);
1266 // Get the bit position of the first character that differs.
1267 __ Eor(temp1, temp2, temp_reg);
1268 __ Rbit(temp1, temp1);
1269 __ Clz(temp1, temp1);
1270
1271 // temp0 = number of characters remaining to compare.
1272 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1273 // in the comparison loop, and after the end of the shorter string data).
1274
1275 // Without string compression (temp1 >> 4) = character where difference occurs between the last
1276 // two words compared, in the interval [0,1].
1277 // (0 for low half-word different, 1 for high half-word different).
1278 // With string compression, (temp1 << 3) = byte where the difference occurs,
1279 // in the interval [0,3].
1280
1281 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1282 // the remaining string data, so just return length diff (out).
1283 // The comparison is unsigned for string compression, otherwise signed.
1284 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1285 __ B((mirror::kUseStringCompression ? ls : le), &end);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001286
Anton Kirilov5ec62182016-10-13 20:16:02 +01001287 // Extract the characters and calculate the difference.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001288 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001289 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1290 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1291 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001292 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u
1293 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001294 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1295 __ Lsr(temp2, temp2, temp1); // Extract second character.
1296 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1297 __ Lsr(out, temp_reg, temp1); // Extract first character.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001298 __ And(temp2, temp2, temp3);
1299 __ And(out, out, temp3);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001300 } else {
Anton Kirilovb88c4842016-11-14 14:37:00 +00001301 __ Bic(temp1, temp1, 0xf);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001302 __ Lsr(temp2, temp2, temp1);
1303 __ Lsr(out, temp_reg, temp1);
Anton Kirilovb88c4842016-11-14 14:37:00 +00001304 __ Movt(temp2, 0);
1305 __ Movt(out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001306 }
Anton Kirilov5ec62182016-10-13 20:16:02 +01001307
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001308 __ Sub(out, out, temp2);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001309 temps.Release(temp_reg);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001310
1311 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001312 __ B(&end);
1313 __ Bind(&different_compression);
1314
1315 // Comparison for different compression style.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001316 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1317 DCHECK_EQ(c_char_size, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001318
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001319 // We want to free up the temp3, currently holding `str.count`, for comparison.
1320 // So, we move the compression flag to the bottom bit of the iteration count `temp0`, which we
1321 // then need to treat as unsigned. Start by freeing the bit with an ADD and continue
1322 // further down with an LSRS+SBC, which will flip the meaning of the flag but allow
1323 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001324 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001325 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001326 __ Mov(temp1, str);
1327 __ Mov(temp2, arg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001328 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1329 {
Artem Serov0fb37192016-12-06 18:13:40 +00001330 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1331 3 * kMaxInstructionSizeInBytes,
1332 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001333 __ itt(cs); // Interleave with selection of temp1 and temp2.
1334 __ mov(cs, temp1, arg); // Preserves flags.
1335 __ mov(cs, temp2, str); // Preserves flags.
1336 }
Anton Kirilovb88c4842016-11-14 14:37:00 +00001337 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
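// Note on the trick above: LSRS moved the `str` compression flag (bit 0 of `count`) into the
// carry, the IT/MOV pair made temp1 point at the compressed string and temp2 at the
// uncompressed one, and SBC #0 subtracts (1 - carry), so temp0 = 2 * count - (str compressed
// ? 1 : 0) and its bottom bit is the inverted `str` flag used after the loop.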
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001338
1339 // Adjust temp1 and temp2 from string pointers to data pointers.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001340 __ Add(temp1, temp1, value_offset);
1341 __ Add(temp2, temp2, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001342
1343 vixl32::Label different_compression_loop;
1344 vixl32::Label different_compression_diff;
1345
1346 // Main loop for different compression.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001347 temp_reg = temps.Acquire();
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001348 __ Bind(&different_compression_loop);
1349 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1350 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
Anton Kirilovb88c4842016-11-14 14:37:00 +00001351 __ Cmp(temp_reg, temp3);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001352 __ B(ne, &different_compression_diff);
1353 __ Subs(temp0, temp0, 2);
1354 __ B(hi, &different_compression_loop);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001355 __ B(&end);
1356
1357 // Calculate the difference.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001358 __ Bind(&different_compression_diff);
1359 __ Sub(out, temp_reg, temp3);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001360 temps.Release(temp_reg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001361 // Flip the difference if the `arg` is compressed.
1362 // `temp0` contains the inverted `str` compression flag, i.e. the same as the `arg` compression flag.
1363 __ Lsrs(temp0, temp0, 1u);
1364 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1365 "Expecting 0=compressed, 1=uncompressed");
1366
Artem Serov0fb37192016-12-06 18:13:40 +00001367 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1368 2 * kMaxInstructionSizeInBytes,
1369 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001370 __ it(cc);
1371 __ rsb(cc, out, out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001372 }
1373
1374 __ Bind(&end);
1375
1376 if (can_slow_path) {
1377 __ Bind(slow_path->GetExitLabel());
1378 }
1379}
1380
1381void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1382 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1383 LocationSummary::kNoCall,
1384 kIntrinsified);
1385 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1386 locations->SetInAt(0, Location::RequiresRegister());
1387 locations->SetInAt(1, Location::RequiresRegister());
1388 // Temporary registers to store lengths of strings and for calculations.
1389 // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
1390 locations->AddTemp(LocationFrom(r0));
1391 locations->AddTemp(Location::RequiresRegister());
1392 locations->AddTemp(Location::RequiresRegister());
1393
1394 locations->SetOut(Location::RequiresRegister());
1395}
1396
1397void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1398 ArmVIXLAssembler* assembler = GetAssembler();
1399 LocationSummary* locations = invoke->GetLocations();
1400
1401 vixl32::Register str = InputRegisterAt(invoke, 0);
1402 vixl32::Register arg = InputRegisterAt(invoke, 1);
1403 vixl32::Register out = OutputRegister(invoke);
1404
1405 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1406 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1407 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1408
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001409 vixl32::Label loop;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001410 vixl32::Label end;
1411 vixl32::Label return_true;
1412 vixl32::Label return_false;
1413
1414 // Get offsets of count, value, and class fields within a string object.
1415 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1416 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1417 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1418
1419 // Note that the null check must have been done earlier.
1420 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1421
1422 StringEqualsOptimizations optimizations(invoke);
1423 if (!optimizations.GetArgumentNotNull()) {
1424 // Check if input is null, return false if it is.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001425 __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001426 }
1427
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001428 // Reference equality check, return true if same reference.
1429 __ Cmp(str, arg);
1430 __ B(eq, &return_true);
1431
Anton Kirilov5ec62182016-10-13 20:16:02 +01001432 if (!optimizations.GetArgumentIsString()) {
1433 // Instanceof check for the argument by comparing class fields.
1434 // All string objects must have the same type since String cannot be subclassed.
1435 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1436 // If the argument is a string object, its class field must be equal to receiver's class field.
1437 __ Ldr(temp, MemOperand(str, class_offset));
1438 __ Ldr(temp1, MemOperand(arg, class_offset));
1439 __ Cmp(temp, temp1);
1440 __ B(ne, &return_false);
1441 }
1442
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001443 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001444 __ Ldr(temp, MemOperand(str, count_offset));
1445 __ Ldr(temp1, MemOperand(arg, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001446 // Check if `count` fields are equal, return false if they're not.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001447 // Also compares the compression style; if it differs, return false.
1448 __ Cmp(temp, temp1);
1449 __ B(ne, &return_false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001450 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1451 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1452 "Expecting 0=compressed, 1=uncompressed");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001453 __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001454
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001455 // Assertions that must hold in order to compare strings 4 bytes at a time.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001456 DCHECK_ALIGNED(value_offset, 4);
1457 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1458
1459 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001460 // For string compression, calculate the number of bytes to compare (not chars).
1461 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1462 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
Artem Serov0fb37192016-12-06 18:13:40 +00001463 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1464 2 * kMaxInstructionSizeInBytes,
1465 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001466 __ it(cs); // If uncompressed,
1467 __ add(cs, temp, temp, temp); // double the byte count.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001468 }
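// From here on `temp` counts bytes remaining when string compression is enabled (the char
// count was doubled above for uncompressed strings) and chars otherwise; either way one
// 32-bit word of data is compared per loop iteration below.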
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001469
1470 // Store offset of string value in preparation for comparison loop.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001471 __ Mov(temp1, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001472
1473 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1474 // Ok to do this because strings are zero-padded to kObjectAlignment.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001475 __ Bind(&loop);
1476 __ Ldr(out, MemOperand(str, temp1));
1477 __ Ldr(temp2, MemOperand(arg, temp1));
Scott Wakelingb77051e2016-11-21 19:46:00 +00001478 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001479 __ Cmp(out, temp2);
1480 __ B(ne, &return_false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001481 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1482 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1483 __ B(hi, &loop);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001484
1485 // Return true and exit the function.
1486 // If loop does not result in returning false, we return true.
1487 __ Bind(&return_true);
1488 __ Mov(out, 1);
1489 __ B(&end);
1490
1491 // Return false and exit the function.
1492 __ Bind(&return_false);
1493 __ Mov(out, 0);
1494 __ Bind(&end);
1495}
1496
1497static void GenerateVisitStringIndexOf(HInvoke* invoke,
1498 ArmVIXLAssembler* assembler,
1499 CodeGeneratorARMVIXL* codegen,
1500 ArenaAllocator* allocator,
1501 bool start_at_zero) {
1502 LocationSummary* locations = invoke->GetLocations();
1503
1504 // Note that the null check must have been done earlier.
1505 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1506
1507 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1508 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1509 SlowPathCodeARMVIXL* slow_path = nullptr;
1510 HInstruction* code_point = invoke->InputAt(1);
1511 if (code_point->IsIntConstant()) {
1512 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1513 std::numeric_limits<uint16_t>::max()) {
1514 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1515 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1516 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1517 codegen->AddSlowPath(slow_path);
1518 __ B(slow_path->GetEntryLabel());
1519 __ Bind(slow_path->GetExitLabel());
1520 return;
1521 }
1522 } else if (code_point->GetType() != Primitive::kPrimChar) {
1523 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1524 // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1525 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1526 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1527 codegen->AddSlowPath(slow_path);
1528 __ B(hs, slow_path->GetEntryLabel());
1529 }
1530
1531 if (start_at_zero) {
1532 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1533 DCHECK(tmp_reg.Is(r2));
1534 // Start-index = 0.
1535 __ Mov(tmp_reg, 0);
1536 }
1537
1538 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1539 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1540
1541 if (slow_path != nullptr) {
1542 __ Bind(slow_path->GetExitLabel());
1543 }
1544}
1545
1546void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1547 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1548 LocationSummary::kCallOnMainAndSlowPath,
1549 kIntrinsified);
1550 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1551 // best to align the inputs accordingly.
1552 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1553 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1554 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1555 locations->SetOut(LocationFrom(r0));
1556
1557 // Need to send start-index=0.
1558 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1559}
1560
1561void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1562 GenerateVisitStringIndexOf(
1563 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1564}
1565
1566void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1567 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1568 LocationSummary::kCallOnMainAndSlowPath,
1569 kIntrinsified);
1570 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1571 // best to align the inputs accordingly.
1572 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1573 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1574 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1575 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1576 locations->SetOut(LocationFrom(r0));
1577}
1578
1579void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1580 GenerateVisitStringIndexOf(
1581 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1582}
1583
1584void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1585 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1586 LocationSummary::kCallOnMainAndSlowPath,
1587 kIntrinsified);
1588 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1589 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1590 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1591 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1592 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1593 locations->SetOut(LocationFrom(r0));
1594}
1595
1596void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1597 ArmVIXLAssembler* assembler = GetAssembler();
1598 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1599 __ Cmp(byte_array, 0);
1600 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1601 codegen_->AddSlowPath(slow_path);
1602 __ B(eq, slow_path->GetEntryLabel());
1603
1604 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1605 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1606 __ Bind(slow_path->GetExitLabel());
1607}
1608
1609void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1610 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1611 LocationSummary::kCallOnMainOnly,
1612 kIntrinsified);
1613 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1614 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1615 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1616 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1617 locations->SetOut(LocationFrom(r0));
1618}
1619
1620void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1621 // No need to emit code checking whether `locations->InAt(2)` is a null
1622 // pointer, as callers of the native method
1623 //
1624 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1625 //
1626 // all include a null check on `data` before calling that method.
1627 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1628 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1629}
1630
1631void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1632 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1633 LocationSummary::kCallOnMainAndSlowPath,
1634 kIntrinsified);
1635 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1636 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1637 locations->SetOut(LocationFrom(r0));
1638}
1639
1640void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1641 ArmVIXLAssembler* assembler = GetAssembler();
1642 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1643 __ Cmp(string_to_copy, 0);
1644 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1645 codegen_->AddSlowPath(slow_path);
1646 __ B(eq, slow_path->GetEntryLabel());
1647
1648 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1649 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1650
1651 __ Bind(slow_path->GetExitLabel());
1652}
1653
1654void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1655 // The only read barrier implementation supporting the
1656 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1657 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1658 return;
1659 }
1660
1661 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1662 LocationSummary* locations = invoke->GetLocations();
1663 if (locations == nullptr) {
1664 return;
1665 }
1666
1667 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1668 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1669 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1670
1671 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1672 locations->SetInAt(1, Location::RequiresRegister());
1673 }
1674 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1675 locations->SetInAt(3, Location::RequiresRegister());
1676 }
1677 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1678 locations->SetInAt(4, Location::RequiresRegister());
1679 }
1680 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1681 // Temporary register IP cannot be used in
1682 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1683 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1684 // temporary register from the register allocator.
1685 locations->AddTemp(Location::RequiresRegister());
1686 }
1687}
1688
1689static void CheckPosition(ArmVIXLAssembler* assembler,
1690 Location pos,
1691 vixl32::Register input,
1692 Location length,
1693 SlowPathCodeARMVIXL* slow_path,
1694 vixl32::Register temp,
1695 bool length_is_input_length = false) {
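// Emits the bounds checks for one side of the copy: pos >= 0, pos <= input.length and
// input.length - pos >= length, branching to the intrinsic slow path on failure. When
// `length_is_input_length` is true the copy length equals input.length, so the only valid
// position is zero.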
1696 // Where is the length in the Array?
1697 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1698
1699 if (pos.IsConstant()) {
1700 int32_t pos_const = Int32ConstantFrom(pos);
1701 if (pos_const == 0) {
1702 if (!length_is_input_length) {
1703 // Check that length(input) >= length.
1704 __ Ldr(temp, MemOperand(input, length_offset));
1705 if (length.IsConstant()) {
1706 __ Cmp(temp, Int32ConstantFrom(length));
1707 } else {
1708 __ Cmp(temp, RegisterFrom(length));
1709 }
1710 __ B(lt, slow_path->GetEntryLabel());
1711 }
1712 } else {
1713 // Check that length(input) >= pos.
1714 __ Ldr(temp, MemOperand(input, length_offset));
1715 __ Subs(temp, temp, pos_const);
1716 __ B(lt, slow_path->GetEntryLabel());
1717
1718 // Check that (length(input) - pos) >= length.
1719 if (length.IsConstant()) {
1720 __ Cmp(temp, Int32ConstantFrom(length));
1721 } else {
1722 __ Cmp(temp, RegisterFrom(length));
1723 }
1724 __ B(lt, slow_path->GetEntryLabel());
1725 }
1726 } else if (length_is_input_length) {
1727 // The only way the copy can succeed is if pos is zero.
1728 vixl32::Register pos_reg = RegisterFrom(pos);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001729 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001730 } else {
1731 // Check that pos >= 0.
1732 vixl32::Register pos_reg = RegisterFrom(pos);
1733 __ Cmp(pos_reg, 0);
1734 __ B(lt, slow_path->GetEntryLabel());
1735
1736 // Check that pos <= length(input).
1737 __ Ldr(temp, MemOperand(input, length_offset));
1738 __ Subs(temp, temp, pos_reg);
1739 __ B(lt, slow_path->GetEntryLabel());
1740
1741 // Check that (length(input) - pos) >= length.
1742 if (length.IsConstant()) {
1743 __ Cmp(temp, Int32ConstantFrom(length));
1744 } else {
1745 __ Cmp(temp, RegisterFrom(length));
1746 }
1747 __ B(lt, slow_path->GetEntryLabel());
1748 }
1749}
1750
1751void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1752 // The only read barrier implementation supporting the
1753 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1754 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1755
1756 ArmVIXLAssembler* assembler = GetAssembler();
1757 LocationSummary* locations = invoke->GetLocations();
1758
1759 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1760 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1761 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1762 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1763 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1764
1765 vixl32::Register src = InputRegisterAt(invoke, 0);
1766 Location src_pos = locations->InAt(1);
1767 vixl32::Register dest = InputRegisterAt(invoke, 2);
1768 Location dest_pos = locations->InAt(3);
1769 Location length = locations->InAt(4);
1770 Location temp1_loc = locations->GetTemp(0);
1771 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1772 Location temp2_loc = locations->GetTemp(1);
1773 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1774 Location temp3_loc = locations->GetTemp(2);
1775 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1776
1777 SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1778 codegen_->AddSlowPath(intrinsic_slow_path);
1779
1780 vixl32::Label conditions_on_positions_validated;
1781 SystemArrayCopyOptimizations optimizations(invoke);
1782
1783 // If source and destination are the same, we go to slow path if we need to do
1784 // forward copying.
1785 if (src_pos.IsConstant()) {
1786 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1787 if (dest_pos.IsConstant()) {
1788 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1789 if (optimizations.GetDestinationIsSource()) {
1790 // Checked when building locations.
1791 DCHECK_GE(src_pos_constant, dest_pos_constant);
1792 } else if (src_pos_constant < dest_pos_constant) {
1793 __ Cmp(src, dest);
1794 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1795 }
1796
1797 // Checked when building locations.
1798 DCHECK(!optimizations.GetDestinationIsSource()
1799 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1800 } else {
1801 if (!optimizations.GetDestinationIsSource()) {
1802 __ Cmp(src, dest);
1803 __ B(ne, &conditions_on_positions_validated);
1804 }
1805 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1806 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1807 }
1808 } else {
1809 if (!optimizations.GetDestinationIsSource()) {
1810 __ Cmp(src, dest);
1811 __ B(ne, &conditions_on_positions_validated);
1812 }
1813 if (dest_pos.IsConstant()) {
1814 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1815 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1816 } else {
1817 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1818 }
1819 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1820 }
1821
1822 __ Bind(&conditions_on_positions_validated);
1823
1824 if (!optimizations.GetSourceIsNotNull()) {
1825 // Bail out if the source is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001826 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001827 }
1828
1829 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1830 // Bail out if the destination is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001831 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001832 }
1833
1834 // If the length is negative, bail out.
1835 // We have already checked in the LocationsBuilder for the constant case.
1836 if (!length.IsConstant() &&
1837 !optimizations.GetCountIsSourceLength() &&
1838 !optimizations.GetCountIsDestinationLength()) {
1839 __ Cmp(RegisterFrom(length), 0);
1840 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1841 }
1842
1843 // Validity checks: source.
1844 CheckPosition(assembler,
1845 src_pos,
1846 src,
1847 length,
1848 intrinsic_slow_path,
1849 temp1,
1850 optimizations.GetCountIsSourceLength());
1851
1852 // Validity checks: dest.
1853 CheckPosition(assembler,
1854 dest_pos,
1855 dest,
1856 length,
1857 intrinsic_slow_path,
1858 temp1,
1859 optimizations.GetCountIsDestinationLength());
1860
1861 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1862 // Check whether all elements of the source array are assignable to the component
1863 // type of the destination array. We do two checks: the classes are the same,
1864 // or the destination is Object[]. If none of these checks succeed, we go to the
1865 // slow path.
1866
1867 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1868 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1869 // /* HeapReference<Class> */ temp1 = src->klass_
1870 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1871 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1872 // Bail out if the source is not a non primitive array.
1873 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1874 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1875 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001876 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001877 // If heap poisoning is enabled, `temp1` has been unpoisoned
1878 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1879 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1880 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1881 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001882 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001883 }
1884
1885 // /* HeapReference<Class> */ temp1 = dest->klass_
1886 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1887 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
1888
1889 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1890 // Bail out if the destination is not a non primitive array.
1891 //
1892 // Register `temp1` is not trashed by the read barrier emitted
1893 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1894 // method produces a call to a ReadBarrierMarkRegX entry point,
1895 // which saves all potentially live registers, including
1896 // temporaries such as `temp1`.
1897 // /* HeapReference<Class> */ temp2 = temp1->component_type_
1898 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1899 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001900 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001901 // If heap poisoning is enabled, `temp2` has been unpoisoned
1902 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1903 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1904 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1905 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001906 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001907 }
1908
1909 // For the same reason given earlier, `temp1` is not trashed by the
1910 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1911 // /* HeapReference<Class> */ temp2 = src->klass_
1912 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1913 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
1914 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1915 __ Cmp(temp1, temp2);
1916
1917 if (optimizations.GetDestinationIsTypedObjectArray()) {
1918 vixl32::Label do_copy;
1919 __ B(eq, &do_copy);
1920 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1921 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1922 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1923 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1924 // We do not need to emit a read barrier for the following
1925 // heap reference load, as `temp1` is only used in a
1926 // comparison with null below, and this reference is not
1927 // kept afterwards.
1928 __ Ldr(temp1, MemOperand(temp1, super_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001929 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001930 __ Bind(&do_copy);
1931 } else {
1932 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1933 }
1934 } else {
1935 // Non read barrier code.
1936
1937 // /* HeapReference<Class> */ temp1 = dest->klass_
1938 __ Ldr(temp1, MemOperand(dest, class_offset));
1939 // /* HeapReference<Class> */ temp2 = src->klass_
1940 __ Ldr(temp2, MemOperand(src, class_offset));
1941 bool did_unpoison = false;
1942 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1943 !optimizations.GetSourceIsNonPrimitiveArray()) {
1944 // One or two of the references need to be unpoisoned. Unpoison them
1945 // both to make the identity check valid.
1946 assembler->MaybeUnpoisonHeapReference(temp1);
1947 assembler->MaybeUnpoisonHeapReference(temp2);
1948 did_unpoison = true;
1949 }
1950
1951 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1952 // Bail out if the destination is not a non primitive array.
1953 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1954 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001955 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001956 assembler->MaybeUnpoisonHeapReference(temp3);
1957 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1958 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1959 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001960 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001961 }
1962
1963 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1964 // Bail out if the source is not a non primitive array.
1965 // /* HeapReference<Class> */ temp3 = temp2->component_type_
1966 __ Ldr(temp3, MemOperand(temp2, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001967 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001968 assembler->MaybeUnpoisonHeapReference(temp3);
1969 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1970 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1971 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001972 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001973 }
1974
1975 __ Cmp(temp1, temp2);
1976
1977 if (optimizations.GetDestinationIsTypedObjectArray()) {
1978 vixl32::Label do_copy;
1979 __ B(eq, &do_copy);
1980 if (!did_unpoison) {
1981 assembler->MaybeUnpoisonHeapReference(temp1);
1982 }
1983 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1984 __ Ldr(temp1, MemOperand(temp1, component_offset));
1985 assembler->MaybeUnpoisonHeapReference(temp1);
1986 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1987 __ Ldr(temp1, MemOperand(temp1, super_offset));
1988 // No need to unpoison the result, we're comparing against null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001989 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001990 __ Bind(&do_copy);
1991 } else {
1992 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1993 }
1994 }
1995 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1996 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1997 // Bail out if the source is not a non primitive array.
1998 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1999 // /* HeapReference<Class> */ temp1 = src->klass_
2000 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2001 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2002 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2003 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2004 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00002005 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002006 // If heap poisoning is enabled, `temp3` has been unpoisoned
2007 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2008 } else {
2009 // /* HeapReference<Class> */ temp1 = src->klass_
2010 __ Ldr(temp1, MemOperand(src, class_offset));
2011 assembler->MaybeUnpoisonHeapReference(temp1);
2012 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2013 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00002014 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002015 assembler->MaybeUnpoisonHeapReference(temp3);
2016 }
2017 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2018 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2019 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002020 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002021 }
2022
2023 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
2024 uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
2025 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
2026
2027 // Compute the base source address in `temp1`.
2028 if (src_pos.IsConstant()) {
2029 int32_t constant = Int32ConstantFrom(src_pos);
2030 __ Add(temp1, src, element_size * constant + offset);
2031 } else {
2032 __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
2033 __ Add(temp1, temp1, offset);
2034 }
2035
2036 // Compute the end source address in `temp3`.
2037 if (length.IsConstant()) {
2038 int32_t constant = Int32ConstantFrom(length);
2039 __ Add(temp3, temp1, element_size * constant);
2040 } else {
2041 __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
2042 }
2043
2044 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2045 // The base destination address is computed later, as `temp2` is
2046 // used for intermediate computations.
2047
2048 // SystemArrayCopy implementation for Baker read barriers (see
2049 // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2050 //
2051 // if (src_ptr != end_ptr) {
2052 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
2053 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002054 // bool is_gray = (rb_state == ReadBarrier::GrayState());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002055 // if (is_gray) {
2056 // // Slow-path copy.
2057 // do {
2058 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2059 // } while (src_ptr != end_ptr)
2060 // } else {
2061 // // Fast-path copy.
2062 // do {
2063 // *dest_ptr++ = *src_ptr++;
2064 // } while (src_ptr != end_ptr)
2065 // }
2066 // }
2067
2068 vixl32::Label loop, done;
2069
2070 // Don't enter copy loop if `length == 0`.
2071 __ Cmp(temp1, temp3);
2072 __ B(eq, &done);
2073
2074 // /* int32_t */ monitor = src->monitor_
2075 __ Ldr(temp2, MemOperand(src, monitor_offset));
2076 // /* LockWord */ lock_word = LockWord(monitor)
2077 static_assert(sizeof(LockWord) == sizeof(int32_t),
2078 "art::LockWord and int32_t have different sizes.");
2079
2080 // Introduce a dependency on the lock_word including the rb_state,
2081 // which shall prevent load-load reordering without using
2082 // a memory barrier (which would be more expensive).
2083 // `src` is unchanged by this operation, but its value now depends
2084 // on `temp2`.
2085 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
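// Note: an immediate LSR by 32 produces 0 on AArch32, so the ADD leaves the value of `src`
// unchanged while still creating the register dependency on `temp2` (the loaded lock word).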
2086
2087 // Slow path used to copy array when `src` is gray.
2088 SlowPathCodeARMVIXL* read_barrier_slow_path =
2089 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2090 codegen_->AddSlowPath(read_barrier_slow_path);
2091
2092 // Given the numeric representation, it's enough to check the low bit of the
2093 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2094 // which can be a 16-bit instruction unlike the TST immediate.
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002095 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2096 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
Anton Kirilov5ec62182016-10-13 20:16:02 +01002097 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2098 // Carry flag is the last bit shifted out by LSRS.
2099 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2100
2101 // Fast-path copy.
2102
2103 // Compute the base destination address in `temp2`.
2104 if (dest_pos.IsConstant()) {
2105 int32_t constant = Int32ConstantFrom(dest_pos);
2106 __ Add(temp2, dest, element_size * constant + offset);
2107 } else {
2108 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2109 __ Add(temp2, temp2, offset);
2110 }
2111
2112 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2113 // poison/unpoison.
2114 __ Bind(&loop);
2115
2116 {
2117 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2118 const vixl32::Register temp_reg = temps.Acquire();
2119
2120 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2121 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2122 }
2123
2124 __ Cmp(temp1, temp3);
2125 __ B(ne, &loop);
2126
2127 __ Bind(read_barrier_slow_path->GetExitLabel());
2128 __ Bind(&done);
2129 } else {
2130 // Non read barrier code.
2131
2132 // Compute the base destination address in `temp2`.
2133 if (dest_pos.IsConstant()) {
2134 int32_t constant = Int32ConstantFrom(dest_pos);
2135 __ Add(temp2, dest, element_size * constant + offset);
2136 } else {
2137 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2138 __ Add(temp2, temp2, offset);
2139 }
2140
2141 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2142 // poison/unpoison.
2143 vixl32::Label loop, done;
2144 __ Cmp(temp1, temp3);
2145 __ B(eq, &done);
2146 __ Bind(&loop);
2147
2148 {
2149 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2150 const vixl32::Register temp_reg = temps.Acquire();
2151
2152 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2153 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2154 }
2155
2156 __ Cmp(temp1, temp3);
2157 __ B(ne, &loop);
2158 __ Bind(&done);
2159 }
2160
2161 // We only need one card marking on the destination array.
2162 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2163
2164 __ Bind(intrinsic_slow_path->GetExitLabel());
2165}
2166
2167static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2168 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2169 // the code generator. Furthermore, the register allocator creates fixed live intervals
2170 // for all caller-saved registers because we are doing a function call. As a result, if
2171 // the input and output locations are unallocated, the register allocator runs out of
2172 // registers and fails; however, a debuggable graph is not the common case.
2173 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2174 return;
2175 }
2176
2177 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2178 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2179 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2180
2181 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2182 LocationSummary::kCallOnMainOnly,
2183 kIntrinsified);
2184 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2185
2186 locations->SetInAt(0, Location::RequiresFpuRegister());
2187 locations->SetOut(Location::RequiresFpuRegister());
2188 // Native code uses the soft float ABI.
2189 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2190 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2191}
2192
2193static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2194 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2195 // the code generator. Furthermore, the register allocator creates fixed live intervals
2196 // for all caller-saved registers because we are doing a function call. As a result, if
2197 // the input and output locations are unallocated, the register allocator runs out of
2198 // registers and fails; however, a debuggable graph is not the common case.
2199 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2200 return;
2201 }
2202
2203 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2204 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2205 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2206 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2207
2208 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2209 LocationSummary::kCallOnMainOnly,
2210 kIntrinsified);
2211 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2212
2213 locations->SetInAt(0, Location::RequiresFpuRegister());
2214 locations->SetInAt(1, Location::RequiresFpuRegister());
2215 locations->SetOut(Location::RequiresFpuRegister());
2216 // Native code uses the soft float ABI.
2217 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2218 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2219 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2220 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2221}
2222
2223static void GenFPToFPCall(HInvoke* invoke,
2224 ArmVIXLAssembler* assembler,
2225 CodeGeneratorARMVIXL* codegen,
2226 QuickEntrypointEnum entry) {
2227 LocationSummary* const locations = invoke->GetLocations();
2228
2229 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2230 DCHECK(locations->WillCall() && locations->Intrinsified());
2231
2232 // Native code uses the soft float ABI.
2233 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2234 RegisterFrom(locations->GetTemp(1)),
2235 InputDRegisterAt(invoke, 0));
2236 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2237 __ Vmov(OutputDRegister(invoke),
2238 RegisterFrom(locations->GetTemp(0)),
2239 RegisterFrom(locations->GetTemp(1)));
2240}
2241
2242static void GenFPFPToFPCall(HInvoke* invoke,
2243 ArmVIXLAssembler* assembler,
2244 CodeGeneratorARMVIXL* codegen,
2245 QuickEntrypointEnum entry) {
2246 LocationSummary* const locations = invoke->GetLocations();
2247
2248 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2249 DCHECK(locations->WillCall() && locations->Intrinsified());
2250
2251 // Native code uses the soft float ABI.
2252 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2253 RegisterFrom(locations->GetTemp(1)),
2254 InputDRegisterAt(invoke, 0));
2255 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2256 RegisterFrom(locations->GetTemp(3)),
2257 InputDRegisterAt(invoke, 1));
2258 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2259 __ Vmov(OutputDRegister(invoke),
2260 RegisterFrom(locations->GetTemp(0)),
2261 RegisterFrom(locations->GetTemp(1)));
2262}
2263
2264void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2265 CreateFPToFPCallLocations(arena_, invoke);
2266}
2267
2268void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2269 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2270}
2271
2272void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2273 CreateFPToFPCallLocations(arena_, invoke);
2274}
2275
2276void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2277 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2278}
2279
2280void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2281 CreateFPToFPCallLocations(arena_, invoke);
2282}
2283
2284void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2285 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2286}
2287
2288void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2289 CreateFPToFPCallLocations(arena_, invoke);
2290}
2291
2292void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2293 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2294}
2295
2296void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2297 CreateFPToFPCallLocations(arena_, invoke);
2298}
2299
2300void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2301 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2302}
2303
2304void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2305 CreateFPToFPCallLocations(arena_, invoke);
2306}
2307
2308void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2309 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2310}
2311
2312void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2313 CreateFPToFPCallLocations(arena_, invoke);
2314}
2315
2316void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2317 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2318}
2319
2320void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2321 CreateFPToFPCallLocations(arena_, invoke);
2322}
2323
2324void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2325 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2326}
2327
2328void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2329 CreateFPToFPCallLocations(arena_, invoke);
2330}
2331
2332void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2333 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2334}
2335
2336void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2337 CreateFPToFPCallLocations(arena_, invoke);
2338}
2339
2340void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2341 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2342}
2343
2344void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2345 CreateFPToFPCallLocations(arena_, invoke);
2346}
2347
2348void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2349 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2350}
2351
2352void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2353 CreateFPToFPCallLocations(arena_, invoke);
2354}
2355
2356void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2357 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2358}
2359
2360void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2361 CreateFPToFPCallLocations(arena_, invoke);
2362}
2363
2364void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2365 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2366}
2367
2368void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2369 CreateFPToFPCallLocations(arena_, invoke);
2370}
2371
2372void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2373 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2374}
2375
2376void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2377 CreateFPFPToFPCallLocations(arena_, invoke);
2378}
2379
2380void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2381 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2382}
2383
2384void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2385 CreateFPFPToFPCallLocations(arena_, invoke);
2386}
2387
2388void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2389 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2390}
2391
2392void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2393 CreateFPFPToFPCallLocations(arena_, invoke);
2394}
2395
2396void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2397 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2398}
2399
2400void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2401 CreateIntToIntLocations(arena_, invoke);
2402}
2403
2404void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2405 ArmVIXLAssembler* assembler = GetAssembler();
2406 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2407}
2408
2409void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2410 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2411 LocationSummary::kNoCall,
2412 kIntrinsified);
2413 locations->SetInAt(0, Location::RequiresRegister());
2414 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2415}
2416
2417void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2418 ArmVIXLAssembler* assembler = GetAssembler();
2419 LocationSummary* locations = invoke->GetLocations();
2420
2421 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2422 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2423 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2424 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2425
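// Reversing 64 bits: bit-reverse each 32-bit half and swap the halves. The first RBIT below
// writes the output low register before the input low register has been read, hence the
// kOutputOverlap requirement in the locations above.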
2426 __ Rbit(out_reg_lo, in_reg_hi);
2427 __ Rbit(out_reg_hi, in_reg_lo);
2428}
2429
2430void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2431 CreateIntToIntLocations(arena_, invoke);
2432}
2433
2434void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2435 ArmVIXLAssembler* assembler = GetAssembler();
2436 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2437}
2438
2439void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2440 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2441 LocationSummary::kNoCall,
2442 kIntrinsified);
2443 locations->SetInAt(0, Location::RequiresRegister());
2444 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2445}
2446
2447void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2448 ArmVIXLAssembler* assembler = GetAssembler();
2449 LocationSummary* locations = invoke->GetLocations();
2450
2451 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2452 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2453 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2454 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2455
2456 __ Rev(out_reg_lo, in_reg_hi);
2457 __ Rev(out_reg_hi, in_reg_lo);
2458}
2459
2460void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2461 CreateIntToIntLocations(arena_, invoke);
2462}
2463
2464void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2465 ArmVIXLAssembler* assembler = GetAssembler();
2466 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2467}
2468
2469static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
2470 DCHECK(Primitive::IsIntOrLongType(type)) << type;
2471 DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2472 DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2473
2474 bool is_long = type == Primitive::kPrimLong;
2475 LocationSummary* locations = instr->GetLocations();
2476 Location in = locations->InAt(0);
2477 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2478 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2479 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2480 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2481 vixl32::Register out_r = OutputRegister(instr);
2482
2483 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2484 // According to the Cortex-A57 and A72 optimization guides, transferring data from a core reg
2485 // to the upper or lower half of a VFP D-reg has extra latency compared to using the full D-reg.
2486 // That's why for the integer bit count we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
2487 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2488 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2489 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2490 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2491 if (is_long) {
2492 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2493 }
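// After the pairwise adds each 32-bit half of the D register holds the bit count of the
// corresponding input half (for int both halves hold the same value), and for long the
// final VPADDL folds them into a single total, so reading the low S register below gives
// the result.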
2494 __ Vmov(out_r, tmp_s);
2495}
2496
2497void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2498 CreateIntToIntLocations(arena_, invoke);
2499 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2500}
2501
2502void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2503 GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2504}
2505
2506void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2507 VisitIntegerBitCount(invoke);
2508}
2509
2510void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2511 GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2512}
2513
2514void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2515 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2516 LocationSummary::kNoCall,
2517 kIntrinsified);
2518 locations->SetInAt(0, Location::RequiresRegister());
2519 locations->SetInAt(1, Location::RequiresRegister());
2520 locations->SetInAt(2, Location::RequiresRegister());
2521 locations->SetInAt(3, Location::RequiresRegister());
2522 locations->SetInAt(4, Location::RequiresRegister());
2523
2524 // Temporary registers to store lengths of strings and for calculations.
2525 locations->AddTemp(Location::RequiresRegister());
2526 locations->AddTemp(Location::RequiresRegister());
2527 locations->AddTemp(Location::RequiresRegister());
2528}
2529
2530void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2531 ArmVIXLAssembler* assembler = GetAssembler();
2532 LocationSummary* locations = invoke->GetLocations();
2533
2534 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2535 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2536 DCHECK_EQ(char_size, 2u);
2537
2538 // Location of data in char array buffer.
2539 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2540
2541 // Location of char array data in string.
2542 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2543
2544 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2545 // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
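  // Conceptually, for the uncompressed case (sketch only, with dst and src_chars as uint16_t*):
  //   memcpy(dst + dstBegin, src_chars + srcBegin, (srcEnd - srcBegin) * sizeof(uint16_t));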
2546 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2547 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2548 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2549 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2550 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2551
2552 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2553 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2554 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2555
2556 vixl32::Label done, compressed_string_loop;
2557 // Compute the base of the dst region to copy to: dstObj + data_offset + 2 * dstBegin.
2558 __ Add(dst_ptr, dstObj, data_offset);
2559 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2560
2561 __ Subs(num_chr, srcEnd, srcBegin);
2562 // Early out for valid zero-length retrievals.
2563 __ B(eq, &done);
2564
2565 // Compute the start of the src character data: srcObj + value_offset (srcBegin is added below).
2566 __ Add(src_ptr, srcObj, value_offset);
2567
2568 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2569 vixl32::Register temp;
2570 vixl32::Label compressed_string_preloop;
2571 if (mirror::kUseStringCompression) {
2572 // Location of count in string.
2573 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2574 temp = temps.Acquire();
2575 // Load the string's count field (it holds the length and the compression flag).
2576 __ Ldr(temp, MemOperand(srcObj, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002577 __ Tst(temp, 1);
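  // The LSB of the count field is the compression flag: if it is clear (eq), the string is
  // stored compressed, one byte per character, so take the byte-widening copy path below.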
Anton Kirilov5ec62182016-10-13 20:16:02 +01002578 temps.Release(temp);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002579 __ B(eq, &compressed_string_preloop);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002580 }
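  // Uncompressed path: characters are 16-bit, so srcBegin is scaled by 2 when forming the
  // source address.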
2581 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2582
2583 // Do the copy.
2584 vixl32::Label loop, remainder;
2585
2586 temp = temps.Acquire();
2587 // Using a temp avoids having to restore num_chr on the < 4 character path.
2588 __ Subs(temp, num_chr, 4);
2589 __ B(lt, &remainder);
2590
2591 // Keep the result of the earlier subs; we are going to fetch at least 4 characters.
2592 __ Mov(num_chr, temp);
2593
2594 // Main loop, used for longer copies, loads and stores 4 x 16-bit characters at a time.
2595 // (LDRD/STRD fault on unaligned addresses, and it's not worth inlining the extra code
2596 // needed to handle that everywhere this intrinsic applies.)
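  // Rough scalar equivalent of the unrolled loop below (sketch only; num_chr has already been
  // decremented by 4 above):
  //   do { copy four chars as two 32-bit words; num_chr -= 4; } while (num_chr >= 0);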
2597 __ Bind(&loop);
2598 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2599 __ Subs(num_chr, num_chr, 4);
2600 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2601 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2602 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2603 temps.Release(temp);
2604 __ B(ge, &loop);
2605
2606 __ Adds(num_chr, num_chr, 4);
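  // num_chr now holds the number of remaining characters, 0 to 3.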
2607 __ B(eq, &done);
2608
2609 // Loop for the < 4 character case and for remainder handling: loads and stores one
2610 // 16-bit Java character at a time.
2611 __ Bind(&remainder);
2612 temp = temps.Acquire();
2613 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2614 __ Subs(num_chr, num_chr, 1);
2615 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2616 temps.Release(temp);
2617 __ B(gt, &remainder);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002618
2619 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002620 __ B(&done);
2621
Anton Kirilov5ec62182016-10-13 20:16:02 +01002622 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2623 DCHECK_EQ(c_char_size, 1u);
2624 // Copy loop for a compressed src, widening one 8-bit character to a 16-bit character at a time.
2625 __ Bind(&compressed_string_preloop);
2626 __ Add(src_ptr, src_ptr, srcBegin);
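  // For a compressed string each source character is a single byte, so srcBegin is not scaled here.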
2627 __ Bind(&compressed_string_loop);
2628 temp = temps.Acquire();
2629 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2630 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2631 temps.Release(temp);
2632 __ Subs(num_chr, num_chr, 1);
2633 __ B(gt, &compressed_string_loop);
2634 }
2635
2636 __ Bind(&done);
2637}
2638
2639void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2640 CreateFPToIntLocations(arena_, invoke);
2641}
2642
2643void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2644 ArmVIXLAssembler* const assembler = GetAssembler();
2645 const vixl32::Register out = OutputRegister(invoke);
2646 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2647 // we don't care about the sign bit anyway.
2648 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
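  // Worked example (illustrative only): +Inf is 0x7f800000; the Lsl below turns it into
  // 0xff000000, which equals 'infinity', so the Eor yields 0, Clz returns 32 and the final
  // Lsr by 5 produces 1. Any non-infinity input leaves a non-zero value, giving a result of 0.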
2649
2650 __ Vmov(out, InputSRegisterAt(invoke, 0));
2651 // We don't care about the sign bit, so shift left.
2652 __ Lsl(out, out, 1);
2653 __ Eor(out, out, infinity);
2654 // If the result is 0, it has 32 leading zeros; otherwise it has fewer.
2655 __ Clz(out, out);
2656 // Any number less than 32 logically shifted right by 5 bits results in 0;
2657 // the same operation on 32 yields 1.
2658 __ Lsr(out, out, 5);
2659}
2660
2661void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2662 CreateFPToIntLocations(arena_, invoke);
2663}
2664
2665void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2666 ArmVIXLAssembler* const assembler = GetAssembler();
2667 const vixl32::Register out = OutputRegister(invoke);
2668 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2669 const vixl32::Register temp = temps.Acquire();
2670 // The highest 32 bits of double precision positive infinity separated into
2671 // two constants encodable as immediate operands.
2672 constexpr uint32_t infinity_high = 0x7f000000U;
2673 constexpr uint32_t infinity_high2 = 0x00f00000U;
2674
2675 static_assert((infinity_high | infinity_high2) ==
2676 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2677 "The constants do not add up to the high 32 bits of double "
2678 "precision positive infinity.");
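  // Worked example (illustrative only): for +/-Inf the high word is 0x7ff00000 or 0xfff00000.
  // The two Eors below clear the exponent bits, the Lsl by 1 inside the Orr drops the sign bit,
  // and ORing with the (zero) low word leaves 0, so Clz returns 32 and the final Lsr yields 1.
  // For any other input the Orr result is non-zero and the intrinsic returns 0.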
2679 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2680 __ Eor(out, out, infinity_high);
2681 __ Eor(out, out, infinity_high2);
2682 // We don't care about the sign bit, so shift left.
2683 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2684 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2685 __ Clz(out, out);
2686 // Any number less than 32 logically shifted right by 5 bits results in 0;
2687 // the same operation on 32 yields 1.
2688 __ Lsr(out, out, 5);
2689}
2690
2691UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
2692UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
2693UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
2694UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
2695UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
2696UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
2697UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil) // Could be done by changing rounding mode, maybe?
2698UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor) // Could be done by changing rounding mode, maybe?
2699UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
2700UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
2701UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
2702UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
2703UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
2704UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
2705UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
2706UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
2707UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
2708UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
2709
Aart Bikff7d89c2016-11-07 08:49:28 -08002710UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf)
2711UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter)
Aart Bik71bf7b42016-11-16 10:17:46 -08002712UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend)
2713UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength)
2714UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString)
2715UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend)
2716UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength)
2717UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString)
Aart Bikff7d89c2016-11-07 08:49:28 -08002718
Anton Kirilov5ec62182016-10-13 20:16:02 +01002719// 1.8.
2720UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
2721UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
2722UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
2723UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
2724UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
2725
2726UNREACHABLE_INTRINSICS(ARMVIXL)
2727
2728#undef __
2729
2730} // namespace arm
2731} // namespace art