/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "lock_word.h"
#include "mirror/array-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARM calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
    }
    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    if (dest_pos.IsConstant()) {
      int32_t constant = Int32ConstantFrom(dest_pos);
      __ Add(dst_curr_addr, dest, element_size * constant + offset);
    } else {
      __ Add(dst_curr_addr,
             dest,
             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
      __ Add(dst_curr_addr, dst_curr_addr, offset);
    }

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

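// The two helpers below back the Float/Double bit-pattern intrinsics handled further down
// (doubleToRawLongBits, longBitsToDouble, floatToRawIntBits, intBitsToFloat). Roughly speaking,
// VMOV between core and FP registers copies the raw bits unchanged (no numeric conversion);
// the 64-bit case simply moves both 32-bit halves to or from a D register.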
static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    Primitive::Type type,
                                    ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

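  // For a long, the leading-zero count is composed from the two 32-bit halves; roughly:
  //   clz64(hi:lo) = (hi != 0) ? clz32(hi) : 32 + clz32(lo)
  // The CLZ of the high word is kept when that word is non-zero; otherwise the count is
  // recomputed from the low word and biased by 32.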
  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     ArmVIXLAssembler* assembler) {
  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  vixl32::Register out = RegisterFrom(locations->Out());

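  // There is no CTZ instruction on AArch32, so trailing zeros are counted as CLZ of the
  // bit-reversed value (RBIT followed by CLZ). For a long the low word is tried first; roughly:
  //   ctz64(hi:lo) = (lo != 0) ? ctz32(lo) : 32 + ctz32(hi)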
  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

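  // The sequence below computes a branchless absolute value. In C-like terms (a sketch of
  // the idea, not literal generated code):
  //   mask = in >> 31;            // 0 if in >= 0, all ones if in < 0
  //   out  = (in + mask) ^ mask;  // identity for non-negative values, two's-complement negation otherwise
  // The 64-bit variant uses ADDS/ADC so the carry propagates into the high word.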
  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

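  // The result is selected with an IT block rather than a branch: after the CMP above, the
  // first predicated MOV copies op1 when it already is the minimum (or maximum), and the
  // second copies op2 otherwise. The scope below bounds the emitted code to this
  // three-instruction sequence (ITE plus two MOVs).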
  {
    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               3 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
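  // The two halves are loaded with separate LDRs. If the low half of the output happens to
  // alias the address register, the high word is loaded first so the address is still intact
  // for the second load.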
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
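  // Thread.currentThread() reduces to a single load: the java.lang.Thread peer object is read
  // from the current Thread*, which lives in the dedicated thread register (tr).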
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case Primitive::kPrimNot: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case Primitive::kPrimLong: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
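        // Without single-copy atomic LDRD, the 64-bit volatile load is done with LDREXD,
        // which the architecture guarantees to read the doubleword atomically; the exclusive
        // monitor it sets is simply never used here.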
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     Primitive::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == Primitive::kPrimLong) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == Primitive::kPrimNot) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));        // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));   // Long offset, lo part only.
  vixl32::Register value;

  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimLong) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

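      // Atomic 64-bit store via an exclusive-access loop; a sketch of the intent:
      //   do {
      //     ldrexd temp_lo, temp_hi, [temp_reg]             // claim exclusivity (loaded value ignored)
      //     strexd temp_lo, value_lo, value_hi, [temp_reg]  // temp_lo = 0 on success
      //   } while (temp_lo != 0);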
      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimNot) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                HInvoke* invoke,
                                                Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
      ? Location::kOutputOverlap
      : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, Primitive::kPrimLong);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  vixl32::Register out = OutputRegister(invoke);           // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);      // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);   // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);  // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);     // Value.

  Location tmp_ptr_loc = locations->GetTemp(0);
  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);    // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));  // Value in memory.

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* offset */ 0u,
          /* index */ offset_loc,
          ScaleFactor::TIMES_1,
          tmp_ptr_loc,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &tmp);
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of a DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STXR
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = (tmp == 0);

  vixl32::Label loop_head;
  __ Bind(&loop_head);

  __ Ldrex(tmp, MemOperand(tmp_ptr));

  __ Subs(tmp, tmp, expected);

  {
    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               3 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ itt(eq);
    __ strex(eq, tmp, value, MemOperand(tmp_ptr));
    __ cmp(eq, tmp, 1);
  }

  __ B(eq, &loop_head);

  __ Dmb(vixl32::ISH);

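  // Materialize the boolean result from the last value of `tmp` (zero exactly when the store
  // succeeded): RSBS computes out = 1 - tmp, so success yields 1; for any non-zero `tmp` the
  // result ends up 0, either directly (tmp == 1) or via the carry-clear predicated MOV below.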
  __ Rsbs(out, tmp, 1);

  {
    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
                               2 * kMaxInstructionSizeInBytes,
                               CodeBufferCheckScope::kMaximumSize);

    __ it(cc);
    __ mov(cc, out, 0);
  }

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->UnpoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not unpoison `value`, as it is the same register as
      // `expected`, which has just been unpoisoned.
    } else {
      codegen->GetAssembler()->UnpoisonHeapReference(value);
    }
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCas(invoke, Primitive::kPrimInt, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barriers.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCas(invoke, Primitive::kPrimNot, codegen_);
}

void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            invoke->InputAt(1)->CanBeNull()
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  // Need a temporary register for the String compression feature.
  if (mirror::kUseStringCompression) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  vixl32::Register str = InputRegisterAt(invoke, 0);
  vixl32::Register arg = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
  vixl32::Register temp3;
  if (mirror::kUseStringCompression) {
    temp3 = RegisterFrom(locations->GetTemp(3));
  }

  vixl32::Label loop;
  vixl32::Label find_char_diff;
  vixl32::Label end;
  vixl32::Label different_compression;

  // Get offsets of count and value fields within a string object.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  // Take slow path and throw if input can be and is null.
  SlowPathCodeARMVIXL* slow_path = nullptr;
  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
  if (can_slow_path) {
    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
    codegen_->AddSlowPath(slow_path);
    __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
  }

  // Reference equality check, return 0 if same reference.
  __ Subs(out, str, arg);
  __ B(eq, &end);

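  // With string compression enabled, the `count` field packs the character count in the upper
  // 31 bits and the compression flag in bit 0 (0 = compressed 8-bit data, 1 = uncompressed
  // 16-bit data, per mirror::StringCompressionFlag), hence the LSR-by-1 below to extract the
  // actual lengths.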
Anton Kirilov5ec62182016-10-13 20:16:02 +01001171 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001172 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001173 __ Ldr(temp3, MemOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001174 __ Ldr(temp2, MemOperand(arg, count_offset));
1175 // Extract lengths from the `count` fields.
1176 __ Lsr(temp0, temp3, 1u);
1177 __ Lsr(temp1, temp2, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001178 } else {
1179 // Load lengths of this and argument strings.
1180 __ Ldr(temp0, MemOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001181 __ Ldr(temp1, MemOperand(arg, count_offset));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001182 }
1183 // out = length diff.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001184 __ Subs(out, temp0, temp1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001185 // temp0 = min(len(str), len(arg)).
1186
1187 {
1188 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1189 2 * kMaxInstructionSizeInBytes,
1190 CodeBufferCheckScope::kMaximumSize);
1191
1192 __ it(gt);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001193 __ mov(gt, temp0, temp1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001194 }
1195
Anton Kirilov5ec62182016-10-13 20:16:02 +01001196 // Shorter string is empty?
xueliang.zhongf51bc622016-11-04 09:23:32 +00001197 // Note that mirror::kUseStringCompression==true introduces lots of instructions,
1198 // which makes &end label far away from this branch and makes it not 'CBZ-encodable'.
1199 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001200
1201 if (mirror::kUseStringCompression) {
1202 // Check if both strings using same compression style to use this comparison loop.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001203 __ Eors(temp2, temp2, temp3);
1204 __ Lsrs(temp2, temp2, 1u);
1205 __ B(cs, &different_compression);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001206 // For string compression, calculate the number of bytes to compare (not chars).
1207 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001208 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001209
1210 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1211 2 * kMaxInstructionSizeInBytes,
1212 CodeBufferCheckScope::kMaximumSize);
1213
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001214 __ it(ne);
1215 __ add(ne, temp0, temp0, temp0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001216 }
1217
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001218 // Store offset of string value in preparation for comparison loop.
1219 __ Mov(temp1, value_offset);
1220
Anton Kirilov5ec62182016-10-13 20:16:02 +01001221 // Assertions that must hold in order to compare multiple characters at a time.
1222 CHECK_ALIGNED(value_offset, 8);
1223 static_assert(IsAligned<8>(kObjectAlignment),
1224 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1225
Scott Wakelingb77051e2016-11-21 19:46:00 +00001226 const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001227 DCHECK_EQ(char_size, 2u);
1228
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001229 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1230
Anton Kirilov5ec62182016-10-13 20:16:02 +01001231 vixl32::Label find_char_diff_2nd_cmp;
1232 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1233 __ Bind(&loop);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001234 vixl32::Register temp_reg = temps.Acquire();
Anton Kirilov5ec62182016-10-13 20:16:02 +01001235 __ Ldr(temp_reg, MemOperand(str, temp1));
1236 __ Ldr(temp2, MemOperand(arg, temp1));
1237 __ Cmp(temp_reg, temp2);
1238 __ B(ne, &find_char_diff);
1239 __ Add(temp1, temp1, char_size * 2);
1240
1241 __ Ldr(temp_reg, MemOperand(str, temp1));
1242 __ Ldr(temp2, MemOperand(arg, temp1));
1243 __ Cmp(temp_reg, temp2);
1244 __ B(ne, &find_char_diff_2nd_cmp);
1245 __ Add(temp1, temp1, char_size * 2);
1246 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1247 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
1248 __ B(hi, &loop);
1249 __ B(&end);
1250
1251 __ Bind(&find_char_diff_2nd_cmp);
1252 if (mirror::kUseStringCompression) {
1253 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
1254 __ B(ls, &end); // Was the second comparison fully beyond the end?
1255 } else {
1256 // Without string compression, we can start treating temp0 as signed
1257 // and rely on the signed comparison below.
1258 __ Sub(temp0, temp0, 2);
1259 }
1260
1261 // Find the single character difference.
1262 __ Bind(&find_char_diff);
1263 // Get the bit position of the first character that differs.
1264 __ Eor(temp1, temp2, temp_reg);
1265 __ Rbit(temp1, temp1);
1266 __ Clz(temp1, temp1);
1267
1268 // temp0 = number of characters remaining to compare.
1269 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1270 // in the comparison loop, and after the end of the shorter string data).
1271
1272 // Without string compression (temp1 >> 4) = character where difference occurs between the last
1273 // two words compared, in the interval [0,1].
1274 // (0 for low half-word different, 1 for high half-word different).
1275 // With string compression, (temp1 << 3) = byte where the difference occurs,
1276 // in the interval [0,3].
1277
1278 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1279 // the remaining string data, so just return length diff (out).
1280 // The comparison is unsigned for string compression, otherwise signed.
1281 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
1282 __ B((mirror::kUseStringCompression ? ls : le), &end);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001283
Anton Kirilov5ec62182016-10-13 20:16:02 +01001284 // Extract the characters and calculate the difference.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001285 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001286 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1287 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1288 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001289 __ Orr(temp3, temp3, 0xffu << 23); // uncompressed ? 0xff800000u : 0x7ff80000u
1290 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001291 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1292 __ Lsr(temp2, temp2, temp1); // Extract second character.
1293 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1294 __ Lsr(out, temp_reg, temp1); // Extract first character.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001295 __ And(temp2, temp2, temp3);
1296 __ And(out, out, temp3);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001297 } else {
Anton Kirilovb88c4842016-11-14 14:37:00 +00001298 __ Bic(temp1, temp1, 0xf);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001299 __ Lsr(temp2, temp2, temp1);
1300 __ Lsr(out, temp_reg, temp1);
Anton Kirilovb88c4842016-11-14 14:37:00 +00001301 __ Movt(temp2, 0);
1302 __ Movt(out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001303 }
Anton Kirilov5ec62182016-10-13 20:16:02 +01001304
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001305 __ Sub(out, out, temp2);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001306 temps.Release(temp_reg);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001307
1308 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001309 __ B(&end);
1310 __ Bind(&different_compression);
1311
1312 // Comparison for different compression style.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001313 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1314 DCHECK_EQ(c_char_size, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001315
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001316 // We want to free up the temp3, currently holding `str.count`, for comparison.
1317 // So, we move it to the bottom bit of the iteration count `temp0` which we tnen
1318 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1319 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1320 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001321 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001322 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001323 __ Mov(temp1, str);
1324 __ Mov(temp2, arg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001325 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1326 {
1327 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1328 3 * kMaxInstructionSizeInBytes,
1329 CodeBufferCheckScope::kMaximumSize);
1330 __ itt(cs); // Interleave with selection of temp1 and temp2.
1331 __ mov(cs, temp1, arg); // Preserves flags.
1332 __ mov(cs, temp2, str); // Preserves flags.
1333 }
Anton Kirilovb88c4842016-11-14 14:37:00 +00001334 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001335
1336 // Adjust temp1 and temp2 from string pointers to data pointers.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001337 __ Add(temp1, temp1, value_offset);
1338 __ Add(temp2, temp2, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001339
1340 vixl32::Label different_compression_loop;
1341 vixl32::Label different_compression_diff;
1342
1343 // Main loop for different compression.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001344 temp_reg = temps.Acquire();
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001345 __ Bind(&different_compression_loop);
1346 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1347 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
Anton Kirilovb88c4842016-11-14 14:37:00 +00001348 __ Cmp(temp_reg, temp3);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001349 __ B(ne, &different_compression_diff);
1350 __ Subs(temp0, temp0, 2);
1351 __ B(hi, &different_compression_loop);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001352 __ B(&end);
1353
1354 // Calculate the difference.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001355 __ Bind(&different_compression_diff);
1356 __ Sub(out, temp_reg, temp3);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001357 temps.Release(temp_reg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001358 // Flip the difference if the `arg` is compressed.
1359    // `temp0` contains inverted `str` compression flag, i.e. the same as `arg` compression flag.
1360 __ Lsrs(temp0, temp0, 1u);
1361 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1362 "Expecting 0=compressed, 1=uncompressed");
1363
1364 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1365 2 * kMaxInstructionSizeInBytes,
1366 CodeBufferCheckScope::kMaximumSize);
1367 __ it(cc);
1368 __ rsb(cc, out, out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001369 }
1370
1371 __ Bind(&end);
1372
1373 if (can_slow_path) {
1374 __ Bind(slow_path->GetExitLabel());
1375 }
1376}
1377
1378void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1379 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1380 LocationSummary::kNoCall,
1381 kIntrinsified);
1382 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1383 locations->SetInAt(0, Location::RequiresRegister());
1384 locations->SetInAt(1, Location::RequiresRegister());
1385 // Temporary registers to store lengths of strings and for calculations.
1386 // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
1387 locations->AddTemp(LocationFrom(r0));
1388 locations->AddTemp(Location::RequiresRegister());
1389 locations->AddTemp(Location::RequiresRegister());
1390
1391 locations->SetOut(Location::RequiresRegister());
1392}
1393
1394void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1395 ArmVIXLAssembler* assembler = GetAssembler();
1396 LocationSummary* locations = invoke->GetLocations();
1397
1398 vixl32::Register str = InputRegisterAt(invoke, 0);
1399 vixl32::Register arg = InputRegisterAt(invoke, 1);
1400 vixl32::Register out = OutputRegister(invoke);
1401
1402 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1403 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1404 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1405
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001406 vixl32::Label loop;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001407 vixl32::Label end;
1408 vixl32::Label return_true;
1409 vixl32::Label return_false;
1410
1411 // Get offsets of count, value, and class fields within a string object.
1412 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1413 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1414 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1415
1416 // Note that the null check must have been done earlier.
1417 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1418
1419 StringEqualsOptimizations optimizations(invoke);
1420 if (!optimizations.GetArgumentNotNull()) {
1421 // Check if input is null, return false if it is.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001422 __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001423 }
1424
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001425 // Reference equality check, return true if same reference.
1426 __ Cmp(str, arg);
1427 __ B(eq, &return_true);
1428
Anton Kirilov5ec62182016-10-13 20:16:02 +01001429 if (!optimizations.GetArgumentIsString()) {
1430 // Instanceof check for the argument by comparing class fields.
1431 // All string objects must have the same type since String cannot be subclassed.
1432 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1433 // If the argument is a string object, its class field must be equal to receiver's class field.
1434 __ Ldr(temp, MemOperand(str, class_offset));
1435 __ Ldr(temp1, MemOperand(arg, class_offset));
1436 __ Cmp(temp, temp1);
1437 __ B(ne, &return_false);
1438 }
1439
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001440 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001441 __ Ldr(temp, MemOperand(str, count_offset));
1442 __ Ldr(temp1, MemOperand(arg, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001443  // Check if `count` fields are equal; return false if they're not.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001444  // This also compares the compression style; if it differs, return false.
1445 __ Cmp(temp, temp1);
1446 __ B(ne, &return_false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001447 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1448 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1449 "Expecting 0=compressed, 1=uncompressed");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001450 __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001451
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001452 // Assertions that must hold in order to compare strings 4 bytes at a time.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001453 DCHECK_ALIGNED(value_offset, 4);
1454 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1455
1456 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001457 // For string compression, calculate the number of bytes to compare (not chars).
1458 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1459 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
1460 AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
1461 2 * kMaxInstructionSizeInBytes,
1462 CodeBufferCheckScope::kMaximumSize);
1463 __ it(cs); // If uncompressed,
1464 __ add(cs, temp, temp, temp); // double the byte count.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001465 }
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001466
1467 // Store offset of string value in preparation for comparison loop.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001468 __ Mov(temp1, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001469
1470 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1471 // Ok to do this because strings are zero-padded to kObjectAlignment.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001472 __ Bind(&loop);
1473 __ Ldr(out, MemOperand(str, temp1));
1474 __ Ldr(temp2, MemOperand(arg, temp1));
Scott Wakelingb77051e2016-11-21 19:46:00 +00001475 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001476 __ Cmp(out, temp2);
1477 __ B(ne, &return_false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001478 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1479 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
1480 __ B(hi, &loop);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001481
1482 // Return true and exit the function.
1483 // If loop does not result in returning false, we return true.
1484 __ Bind(&return_true);
1485 __ Mov(out, 1);
1486 __ B(&end);
1487
1488 // Return false and exit the function.
1489 __ Bind(&return_false);
1490 __ Mov(out, 0);
1491 __ Bind(&end);
1492}
1493
1494static void GenerateVisitStringIndexOf(HInvoke* invoke,
1495 ArmVIXLAssembler* assembler,
1496 CodeGeneratorARMVIXL* codegen,
1497 ArenaAllocator* allocator,
1498 bool start_at_zero) {
1499 LocationSummary* locations = invoke->GetLocations();
1500
1501 // Note that the null check must have been done earlier.
1502 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1503
1504 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1505 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
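  // (Code points above 0xFFFF denote supplementary characters that would have to be located as
  // surrogate pairs; the hand-crafted stub only matches 16-bit char values, hence the
  // unconditional slow-path dispatch for such constants below.)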
1506 SlowPathCodeARMVIXL* slow_path = nullptr;
1507 HInstruction* code_point = invoke->InputAt(1);
1508 if (code_point->IsIntConstant()) {
1509 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1510 std::numeric_limits<uint16_t>::max()) {
1511 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1512 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1513 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1514 codegen->AddSlowPath(slow_path);
1515 __ B(slow_path->GetEntryLabel());
1516 __ Bind(slow_path->GetExitLabel());
1517 return;
1518 }
1519 } else if (code_point->GetType() != Primitive::kPrimChar) {
1520 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
1521    // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1522 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1523 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1524 codegen->AddSlowPath(slow_path);
1525 __ B(hs, slow_path->GetEntryLabel());
1526 }
1527
1528 if (start_at_zero) {
1529 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1530 DCHECK(tmp_reg.Is(r2));
1531 // Start-index = 0.
1532 __ Mov(tmp_reg, 0);
1533 }
1534
1535 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1536 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1537
1538 if (slow_path != nullptr) {
1539 __ Bind(slow_path->GetExitLabel());
1540 }
1541}
1542
1543void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1544 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1545 LocationSummary::kCallOnMainAndSlowPath,
1546 kIntrinsified);
1547 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1548 // best to align the inputs accordingly.
1549 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1550 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1551 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1552 locations->SetOut(LocationFrom(r0));
1553
1554 // Need to send start-index=0.
1555 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1556}
1557
1558void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1559 GenerateVisitStringIndexOf(
1560 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1561}
1562
1563void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1564 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1565 LocationSummary::kCallOnMainAndSlowPath,
1566 kIntrinsified);
1567 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1568 // best to align the inputs accordingly.
1569 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1570 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1571 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1572 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1573 locations->SetOut(LocationFrom(r0));
1574}
1575
1576void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1577 GenerateVisitStringIndexOf(
1578 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1579}
1580
1581void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1582 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1583 LocationSummary::kCallOnMainAndSlowPath,
1584 kIntrinsified);
1585 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1586 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1587 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1588 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1589 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1590 locations->SetOut(LocationFrom(r0));
1591}
1592
1593void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1594 ArmVIXLAssembler* assembler = GetAssembler();
1595 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1596 __ Cmp(byte_array, 0);
1597 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1598 codegen_->AddSlowPath(slow_path);
1599 __ B(eq, slow_path->GetEntryLabel());
1600
1601 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1602 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1603 __ Bind(slow_path->GetExitLabel());
1604}
1605
1606void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1607 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1608 LocationSummary::kCallOnMainOnly,
1609 kIntrinsified);
1610 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1611 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1612 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1613 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1614 locations->SetOut(LocationFrom(r0));
1615}
1616
1617void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1618 // No need to emit code checking whether `locations->InAt(2)` is a null
1619 // pointer, as callers of the native method
1620 //
1621 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1622 //
1623 // all include a null check on `data` before calling that method.
1624 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1625 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1626}
1627
1628void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1629 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1630 LocationSummary::kCallOnMainAndSlowPath,
1631 kIntrinsified);
1632 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1633 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1634 locations->SetOut(LocationFrom(r0));
1635}
1636
1637void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1638 ArmVIXLAssembler* assembler = GetAssembler();
1639 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1640 __ Cmp(string_to_copy, 0);
1641 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1642 codegen_->AddSlowPath(slow_path);
1643 __ B(eq, slow_path->GetEntryLabel());
1644
1645 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1646 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1647
1648 __ Bind(slow_path->GetExitLabel());
1649}
1650
1651void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1652 // The only read barrier implementation supporting the
1653 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1654 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1655 return;
1656 }
1657
1658 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1659 LocationSummary* locations = invoke->GetLocations();
1660 if (locations == nullptr) {
1661 return;
1662 }
1663
1664 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1665 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1666 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1667
1668 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1669 locations->SetInAt(1, Location::RequiresRegister());
1670 }
1671 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1672 locations->SetInAt(3, Location::RequiresRegister());
1673 }
1674 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1675 locations->SetInAt(4, Location::RequiresRegister());
1676 }
1677 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1678 // Temporary register IP cannot be used in
1679 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1680 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1681 // temporary register from the register allocator.
1682 locations->AddTemp(Location::RequiresRegister());
1683 }
1684}
1685
1686static void CheckPosition(ArmVIXLAssembler* assembler,
1687 Location pos,
1688 vixl32::Register input,
1689 Location length,
1690 SlowPathCodeARMVIXL* slow_path,
1691 vixl32::Register temp,
1692 bool length_is_input_length = false) {
1693 // Where is the length in the Array?
1694 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1695
1696 if (pos.IsConstant()) {
1697 int32_t pos_const = Int32ConstantFrom(pos);
1698 if (pos_const == 0) {
1699 if (!length_is_input_length) {
1700 // Check that length(input) >= length.
1701 __ Ldr(temp, MemOperand(input, length_offset));
1702 if (length.IsConstant()) {
1703 __ Cmp(temp, Int32ConstantFrom(length));
1704 } else {
1705 __ Cmp(temp, RegisterFrom(length));
1706 }
1707 __ B(lt, slow_path->GetEntryLabel());
1708 }
1709 } else {
1710 // Check that length(input) >= pos.
1711 __ Ldr(temp, MemOperand(input, length_offset));
1712 __ Subs(temp, temp, pos_const);
1713 __ B(lt, slow_path->GetEntryLabel());
1714
1715 // Check that (length(input) - pos) >= length.
1716 if (length.IsConstant()) {
1717 __ Cmp(temp, Int32ConstantFrom(length));
1718 } else {
1719 __ Cmp(temp, RegisterFrom(length));
1720 }
1721 __ B(lt, slow_path->GetEntryLabel());
1722 }
1723 } else if (length_is_input_length) {
1724 // The only way the copy can succeed is if pos is zero.
1725 vixl32::Register pos_reg = RegisterFrom(pos);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001726 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001727 } else {
1728 // Check that pos >= 0.
1729 vixl32::Register pos_reg = RegisterFrom(pos);
1730 __ Cmp(pos_reg, 0);
1731 __ B(lt, slow_path->GetEntryLabel());
1732
1733 // Check that pos <= length(input).
1734 __ Ldr(temp, MemOperand(input, length_offset));
1735 __ Subs(temp, temp, pos_reg);
1736 __ B(lt, slow_path->GetEntryLabel());
1737
1738 // Check that (length(input) - pos) >= length.
1739 if (length.IsConstant()) {
1740 __ Cmp(temp, Int32ConstantFrom(length));
1741 } else {
1742 __ Cmp(temp, RegisterFrom(length));
1743 }
1744 __ B(lt, slow_path->GetEntryLabel());
1745 }
1746}
1747
1748void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1749 // The only read barrier implementation supporting the
1750 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1751 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1752
1753 ArmVIXLAssembler* assembler = GetAssembler();
1754 LocationSummary* locations = invoke->GetLocations();
1755
1756 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1757 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1758 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1759 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1760 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1761
1762 vixl32::Register src = InputRegisterAt(invoke, 0);
1763 Location src_pos = locations->InAt(1);
1764 vixl32::Register dest = InputRegisterAt(invoke, 2);
1765 Location dest_pos = locations->InAt(3);
1766 Location length = locations->InAt(4);
1767 Location temp1_loc = locations->GetTemp(0);
1768 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1769 Location temp2_loc = locations->GetTemp(1);
1770 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1771 Location temp3_loc = locations->GetTemp(2);
1772 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1773
1774 SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1775 codegen_->AddSlowPath(intrinsic_slow_path);
1776
1777 vixl32::Label conditions_on_positions_validated;
1778 SystemArrayCopyOptimizations optimizations(invoke);
1779
1780 // If source and destination are the same, we go to slow path if we need to do
1781 // forward copying.
1782 if (src_pos.IsConstant()) {
1783 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
1784 if (dest_pos.IsConstant()) {
1785 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1786 if (optimizations.GetDestinationIsSource()) {
1787 // Checked when building locations.
1788 DCHECK_GE(src_pos_constant, dest_pos_constant);
1789 } else if (src_pos_constant < dest_pos_constant) {
1790 __ Cmp(src, dest);
1791 __ B(eq, intrinsic_slow_path->GetEntryLabel());
1792 }
1793
1794 // Checked when building locations.
1795 DCHECK(!optimizations.GetDestinationIsSource()
1796 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
1797 } else {
1798 if (!optimizations.GetDestinationIsSource()) {
1799 __ Cmp(src, dest);
1800 __ B(ne, &conditions_on_positions_validated);
1801 }
1802 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
1803 __ B(gt, intrinsic_slow_path->GetEntryLabel());
1804 }
1805 } else {
1806 if (!optimizations.GetDestinationIsSource()) {
1807 __ Cmp(src, dest);
1808 __ B(ne, &conditions_on_positions_validated);
1809 }
1810 if (dest_pos.IsConstant()) {
1811 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
1812 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
1813 } else {
1814 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
1815 }
1816 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1817 }
1818
1819 __ Bind(&conditions_on_positions_validated);
1820
1821 if (!optimizations.GetSourceIsNotNull()) {
1822 // Bail out if the source is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001823 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001824 }
1825
1826 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1827 // Bail out if the destination is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001828 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001829 }
1830
1831 // If the length is negative, bail out.
1832 // We have already checked in the LocationsBuilder for the constant case.
1833 if (!length.IsConstant() &&
1834 !optimizations.GetCountIsSourceLength() &&
1835 !optimizations.GetCountIsDestinationLength()) {
1836 __ Cmp(RegisterFrom(length), 0);
1837 __ B(lt, intrinsic_slow_path->GetEntryLabel());
1838 }
1839
1840 // Validity checks: source.
1841 CheckPosition(assembler,
1842 src_pos,
1843 src,
1844 length,
1845 intrinsic_slow_path,
1846 temp1,
1847 optimizations.GetCountIsSourceLength());
1848
1849 // Validity checks: dest.
1850 CheckPosition(assembler,
1851 dest_pos,
1852 dest,
1853 length,
1854 intrinsic_slow_path,
1855 temp1,
1856 optimizations.GetCountIsDestinationLength());
1857
1858 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1859 // Check whether all elements of the source array are assignable to the component
1860 // type of the destination array. We do two checks: the classes are the same,
1861    // or the destination is Object[]. If neither check succeeds, we go to the
1862 // slow path.
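    // (Object[] is recognized further down by checking that the destination's component type
    // has a null super class, i.e. that the component type is java.lang.Object.)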
1863
1864 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1865 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1866 // /* HeapReference<Class> */ temp1 = src->klass_
1867 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1868 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1869 // Bail out if the source is not a non primitive array.
1870 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1871 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1872 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001873 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001874 // If heap poisoning is enabled, `temp1` has been unpoisoned
1875      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1876 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
1877 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
1878 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001879 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001880 }
1881
1882 // /* HeapReference<Class> */ temp1 = dest->klass_
1883 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1884 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
1885
1886 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1887 // Bail out if the destination is not a non primitive array.
1888 //
1889 // Register `temp1` is not trashed by the read barrier emitted
1890 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1891 // method produces a call to a ReadBarrierMarkRegX entry point,
1892 // which saves all potentially live registers, including
1893 // temporaries such a `temp1`.
1894 // /* HeapReference<Class> */ temp2 = temp1->component_type_
1895 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1896 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001897 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001898 // If heap poisoning is enabled, `temp2` has been unpoisoned
1899      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
1900 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
1901 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
1902 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001903 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001904 }
1905
1906 // For the same reason given earlier, `temp1` is not trashed by the
1907 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
1908 // /* HeapReference<Class> */ temp2 = src->klass_
1909 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1910 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
1911 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
1912 __ Cmp(temp1, temp2);
1913
1914 if (optimizations.GetDestinationIsTypedObjectArray()) {
1915 vixl32::Label do_copy;
1916 __ B(eq, &do_copy);
1917 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1918 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1919 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
1920 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1921 // We do not need to emit a read barrier for the following
1922 // heap reference load, as `temp1` is only used in a
1923 // comparison with null below, and this reference is not
1924 // kept afterwards.
1925 __ Ldr(temp1, MemOperand(temp1, super_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001926 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001927 __ Bind(&do_copy);
1928 } else {
1929 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1930 }
1931 } else {
1932 // Non read barrier code.
1933
1934 // /* HeapReference<Class> */ temp1 = dest->klass_
1935 __ Ldr(temp1, MemOperand(dest, class_offset));
1936 // /* HeapReference<Class> */ temp2 = src->klass_
1937 __ Ldr(temp2, MemOperand(src, class_offset));
1938 bool did_unpoison = false;
1939 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
1940 !optimizations.GetSourceIsNonPrimitiveArray()) {
1941 // One or two of the references need to be unpoisoned. Unpoison them
1942 // both to make the identity check valid.
1943 assembler->MaybeUnpoisonHeapReference(temp1);
1944 assembler->MaybeUnpoisonHeapReference(temp2);
1945 did_unpoison = true;
1946 }
1947
1948 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
1949 // Bail out if the destination is not a non primitive array.
1950 // /* HeapReference<Class> */ temp3 = temp1->component_type_
1951 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001952 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001953 assembler->MaybeUnpoisonHeapReference(temp3);
1954 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1955 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1956 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001957 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001958 }
1959
1960 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1961 // Bail out if the source is not a non primitive array.
1962 // /* HeapReference<Class> */ temp3 = temp2->component_type_
1963 __ Ldr(temp3, MemOperand(temp2, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00001964 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001965 assembler->MaybeUnpoisonHeapReference(temp3);
1966 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
1967 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
1968 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001969 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001970 }
1971
1972 __ Cmp(temp1, temp2);
1973
1974 if (optimizations.GetDestinationIsTypedObjectArray()) {
1975 vixl32::Label do_copy;
1976 __ B(eq, &do_copy);
1977 if (!did_unpoison) {
1978 assembler->MaybeUnpoisonHeapReference(temp1);
1979 }
1980 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1981 __ Ldr(temp1, MemOperand(temp1, component_offset));
1982 assembler->MaybeUnpoisonHeapReference(temp1);
1983 // /* HeapReference<Class> */ temp1 = temp1->super_class_
1984 __ Ldr(temp1, MemOperand(temp1, super_offset));
1985 // No need to unpoison the result, we're comparing against null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001986 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001987 __ Bind(&do_copy);
1988 } else {
1989 __ B(ne, intrinsic_slow_path->GetEntryLabel());
1990 }
1991 }
1992 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1993 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1994 // Bail out if the source is not a non primitive array.
1995 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1996 // /* HeapReference<Class> */ temp1 = src->klass_
1997 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1998 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
1999 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2000 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2001 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00002002 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002003 // If heap poisoning is enabled, `temp3` has been unpoisoned
2004      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2005 } else {
2006 // /* HeapReference<Class> */ temp1 = src->klass_
2007 __ Ldr(temp1, MemOperand(src, class_offset));
2008 assembler->MaybeUnpoisonHeapReference(temp1);
2009 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2010 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00002011 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002012 assembler->MaybeUnpoisonHeapReference(temp3);
2013 }
2014 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2015 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2016 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002017 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002018 }
2019
2020 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
2021 uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
2022 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
2023
2024 // Compute the base source address in `temp1`.
2025 if (src_pos.IsConstant()) {
2026 int32_t constant = Int32ConstantFrom(src_pos);
2027 __ Add(temp1, src, element_size * constant + offset);
2028 } else {
2029 __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
2030 __ Add(temp1, temp1, offset);
2031 }
2032
2033 // Compute the end source address in `temp3`.
2034 if (length.IsConstant()) {
2035 int32_t constant = Int32ConstantFrom(length);
2036 __ Add(temp3, temp1, element_size * constant);
2037 } else {
2038 __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
2039 }
2040
2041 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2042 // The base destination address is computed later, as `temp2` is
2043 // used for intermediate computations.
2044
2045 // SystemArrayCopy implementation for Baker read barriers (see
2046 // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2047 //
2048 // if (src_ptr != end_ptr) {
2049 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
2050 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002051 // bool is_gray = (rb_state == ReadBarrier::GrayState());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002052 // if (is_gray) {
2053 // // Slow-path copy.
2054 // do {
2055 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2056 // } while (src_ptr != end_ptr)
2057 // } else {
2058 // // Fast-path copy.
2059 // do {
2060 // *dest_ptr++ = *src_ptr++;
2061 // } while (src_ptr != end_ptr)
2062 // }
2063 // }
2064
2065 vixl32::Label loop, done;
2066
2067 // Don't enter copy loop if `length == 0`.
2068 __ Cmp(temp1, temp3);
2069 __ B(eq, &done);
2070
2071 // /* int32_t */ monitor = src->monitor_
2072 __ Ldr(temp2, MemOperand(src, monitor_offset));
2073 // /* LockWord */ lock_word = LockWord(monitor)
2074 static_assert(sizeof(LockWord) == sizeof(int32_t),
2075 "art::LockWord and int32_t have different sizes.");
2076
2077 // Introduce a dependency on the lock_word including the rb_state,
2078 // which shall prevent load-load reordering without using
2079 // a memory barrier (which would be more expensive).
2080 // `src` is unchanged by this operation, but its value now depends
2081 // on `temp2`.
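    // Note: an LSR by 32 yields zero on AArch32, so the ADD below leaves `src` unchanged while
    // still creating a register dependency on the just-loaded lock word in `temp2`.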
2082 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
2083
2084 // Slow path used to copy array when `src` is gray.
2085 SlowPathCodeARMVIXL* read_barrier_slow_path =
2086 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2087 codegen_->AddSlowPath(read_barrier_slow_path);
2088
2089 // Given the numeric representation, it's enough to check the low bit of the
2090 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2091 // which can be a 16-bit instruction unlike the TST immediate.
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002092 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2093 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
Anton Kirilov5ec62182016-10-13 20:16:02 +01002094 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2095 // Carry flag is the last bit shifted out by LSRS.
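    // With gray == 1 (see the static_asserts above), a set carry means the source is gray and
    // the copy must go through the read barrier slow path.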
2096 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2097
2098 // Fast-path copy.
2099
2100 // Compute the base destination address in `temp2`.
2101 if (dest_pos.IsConstant()) {
2102 int32_t constant = Int32ConstantFrom(dest_pos);
2103 __ Add(temp2, dest, element_size * constant + offset);
2104 } else {
2105 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2106 __ Add(temp2, temp2, offset);
2107 }
2108
2109 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2110 // poison/unpoison.
2111 __ Bind(&loop);
2112
2113 {
2114 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2115 const vixl32::Register temp_reg = temps.Acquire();
2116
2117 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2118 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2119 }
2120
2121 __ Cmp(temp1, temp3);
2122 __ B(ne, &loop);
2123
2124 __ Bind(read_barrier_slow_path->GetExitLabel());
2125 __ Bind(&done);
2126 } else {
2127 // Non read barrier code.
2128
2129 // Compute the base destination address in `temp2`.
2130 if (dest_pos.IsConstant()) {
2131 int32_t constant = Int32ConstantFrom(dest_pos);
2132 __ Add(temp2, dest, element_size * constant + offset);
2133 } else {
2134 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2135 __ Add(temp2, temp2, offset);
2136 }
2137
2138 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2139 // poison/unpoison.
2140 vixl32::Label loop, done;
2141 __ Cmp(temp1, temp3);
2142 __ B(eq, &done);
2143 __ Bind(&loop);
2144
2145 {
2146 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2147 const vixl32::Register temp_reg = temps.Acquire();
2148
2149 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2150 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2151 }
2152
2153 __ Cmp(temp1, temp3);
2154 __ B(ne, &loop);
2155 __ Bind(&done);
2156 }
2157
2158 // We only need one card marking on the destination array.
2159 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2160
2161 __ Bind(intrinsic_slow_path->GetExitLabel());
2162}
2163
2164static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2165 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2166 // the code generator. Furthermore, the register allocator creates fixed live intervals
2167 // for all caller-saved registers because we are doing a function call. As a result, if
2168 // the input and output locations are unallocated, the register allocator runs out of
2169 // registers and fails; however, a debuggable graph is not the common case.
2170 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2171 return;
2172 }
2173
2174 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2175 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2176 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2177
2178 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2179 LocationSummary::kCallOnMainOnly,
2180 kIntrinsified);
2181 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2182
2183 locations->SetInAt(0, Location::RequiresFpuRegister());
2184 locations->SetOut(Location::RequiresFpuRegister());
2185 // Native code uses the soft float ABI.
2186 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2187 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2188}
2189
2190static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2191 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2192 // the code generator. Furthermore, the register allocator creates fixed live intervals
2193 // for all caller-saved registers because we are doing a function call. As a result, if
2194 // the input and output locations are unallocated, the register allocator runs out of
2195 // registers and fails; however, a debuggable graph is not the common case.
2196 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2197 return;
2198 }
2199
2200 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2201 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2202 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2203 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2204
2205 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2206 LocationSummary::kCallOnMainOnly,
2207 kIntrinsified);
2208 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2209
2210 locations->SetInAt(0, Location::RequiresFpuRegister());
2211 locations->SetInAt(1, Location::RequiresFpuRegister());
2212 locations->SetOut(Location::RequiresFpuRegister());
2213 // Native code uses the soft float ABI.
2214 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2215 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2216 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2217 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2218}
2219
2220static void GenFPToFPCall(HInvoke* invoke,
2221 ArmVIXLAssembler* assembler,
2222 CodeGeneratorARMVIXL* codegen,
2223 QuickEntrypointEnum entry) {
2224 LocationSummary* const locations = invoke->GetLocations();
2225
2226 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2227 DCHECK(locations->WillCall() && locations->Intrinsified());
2228
2229 // Native code uses the soft float ABI.
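  // (The double argument is passed in the first two core registers of the runtime calling
  // convention and the result comes back the same way, hence the VMOVs around the runtime call.)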
2230 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2231 RegisterFrom(locations->GetTemp(1)),
2232 InputDRegisterAt(invoke, 0));
2233 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2234 __ Vmov(OutputDRegister(invoke),
2235 RegisterFrom(locations->GetTemp(0)),
2236 RegisterFrom(locations->GetTemp(1)));
2237}
2238
2239static void GenFPFPToFPCall(HInvoke* invoke,
2240 ArmVIXLAssembler* assembler,
2241 CodeGeneratorARMVIXL* codegen,
2242 QuickEntrypointEnum entry) {
2243 LocationSummary* const locations = invoke->GetLocations();
2244
2245 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2246 DCHECK(locations->WillCall() && locations->Intrinsified());
2247
2248 // Native code uses the soft float ABI.
2249 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2250 RegisterFrom(locations->GetTemp(1)),
2251 InputDRegisterAt(invoke, 0));
2252 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2253 RegisterFrom(locations->GetTemp(3)),
2254 InputDRegisterAt(invoke, 1));
2255 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2256 __ Vmov(OutputDRegister(invoke),
2257 RegisterFrom(locations->GetTemp(0)),
2258 RegisterFrom(locations->GetTemp(1)));
2259}
2260
2261void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2262 CreateFPToFPCallLocations(arena_, invoke);
2263}
2264
2265void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2266 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2267}
2268
2269void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2270 CreateFPToFPCallLocations(arena_, invoke);
2271}
2272
2273void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2274 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2275}
2276
2277void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2278 CreateFPToFPCallLocations(arena_, invoke);
2279}
2280
2281void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2282 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2283}
2284
2285void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2286 CreateFPToFPCallLocations(arena_, invoke);
2287}
2288
2289void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2290 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2291}
2292
2293void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2294 CreateFPToFPCallLocations(arena_, invoke);
2295}
2296
2297void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2298 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2299}
2300
2301void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2302 CreateFPToFPCallLocations(arena_, invoke);
2303}
2304
2305void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2306 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2307}
2308
2309void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2310 CreateFPToFPCallLocations(arena_, invoke);
2311}
2312
2313void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2314 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2315}
2316
2317void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2318 CreateFPToFPCallLocations(arena_, invoke);
2319}
2320
2321void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2322 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2323}
2324
2325void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2326 CreateFPToFPCallLocations(arena_, invoke);
2327}
2328
2329void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2330 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2331}
2332
2333void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2334 CreateFPToFPCallLocations(arena_, invoke);
2335}
2336
2337void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2338 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2339}
2340
2341void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2342 CreateFPToFPCallLocations(arena_, invoke);
2343}
2344
2345void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2346 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2347}
2348
2349void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2350 CreateFPToFPCallLocations(arena_, invoke);
2351}
2352
2353void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2354 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2355}
2356
2357void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2358 CreateFPToFPCallLocations(arena_, invoke);
2359}
2360
2361void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2362 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2363}
2364
2365void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2366 CreateFPToFPCallLocations(arena_, invoke);
2367}
2368
2369void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2370 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2371}
2372
2373void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2374 CreateFPFPToFPCallLocations(arena_, invoke);
2375}
2376
2377void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2378 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2379}
2380
2381void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2382 CreateFPFPToFPCallLocations(arena_, invoke);
2383}
2384
2385void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2386 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2387}
2388
2389void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2390 CreateFPFPToFPCallLocations(arena_, invoke);
2391}
2392
2393void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2394 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2395}
2396
2397void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2398 CreateIntToIntLocations(arena_, invoke);
2399}
2400
2401void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2402 ArmVIXLAssembler* assembler = GetAssembler();
2403 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2404}
2405
2406void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2407 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2408 LocationSummary::kNoCall,
2409 kIntrinsified);
2410 locations->SetInAt(0, Location::RequiresRegister());
2411 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2412}
2413
2414void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2415 ArmVIXLAssembler* assembler = GetAssembler();
2416 LocationSummary* locations = invoke->GetLocations();
2417
2418 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2419 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2420 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2421 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2422
2423 __ Rbit(out_reg_lo, in_reg_hi);
2424 __ Rbit(out_reg_hi, in_reg_lo);
2425}
2426
2427void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2428 CreateIntToIntLocations(arena_, invoke);
2429}
2430
2431void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2432 ArmVIXLAssembler* assembler = GetAssembler();
2433 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2434}
2435
2436void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2437 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2438 LocationSummary::kNoCall,
2439 kIntrinsified);
2440 locations->SetInAt(0, Location::RequiresRegister());
2441 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2442}
2443
2444void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2445 ArmVIXLAssembler* assembler = GetAssembler();
2446 LocationSummary* locations = invoke->GetLocations();
2447
2448 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2449 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2450 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2451 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2452
2453 __ Rev(out_reg_lo, in_reg_hi);
2454 __ Rev(out_reg_hi, in_reg_lo);
2455}
2456
2457void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2458 CreateIntToIntLocations(arena_, invoke);
2459}
2460
2461void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2462 ArmVIXLAssembler* assembler = GetAssembler();
2463 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2464}
2465
2466static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
2467 DCHECK(Primitive::IsIntOrLongType(type)) << type;
2468 DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2469 DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2470
2471 bool is_long = type == Primitive::kPrimLong;
2472 LocationSummary* locations = instr->GetLocations();
2473 Location in = locations->InAt(0);
2474 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2475 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2476 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2477 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2478 vixl32::Register out_r = OutputRegister(instr);
2479
2480 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2481 // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
2482  // transferring data from core reg to upper or lower half of a VFP D-reg requires extra latency.
2483 // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
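  // For example, with an int input of 0x80000001 both halves of the D-reg hold that value;
  // VCNT produces per-byte bit counts and the VPADDL pairwise additions fold them so that each
  // 32-bit lane holds popcount(input) == 2, which the final VMOV moves into out_r.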
2484 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2485 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2486 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2487 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2488 if (is_long) {
2489 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2490 }
2491 __ Vmov(out_r, tmp_s);
2492}
2493
2494void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2495 CreateIntToIntLocations(arena_, invoke);
2496 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2497}
2498
2499void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2500 GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2501}
2502
2503void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2504 VisitIntegerBitCount(invoke);
2505}
2506
2507void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2508 GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2509}
2510
2511void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2512 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2513 LocationSummary::kNoCall,
2514 kIntrinsified);
2515 locations->SetInAt(0, Location::RequiresRegister());
2516 locations->SetInAt(1, Location::RequiresRegister());
2517 locations->SetInAt(2, Location::RequiresRegister());
2518 locations->SetInAt(3, Location::RequiresRegister());
2519 locations->SetInAt(4, Location::RequiresRegister());
2520
2521 // Temporary registers to store lengths of strings and for calculations.
2522 locations->AddTemp(Location::RequiresRegister());
2523 locations->AddTemp(Location::RequiresRegister());
2524 locations->AddTemp(Location::RequiresRegister());
2525}
2526
2527void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2528 ArmVIXLAssembler* assembler = GetAssembler();
2529 LocationSummary* locations = invoke->GetLocations();
2530
2531 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2532 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2533 DCHECK_EQ(char_size, 2u);
2534
2535 // Location of data in char array buffer.
2536 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2537
2538 // Location of char array data in string.
2539 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2540
2541 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2542  // Since getChars() calls getCharsNoCheck(), we use registers rather than constants.
2543 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2544 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2545 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2546 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2547 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2548
2549 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2550 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2551 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2552
2553 vixl32::Label done, compressed_string_loop;
2554 // dst to be copied.
2555 __ Add(dst_ptr, dstObj, data_offset);
2556 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2557
2558 __ Subs(num_chr, srcEnd, srcBegin);
2559 // Early out for valid zero-length retrievals.
2560 __ B(eq, &done);
2561
2562 // Compute the base pointer to the src string's character data.
2563 __ Add(src_ptr, srcObj, value_offset);
2564
2565 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2566 vixl32::Register temp;
2567 vixl32::Label compressed_string_preloop;
2568 if (mirror::kUseStringCompression) {
2569 // Location of count in string.
2570 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2571 temp = temps.Acquire();
2572 // Load the string's count field, which holds the length and the compression flag.
2573 __ Ldr(temp, MemOperand(srcObj, count_offset));
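    // The LSB of the count is the compression flag; the Tst/B(eq) below branches to the
    // compressed-string path when that bit is clear.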
2574 __ Tst(temp, 1);
2575 temps.Release(temp);
2576 __ B(eq, &compressed_string_preloop);
2577 }
2578 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2579
2580 // Do the copy.
2581 vixl32::Label loop, remainder;
2582
2583 temp = temps.Acquire();
2584 // Subtract into a temp so that num_chr does not need to be repaired on the < 4 character path.
2585 __ Subs(temp, num_chr, 4);
2586 __ B(lt, &remainder);
2587
2588 // Keep the result of the earlier Subs; we are going to fetch at least 4 characters.
2589 __ Mov(num_chr, temp);
2590
2591 // The main loop, used for longer fetches, loads and stores 4 x 16-bit characters at a time.
2592 // (LDRD/STRD fault on unaligned addresses, and it is not worth inlining the extra code needed
2593 // to rectify that everywhere this intrinsic applies.)
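  // Each iteration moves 8 bytes as two 32-bit words: the first Ldr/Str pair copies chars 2-3 at
  // offset 4 (with the Subs scheduled between them), and the second pair copies chars 0-1 while
  // post-incrementing both pointers by 8 bytes.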
2594 __ Bind(&loop);
2595 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2596 __ Subs(num_chr, num_chr, 4);
2597 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2598 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2599 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2600 temps.Release(temp);
2601 __ B(ge, &loop);
2602
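  // num_chr went negative on the final Subs of the loop; adding 4 back yields the 0-3 remaining
  // characters, and we skip the remainder loop when none are left.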
2603 __ Adds(num_chr, num_chr, 4);
2604 __ B(eq, &done);
2605
2606 // Loop for the < 4 character case and for remainder handling: loads and stores one
2607 // 16-bit Java character at a time.
2608 __ Bind(&remainder);
2609 temp = temps.Acquire();
2610 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2611 __ Subs(num_chr, num_chr, 1);
2612 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2613 temps.Release(temp);
2614 __ B(gt, &remainder);
2615
2616 if (mirror::kUseStringCompression) {
2617 __ B(&done);
2618
2619 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2620 DCHECK_EQ(c_char_size, 1u);
2621 // Copy loop for a compressed src, widening one character from 8 bits to 16 bits at a time.
2622 __ Bind(&compressed_string_preloop);
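    // Compressed strings store one byte per character, so srcBegin is added unscaled here
    // (the uncompressed path above scales it by two with LSL #1).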
2623 __ Add(src_ptr, src_ptr, srcBegin);
2624 __ Bind(&compressed_string_loop);
2625 temp = temps.Acquire();
2626 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2627 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2628 temps.Release(temp);
2629 __ Subs(num_chr, num_chr, 1);
2630 __ B(gt, &compressed_string_loop);
2631 }
2632
2633 __ Bind(&done);
2634}
2635
2636void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2637 CreateFPToIntLocations(arena_, invoke);
2638}
2639
2640void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2641 ArmVIXLAssembler* const assembler = GetAssembler();
2642 const vixl32::Register out = OutputRegister(invoke);
2643 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2644 // we don't care about the sign bit anyway.
2645 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
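  // E.g., for Float.NEGATIVE_INFINITY the input bits are 0xff800000; the left shift gives
  // 0xff000000, the Eor with 'infinity' (0xff000000) yields 0, Clz returns 32 and the Lsr
  // produces 1. Any non-infinite input leaves a non-zero value after the Eor, so the result is 0.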
2646
2647 __ Vmov(out, InputSRegisterAt(invoke, 0));
2648 // We don't care about the sign bit, so shift left.
2649 __ Lsl(out, out, 1);
2650 __ Eor(out, out, infinity);
2651 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2652 __ Clz(out, out);
2653 // Any number less than 32 logically shifted right by 5 bits results in 0;
2654 // the same operation on 32 yields 1.
2655 __ Lsr(out, out, 5);
2656}
2657
2658void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2659 CreateFPToIntLocations(arena_, invoke);
2660}
2661
2662void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2663 ArmVIXLAssembler* const assembler = GetAssembler();
2664 const vixl32::Register out = OutputRegister(invoke);
2665 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2666 const vixl32::Register temp = temps.Acquire();
2667 // The highest 32 bits of double precision positive infinity separated into
2668 // two constants encodable as immediate operands.
2669 constexpr uint32_t infinity_high = 0x7f000000U;
2670 constexpr uint32_t infinity_high2 = 0x00f00000U;
2671
2672 static_assert((infinity_high | infinity_high2) ==
2673 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2674 "The constants do not add up to the high 32 bits of double "
2675 "precision positive infinity.");
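  // E.g., for Double.NEGATIVE_INFINITY the high word is 0xfff00000: the two Eors turn it into
  // 0x80000000, the left shift discards the sign bit, and Orr-ing with the zero low word gives 0,
  // so Clz returns 32 and the Lsr produces 1. For any non-infinite input the Orr result is non-zero.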
2676 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2677 __ Eor(out, out, infinity_high);
2678 __ Eor(out, out, infinity_high2);
2679 // We don't care about the sign bit, so shift left.
2680 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2681 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2682 __ Clz(out, out);
2683 // Any number less than 32 logically shifted right by 5 bits results in 0;
2684 // the same operation on 32 yields 1.
2685 __ Lsr(out, out, 5);
2686}
2687
2688UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
2689UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
2690UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
2691UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
2692UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
2693UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
2694UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil) // Could be done by changing rounding mode, maybe?
2695UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor) // Could be done by changing rounding mode, maybe?
2696UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
2697UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
2698UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
2699UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
2700UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
2701UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
2702UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
2703UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
2704UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
2705UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
2706
2707UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
2708UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
2709
2710// 1.8.
2711UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
2712UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
2713UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
2714UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
2715UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
2716
2717UNREACHABLE_INTRINSICS(ARMVIXL)
2718
2719#undef __
2720
2721} // namespace arm
2722} // namespace art