/*
 * Copyright (C) 2016 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_arm_vixl.h"

#include "arch/arm/instruction_set_features_arm.h"
#include "code_generator_arm_vixl.h"
#include "common_arm.h"
#include "lock_word.h"
#include "mirror/array-inl.h"

#include "aarch32/constants-aarch32.h"

namespace art {
namespace arm {

#define __ assembler->GetVIXLAssembler()->

using helpers::DRegisterFrom;
using helpers::HighRegisterFrom;
using helpers::InputDRegisterAt;
using helpers::InputRegisterAt;
using helpers::InputSRegisterAt;
using helpers::InputVRegisterAt;
using helpers::Int32ConstantFrom;
using helpers::LocationFrom;
using helpers::LowRegisterFrom;
using helpers::LowSRegisterFrom;
using helpers::OutputDRegister;
using helpers::OutputSRegister;
using helpers::OutputRegister;
using helpers::OutputVRegister;
using helpers::RegisterFrom;
using helpers::SRegisterFrom;
using helpers::DRegisterFromS;

using namespace vixl::aarch32;  // NOLINT(build/namespaces)

using vixl::ExactAssemblyScope;
using vixl::CodeBufferCheckScope;

ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
  return codegen_->GetAssembler();
}

ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
// intrinsified call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
//
// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
//       sub-optimal (compared to a direct pointer call), but this is a slow-path.

class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}

  Location MoveArguments(CodeGenerator* codegen) {
    InvokeDexCallingConventionVisitorARMVIXL calling_convention_visitor;
    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
    return calling_convention_visitor.GetMethodLocation();
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    Location method_loc = MoveArguments(codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
    } else {
      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
    }
    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      codegen->MoveFromReturnRegister(out, invoke_->GetType());
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
};

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
      : SlowPathCodeARMVIXL(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    vixl32::Register dest = InputRegisterAt(instruction_, 2);
    Location dest_pos = locations->InAt(3);
    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));

    __ Bind(GetEntryLabel());
    // Compute the base destination address in `dst_curr_addr`.
    if (dest_pos.IsConstant()) {
      int32_t constant = Int32ConstantFrom(dest_pos);
      __ Add(dst_curr_addr, dest, element_size * constant + offset);
    } else {
      __ Add(dst_curr_addr,
             dest,
             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
      __ Add(dst_curr_addr, dst_curr_addr, offset);
    }

    vixl32::Label loop;
    __ Bind(&loop);
    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
    assembler->MaybeUnpoisonHeapReference(tmp);
    // TODO: Inline the mark bit check before calling the runtime?
    // tmp = ReadBarrier::Mark(tmp);
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
    // explanations.)
    DCHECK(!tmp.IsSP());
    DCHECK(!tmp.IsLR());
    DCHECK(!tmp.IsPC());
    // IP is used internally by the ReadBarrierMarkRegX entry point
    // as a temporary (and not preserved). It thus cannot be used by
    // any live register in this slow path.
    DCHECK(!src_curr_addr.Is(ip));
    DCHECK(!dst_curr_addr.Is(ip));
    DCHECK(!src_stop_addr.Is(ip));
    DCHECK(!tmp.Is(ip));
    DCHECK(tmp.IsRegister()) << tmp;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
    // This runtime call does not require a stack map.
    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    assembler->MaybePoisonHeapReference(tmp);
    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
    __ Cmp(src_curr_addr, src_stop_addr);
    __ B(ne, &loop, /* far_target */ false);
    __ B(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE {
    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
};

IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      codegen_(codegen),
      assembler_(codegen->GetAssembler()),
      features_(codegen->GetInstructionSetFeatures()) {}

bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
  } else {
    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
  } else {
    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
}

static void GenNumberOfLeadingZeros(LocationSummary* locations,
                                    Primitive::Type type,
                                    ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  vixl32::Register out = RegisterFrom(locations->Out());

  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Label end;
    __ Clz(out, in_reg_hi);
    __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
    __ Clz(out, in_reg_lo);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    __ Clz(out, RegisterFrom(in));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void GenNumberOfTrailingZeros(LocationSummary* locations,
                                     Primitive::Type type,
                                     ArmVIXLAssembler* assembler) {
  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));

  vixl32::Register out = RegisterFrom(locations->Out());

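  // RBIT reverses the bit order, so counting the leading zeros of the reversed value
  // yields the number of trailing zeros of the original input.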
  if (type == Primitive::kPrimLong) {
    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
    vixl32::Label end;
    __ Rbit(out, in_reg_lo);
    __ Clz(out, out);
    __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
    __ Rbit(out, in_reg_hi);
    __ Clz(out, out);
    __ Add(out, out, 32);
    __ Bind(&end);
  } else {
    vixl32::Register in = RegisterFrom(locations->InAt(0));
    __ Rbit(out, in);
    __ Clz(out, out);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);

  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations,
                          bool is64bit,
                          ArmVIXLAssembler* assembler) {
  Location in = locations->InAt(0);
  Location output = locations->Out();

  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));

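  // Branch-free abs: with mask = in >> 31 (arithmetic shift, so all ones for negative
  // inputs and zero otherwise), abs(in) == (in + mask) ^ mask. In the 64-bit case the
  // addition is carried across the register pair with ADDS/ADC.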
  if (is64bit) {
    vixl32::Register in_reg_lo = LowRegisterFrom(in);
    vixl32::Register in_reg_hi = HighRegisterFrom(in);
    vixl32::Register out_reg_lo = LowRegisterFrom(output);
    vixl32::Register out_reg_hi = HighRegisterFrom(output);

    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";

    __ Asr(mask, in_reg_hi, 31);
    __ Adds(out_reg_lo, in_reg_lo, mask);
    __ Adc(out_reg_hi, in_reg_hi, mask);
    __ Eor(out_reg_lo, mask, out_reg_lo);
    __ Eor(out_reg_hi, mask, out_reg_hi);
  } else {
    vixl32::Register in_reg = RegisterFrom(in);
    vixl32::Register out_reg = RegisterFrom(output);

    __ Asr(mask, in_reg, 31);
    __ Add(out_reg, in_reg, mask);
    __ Eor(out_reg, mask, out_reg);
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}


void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::SRegister op1 = SRegisterFrom(op1_loc);
  vixl32::SRegister op2 = SRegisterFrom(op2_loc);
  vixl32::SRegister out = OutputSRegister(invoke);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp1 = temps.Acquire();
  vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
  vixl32::Label nan, done;

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &nan, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F32, out, op2);
  }
  __ B(ne, &done, /* far_target */ false);  // for <>(not equal), we've done min/max calculation.

  // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
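  // The operands are equal except possibly for the sign of zero, so operate on the raw
  // bits: ORing the bit patterns picks -0.0 if either input is -0.0 (the min), while
  // ANDing them picks +0.0 unless both inputs are -0.0 (the max).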
  __ Vmov(temp1, op1);
  __ Vmov(temp2, op2);
  if (is_min) {
    __ Orr(temp1, temp1, temp2);
  } else {
    __ And(temp1, temp1, temp2);
  }
  __ Vmov(out, temp1);
  __ B(&done);

  // handle NaN input.
  __ Bind(&nan);
  __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
  __ Vmov(out, temp1);

  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
  invoke->GetLocations()->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler());
}

static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::DRegister op1 = DRegisterFrom(op1_loc);
  vixl32::DRegister op2 = DRegisterFrom(op2_loc);
  vixl32::DRegister out = OutputDRegister(invoke);
  vixl32::Label handle_nan_eq, done;

  DCHECK(op1.Is(out));

  __ Vcmp(op1, op2);
  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
  __ B(vs, &handle_nan_eq, /* far_target */ false);  // if un-ordered, go to NaN handling.

  // op1 <> op2
  vixl32::ConditionType cond = is_min ? gt : lt;
  {
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                2 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ it(cond);
    __ vmov(cond, F64, out, op2);
  }
  __ B(ne, &done, /* far_target */ false);  // for <>(not equal), we've done min/max calculation.

  // handle op1 == op2, max(+0.0,-0.0).
  if (!is_min) {
    __ Vand(F64, out, op1, op2);
    __ B(&done);
  }

  // handle op1 == op2, min(+0.0,-0.0), NaN input.
  __ Bind(&handle_nan_eq);
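  // A bitwise OR keeps the all-ones exponent and non-zero mantissa of a NaN input, and
  // yields -0.0 for min(+0.0, -0.0), so a single VORR covers both remaining cases.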
  __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.

  __ Bind(&done);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler());
}

static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  Location op1_loc = invoke->GetLocations()->InAt(0);
  Location op2_loc = invoke->GetLocations()->InAt(1);
  Location out_loc = invoke->GetLocations()->Out();

  // Optimization: don't generate any code if inputs are the same.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));  // out_loc is set as SameAsFirstInput() in location builder.
    return;
  }

  vixl32::Register op1_lo = LowRegisterFrom(op1_loc);
  vixl32::Register op1_hi = HighRegisterFrom(op1_loc);
  vixl32::Register op2_lo = LowRegisterFrom(op2_loc);
  vixl32::Register op2_hi = HighRegisterFrom(op2_loc);
  vixl32::Register out_lo = LowRegisterFrom(out_loc);
  vixl32::Register out_hi = HighRegisterFrom(out_loc);
  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
  const vixl32::Register temp = temps.Acquire();

  DCHECK(op1_lo.Is(out_lo));
  DCHECK(op1_hi.Is(out_hi));

  // Compare op1 >= op2, or op1 < op2.
  __ Cmp(out_lo, op2_lo);
  __ Sbcs(temp, out_hi, op2_hi);
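  // SBCS subtracts the high words together with the borrow from the low-word CMP, so the
  // N and V flags reflect the sign of the full 64-bit difference and GE/LT can be used
  // directly below.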

  // Now GE/LT condition code is correct for the long comparison.
  {
    vixl32::ConditionType cond = is_min ? ge : lt;
    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
                                3 * kMaxInstructionSizeInBytes,
                                CodeBufferCheckScope::kMaximumSize);
    __ itt(cond);
    __ mov(cond, out_lo, op2_lo);
    __ mov(cond, out_hi, op2_hi);
  }
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMaxLong(invoke, /* is_min */ false, GetAssembler());
}

static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
  vixl32::Register op1 = InputRegisterAt(invoke, 0);
  vixl32::Register op2 = InputRegisterAt(invoke, 1);
  vixl32::Register out = OutputRegister(invoke);

  __ Cmp(op1, op2);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ ite(is_min ? lt : gt);
    __ mov(is_min ? lt : gt, out, op1);
    __ mov(is_min ? ge : le, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ true, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke, /* is_min */ false, GetAssembler());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMathRint(HInvoke* invoke) {
  if (features_.HasARMv8AInstructions()) {
    CreateFPToFPLocations(arena_, invoke);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitMathRint(HInvoke* invoke) {
  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
  ArmVIXLAssembler* assembler = GetAssembler();
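  // VRINTN rounds to the nearest value with ties to even, matching Math.rint; it is only
  // available with the ARMv8-A instructions checked for above.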
  __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsb(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldr(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use ldrd as addr may be unaligned.
  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
  if (addr.Is(lo)) {
    __ Ldr(hi, MemOperand(addr, 4));
    __ Ldr(lo, MemOperand(addr));
  } else {
    __ Ldr(lo, MemOperand(addr));
    __ Ldr(hi, MemOperand(addr, 4));
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  __ Ldrsh(OutputRegister(invoke), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strb(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Str(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  // Ignore upper 4B of long address.
  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
  // exception. So we can't use strd as addr may be unaligned.
  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr));
  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
}

void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Strh(InputRegisterAt(invoke, 1), MemOperand(LowRegisterFrom(invoke->GetLocations()->InAt(0))));
}

void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
  ArmVIXLAssembler* assembler = GetAssembler();
  __ Ldr(OutputRegister(invoke),
         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
}

static void GenUnsafeGet(HInvoke* invoke,
                         Primitive::Type type,
                         bool is_volatile,
                         CodeGeneratorARMVIXL* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  Location base_loc = locations->InAt(1);
  vixl32::Register base = InputRegisterAt(invoke, 1);  // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
  Location trg_loc = locations->Out();

  switch (type) {
    case Primitive::kPrimInt: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      __ Ldr(trg, MemOperand(base, offset));
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    case Primitive::kPrimNot: {
      vixl32::Register trg = RegisterFrom(trg_loc);
      if (kEmitCompilerReadBarrier) {
        if (kUseBakerReadBarrier) {
          Location temp = locations->GetTemp(0);
          codegen->GenerateReferenceLoadWithBakerReadBarrier(
              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
        } else {
          __ Ldr(trg, MemOperand(base, offset));
          if (is_volatile) {
            __ Dmb(vixl32::ISH);
          }
          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
        }
      } else {
        __ Ldr(trg, MemOperand(base, offset));
        if (is_volatile) {
          __ Dmb(vixl32::ISH);
        }
        assembler->MaybeUnpoisonHeapReference(trg);
      }
      break;
    }

    case Primitive::kPrimLong: {
      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
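        // On cores without single-copy atomic LDRD/STRD, LDREXD is used purely to get an
        // atomic 64-bit load; the exclusive state it sets is never paired with a STREXD.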
        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
        const vixl32::Register temp_reg = temps.Acquire();
        __ Add(temp_reg, base, offset);
        __ Ldrexd(trg_lo, trg_hi, MemOperand(temp_reg));
      } else {
        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
      }
      if (is_volatile) {
        __ Dmb(vixl32::ISH);
      }
      break;
    }

    default:
      LOG(FATAL) << "Unexpected type " << type;
      UNREACHABLE();
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
                                          HInvoke* invoke,
                                          Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  if (can_call && kUseBakerReadBarrier) {
    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
  }
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(),
                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // We need a temporary register for the read barrier marking slow
    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
}

static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
                                     const ArmInstructionSetFeatures& features,
                                     Primitive::Type type,
                                     bool is_volatile,
                                     HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());

  if (type == Primitive::kPrimLong) {
    // Potentially need temps for ldrexd-strexd loop.
    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
    }
  } else if (type == Primitive::kPrimNot) {
    // Temps for card-marking.
    locations->AddTemp(Location::RequiresRegister());  // Temp.
    locations->AddTemp(Location::RequiresRegister());  // Card.
  }
}

void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
}
void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoid(
      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
}

static void GenUnsafePut(LocationSummary* locations,
                         Primitive::Type type,
                         bool is_volatile,
                         bool is_ordered,
                         CodeGeneratorARMVIXL* codegen) {
  ArmVIXLAssembler* assembler = codegen->GetAssembler();

  vixl32::Register base = RegisterFrom(locations->InAt(1));  // Object pointer.
  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
  vixl32::Register value;

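  // A DMB ISH before the store keeps earlier accesses from being reordered past it
  // (release semantics for volatile and ordered puts); volatile puts get a second DMB
  // after the store (below) to also order it before subsequent accesses.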
  if (is_volatile || is_ordered) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimLong) {
    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
    value = value_lo;
    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
      const vixl32::Register temp_reg = temps.Acquire();

      __ Add(temp_reg, base, offset);
      vixl32::Label loop_head;
      __ Bind(&loop_head);
      __ Ldrexd(temp_lo, temp_hi, MemOperand(temp_reg));
      __ Strexd(temp_lo, value_lo, value_hi, MemOperand(temp_reg));
      __ Cmp(temp_lo, 0);
      __ B(ne, &loop_head, /* far_target */ false);
    } else {
      __ Strd(value_lo, value_hi, MemOperand(base, offset));
    }
  } else {
    value = RegisterFrom(locations->InAt(3));
    vixl32::Register source = value;
    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
      __ Mov(temp, value);
      assembler->PoisonHeapReference(temp);
      source = temp;
    }
    __ Str(source, MemOperand(base, offset));
  }

  if (is_volatile) {
    __ Dmb(vixl32::ISH);
  }

  if (type == Primitive::kPrimNot) {
    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
  }
}

void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimInt,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimNot,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ false,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ false,
               /* is_ordered */ true,
               codegen_);
}
void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(),
               Primitive::kPrimLong,
               /* is_volatile */ true,
               /* is_ordered */ false,
               codegen_);
}

static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                HInvoke* invoke,
                                                Primitive::Type type) {
  bool can_call = kEmitCompilerReadBarrier &&
      kUseBakerReadBarrier &&
      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           (can_call
                                                                ? LocationSummary::kCallOnSlowPath
                                                                : LocationSummary::kNoCall),
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  locations->SetInAt(4, Location::RequiresRegister());

  // If heap poisoning is enabled, we don't want the unpoisoning
  // operations to potentially clobber the output. Likewise when
  // emitting a (Baker) read barrier, which may call.
  Location::OutputOverlap overlaps =
      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
          ? Location::kOutputOverlap
          : Location::kNoOutputOverlap;
  locations->SetOut(Location::RequiresRegister(), overlaps);

  // Temporary registers used in CAS. In the object case
  // (UnsafeCASObject intrinsic), these are also used for
  // card-marking, and possibly for (Baker) read barrier.
  locations->AddTemp(Location::RequiresRegister());  // Pointer.
  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
}

static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
  DCHECK_NE(type, Primitive::kPrimLong);

  ArmVIXLAssembler* assembler = codegen->GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Location out_loc = locations->Out();
  vixl32::Register out = OutputRegister(invoke);  // Boolean result.

  vixl32::Register base = InputRegisterAt(invoke, 1);  // Object pointer.
  Location offset_loc = locations->InAt(2);
  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Offset (discard high 4B).
  vixl32::Register expected = InputRegisterAt(invoke, 3);  // Expected.
  vixl32::Register value = InputRegisterAt(invoke, 4);  // Value.

  Location tmp_ptr_loc = locations->GetTemp(0);
  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);  // Pointer to actual memory.
  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));  // Value in memory.

  if (type == Primitive::kPrimNot) {
    // The only read barrier implementation supporting the
    // UnsafeCASObject intrinsic is the Baker-style read barriers.
    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
    // object and scan the receiver at the next GC for nothing.
    bool value_can_be_null = true;  // TODO: Worth finding out this information?
    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // Need to make sure the reference stored in the field is a to-space
      // one before attempting the CAS or the CAS could fail incorrectly.
      codegen->GenerateReferenceLoadWithBakerReadBarrier(
          invoke,
          out_loc,  // Unused, used only as a "temporary" within the read barrier.
          base,
          /* offset */ 0u,
          /* index */ offset_loc,
          ScaleFactor::TIMES_1,
          tmp_ptr_loc,
          /* needs_null_check */ false,
          /* always_update_field */ true,
          &tmp);
    }
  }

  // Prevent reordering with prior memory operations.
  // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
  // latter allows a preceding load to be delayed past the STXR
  // instruction below.
  __ Dmb(vixl32::ISH);

  __ Add(tmp_ptr, base, offset);

  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
    codegen->GetAssembler()->PoisonHeapReference(expected);
    if (value.Is(expected)) {
      // Do not poison `value`, as it is the same register as
      // `expected`, which has just been poisoned.
    } else {
      codegen->GetAssembler()->PoisonHeapReference(value);
    }
  }

  // do {
  //   tmp = [r_ptr] - expected;
  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
  // result = tmp != 0;

  vixl32::Label loop_head;
  __ Bind(&loop_head);

  __ Ldrex(tmp, MemOperand(tmp_ptr));

  __ Subs(tmp, tmp, expected);

  {
    ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
                           3 * kMaxInstructionSizeInBytes,
                           CodeBufferCheckScope::kMaximumSize);

    __ itt(eq);
    __ strex(eq, tmp, value, MemOperand(tmp_ptr));
    __ cmp(eq, tmp, 1);
  }

  __ B(eq, &loop_head, /* far_target */ false);

  __ Dmb(vixl32::ISH);

  __ Rsbs(out, tmp, 1);
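  // out = 1 - tmp: 1 on success (tmp == 0). On failure the result is either already 0
  // (tmp == 1) or the RSBS borrows (carry clear) and the conditional MOV below forces
  // out to 0.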
1293
1294 {
Artem Serov0fb37192016-12-06 18:13:40 +00001295 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1296 2 * kMaxInstructionSizeInBytes,
1297 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001298
1299 __ it(cc);
1300 __ mov(cc, out, 0);
1301 }
1302
1303 if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
1304 codegen->GetAssembler()->UnpoisonHeapReference(expected);
1305 if (value.Is(expected)) {
1306 // Do not unpoison `value`, as it is the same register as
1307 // `expected`, which has just been unpoisoned.
1308 } else {
1309 codegen->GetAssembler()->UnpoisonHeapReference(value);
1310 }
1311 }
1312}
1313
1314void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1315 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
1316}
1317void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1318 // The only read barrier implementation supporting the
1319 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1320 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1321 return;
1322 }
1323
1324 CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
1325}
1326void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
1327 GenCas(invoke, Primitive::kPrimInt, codegen_);
1328}
1329void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
1330 // The only read barrier implementation supporting the
1331 // UnsafeCASObject intrinsic is the Baker-style read barriers.
1332 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1333
1334 GenCas(invoke, Primitive::kPrimNot, codegen_);
1335}
1336
1337void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1338 // The inputs plus one temp.
1339 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1340 invoke->InputAt(1)->CanBeNull()
1341 ? LocationSummary::kCallOnSlowPath
1342 : LocationSummary::kNoCall,
1343 kIntrinsified);
1344 locations->SetInAt(0, Location::RequiresRegister());
1345 locations->SetInAt(1, Location::RequiresRegister());
1346 locations->AddTemp(Location::RequiresRegister());
1347 locations->AddTemp(Location::RequiresRegister());
1348 locations->AddTemp(Location::RequiresRegister());
1349 // Need temporary registers for String compression's feature.
1350 if (mirror::kUseStringCompression) {
1351 locations->AddTemp(Location::RequiresRegister());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001352 }
1353 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1354}
1355
1356void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
1357 ArmVIXLAssembler* assembler = GetAssembler();
1358 LocationSummary* locations = invoke->GetLocations();
1359
1360 vixl32::Register str = InputRegisterAt(invoke, 0);
1361 vixl32::Register arg = InputRegisterAt(invoke, 1);
1362 vixl32::Register out = OutputRegister(invoke);
1363
1364 vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
1365 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1366 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001367 vixl32::Register temp3;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001368 if (mirror::kUseStringCompression) {
1369 temp3 = RegisterFrom(locations->GetTemp(3));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001370 }
1371
1372 vixl32::Label loop;
1373 vixl32::Label find_char_diff;
1374 vixl32::Label end;
1375 vixl32::Label different_compression;
1376
1377 // Get offsets of count and value fields within a string object.
1378 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
1379 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1380
1381 // Note that the null check must have been done earlier.
1382 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1383
1384 // Take slow path and throw if input can be and is null.
1385 SlowPathCodeARMVIXL* slow_path = nullptr;
1386 const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
1387 if (can_slow_path) {
1388 slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1389 codegen_->AddSlowPath(slow_path);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001390 __ CompareAndBranchIfZero(arg, slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001391 }
1392
1393 // Reference equality check, return 0 if same reference.
1394 __ Subs(out, str, arg);
1395 __ B(eq, &end);
1396
Anton Kirilov5ec62182016-10-13 20:16:02 +01001397 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001398 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001399 __ Ldr(temp3, MemOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001400 __ Ldr(temp2, MemOperand(arg, count_offset));
1401 // Extract lengths from the `count` fields.
1402 __ Lsr(temp0, temp3, 1u);
1403 __ Lsr(temp1, temp2, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001404 } else {
1405 // Load lengths of this and argument strings.
1406 __ Ldr(temp0, MemOperand(str, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001407 __ Ldr(temp1, MemOperand(arg, count_offset));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001408 }
1409 // out = length diff.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001410 __ Subs(out, temp0, temp1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001411 // temp0 = min(len(str), len(arg)).
1412
1413 {
Artem Serov0fb37192016-12-06 18:13:40 +00001414 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1415 2 * kMaxInstructionSizeInBytes,
1416 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001417
1418 __ it(gt);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001419 __ mov(gt, temp0, temp1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001420 }
1421
Anton Kirilov5ec62182016-10-13 20:16:02 +01001422 // Shorter string is empty?
xueliang.zhongf51bc622016-11-04 09:23:32 +00001423 // Note that mirror::kUseStringCompression==true introduces lots of instructions,
 1424 // which makes the &end label far away from this branch, so it is not 'CBZ-encodable'.
1425 __ CompareAndBranchIfZero(temp0, &end, mirror::kUseStringCompression);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001426
1427 if (mirror::kUseStringCompression) {
 1428 // Check if both strings use the same compression style; only then is this comparison loop valid.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001429 __ Eors(temp2, temp2, temp3);
1430 __ Lsrs(temp2, temp2, 1u);
1431 __ B(cs, &different_compression);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001432 // For string compression, calculate the number of bytes to compare (not chars).
1433 // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001434 __ Lsls(temp3, temp3, 31u); // Extract purely the compression flag.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001435
Artem Serov0fb37192016-12-06 18:13:40 +00001436 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1437 2 * kMaxInstructionSizeInBytes,
1438 CodeBufferCheckScope::kMaximumSize);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001439
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001440 __ it(ne);
1441 __ add(ne, temp0, temp0, temp0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001442 }
1443
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001444 // Store offset of string value in preparation for comparison loop.
1445 __ Mov(temp1, value_offset);
1446
Anton Kirilov5ec62182016-10-13 20:16:02 +01001447 // Assertions that must hold in order to compare multiple characters at a time.
1448 CHECK_ALIGNED(value_offset, 8);
1449 static_assert(IsAligned<8>(kObjectAlignment),
1450 "String data must be 8-byte aligned for unrolled CompareTo loop.");
1451
Scott Wakelingb77051e2016-11-21 19:46:00 +00001452 const unsigned char_size = Primitive::ComponentSize(Primitive::kPrimChar);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001453 DCHECK_EQ(char_size, 2u);
1454
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001455 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
1456
Anton Kirilov5ec62182016-10-13 20:16:02 +01001457 vixl32::Label find_char_diff_2nd_cmp;
1458 // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
1459 __ Bind(&loop);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001460 vixl32::Register temp_reg = temps.Acquire();
Anton Kirilov5ec62182016-10-13 20:16:02 +01001461 __ Ldr(temp_reg, MemOperand(str, temp1));
1462 __ Ldr(temp2, MemOperand(arg, temp1));
1463 __ Cmp(temp_reg, temp2);
Artem Serov517d9f62016-12-12 15:51:15 +00001464 __ B(ne, &find_char_diff, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001465 __ Add(temp1, temp1, char_size * 2);
1466
1467 __ Ldr(temp_reg, MemOperand(str, temp1));
1468 __ Ldr(temp2, MemOperand(arg, temp1));
1469 __ Cmp(temp_reg, temp2);
Artem Serov517d9f62016-12-12 15:51:15 +00001470 __ B(ne, &find_char_diff_2nd_cmp, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001471 __ Add(temp1, temp1, char_size * 2);
1472 // With string compression, we have compared 8 bytes, otherwise 4 chars.
1473 __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
Artem Serov517d9f62016-12-12 15:51:15 +00001474 __ B(hi, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001475 __ B(&end);
1476
1477 __ Bind(&find_char_diff_2nd_cmp);
1478 if (mirror::kUseStringCompression) {
1479 __ Subs(temp0, temp0, 4); // 4 bytes previously compared.
Artem Serov517d9f62016-12-12 15:51:15 +00001480 __ B(ls, &end, /* far_target */ false); // Was the second comparison fully beyond the end?
Anton Kirilov5ec62182016-10-13 20:16:02 +01001481 } else {
1482 // Without string compression, we can start treating temp0 as signed
1483 // and rely on the signed comparison below.
1484 __ Sub(temp0, temp0, 2);
1485 }
1486
1487 // Find the single character difference.
1488 __ Bind(&find_char_diff);
1489 // Get the bit position of the first character that differs.
1490 __ Eor(temp1, temp2, temp_reg);
1491 __ Rbit(temp1, temp1);
1492 __ Clz(temp1, temp1);
1493
1494 // temp0 = number of characters remaining to compare.
1495 // (Without string compression, it could be < 1 if a difference is found by the second CMP
1496 // in the comparison loop, and after the end of the shorter string data).
1497
 1498 // Without string compression, (temp1 >> 4) = character where the difference occurs between the last
 1499 // two words compared, in the interval [0,1].
 1500 // (0 for low half-word different, 1 for high half-word different).
 1501 // With string compression, (temp1 >> 3) = byte where the difference occurs,
1502 // in the interval [0,3].
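  // For example, without string compression a difference in the high half-word gives
  // temp1 in [16, 31], so (temp1 >> 4) == 1; a difference in the low half-word gives 0.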
1503
1504 // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
1505 // the remaining string data, so just return length diff (out).
1506 // The comparison is unsigned for string compression, otherwise signed.
1507 __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
Artem Serov517d9f62016-12-12 15:51:15 +00001508 __ B((mirror::kUseStringCompression ? ls : le), &end, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001509
Anton Kirilov5ec62182016-10-13 20:16:02 +01001510 // Extract the characters and calculate the difference.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001511 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001512 // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
1513 // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
1514 // The compression flag is now in the highest bit of temp3, so let's play some tricks.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001515 __ Orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7f800000u
1516 __ Bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3)); // &= ~(uncompressed ? 0xfu : 0x7u)
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001517 __ Asr(temp3, temp3, 7u); // uncompressed ? 0xffff0000u : 0xff0000u.
1518 __ Lsr(temp2, temp2, temp1); // Extract second character.
1519 __ Lsr(temp3, temp3, 16u); // uncompressed ? 0xffffu : 0xffu
1520 __ Lsr(out, temp_reg, temp1); // Extract first character.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001521 __ And(temp2, temp2, temp3);
1522 __ And(out, out, temp3);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001523 } else {
Anton Kirilovb88c4842016-11-14 14:37:00 +00001524 __ Bic(temp1, temp1, 0xf);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001525 __ Lsr(temp2, temp2, temp1);
1526 __ Lsr(out, temp_reg, temp1);
Anton Kirilovb88c4842016-11-14 14:37:00 +00001527 __ Movt(temp2, 0);
1528 __ Movt(out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001529 }
Anton Kirilov5ec62182016-10-13 20:16:02 +01001530
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001531 __ Sub(out, out, temp2);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001532 temps.Release(temp_reg);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001533
1534 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001535 __ B(&end);
1536 __ Bind(&different_compression);
1537
1538 // Comparison for different compression style.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001539 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1540 DCHECK_EQ(c_char_size, 1u);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001541
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001542 // We want to free up temp3, currently holding `str.count`, for comparison.
 1543 // So, we move it to the bottom bit of the iteration count `temp0`, which we then
1544 // need to treat as unsigned. Start by freeing the bit with an ADD and continue
1545 // further down by a LSRS+SBC which will flip the meaning of the flag but allow
1546 // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001547 __ Add(temp0, temp0, temp0); // Unlike LSL, this ADD is always 16-bit.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001548 // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001549 __ Mov(temp1, str);
1550 __ Mov(temp2, arg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001551 __ Lsrs(temp3, temp3, 1u); // Continue the move of the compression flag.
1552 {
Artem Serov0fb37192016-12-06 18:13:40 +00001553 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1554 3 * kMaxInstructionSizeInBytes,
1555 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001556 __ itt(cs); // Interleave with selection of temp1 and temp2.
1557 __ mov(cs, temp1, arg); // Preserves flags.
1558 __ mov(cs, temp2, str); // Preserves flags.
1559 }
Anton Kirilovb88c4842016-11-14 14:37:00 +00001560 __ Sbc(temp0, temp0, 0); // Complete the move of the compression flag.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001561
1562 // Adjust temp1 and temp2 from string pointers to data pointers.
Anton Kirilovb88c4842016-11-14 14:37:00 +00001563 __ Add(temp1, temp1, value_offset);
1564 __ Add(temp2, temp2, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001565
1566 vixl32::Label different_compression_loop;
1567 vixl32::Label different_compression_diff;
1568
1569 // Main loop for different compression.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001570 temp_reg = temps.Acquire();
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001571 __ Bind(&different_compression_loop);
1572 __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
1573 __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
Anton Kirilovb88c4842016-11-14 14:37:00 +00001574 __ Cmp(temp_reg, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00001575 __ B(ne, &different_compression_diff, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001576 __ Subs(temp0, temp0, 2);
Artem Serov517d9f62016-12-12 15:51:15 +00001577 __ B(hi, &different_compression_loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001578 __ B(&end);
1579
1580 // Calculate the difference.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001581 __ Bind(&different_compression_diff);
1582 __ Sub(out, temp_reg, temp3);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001583 temps.Release(temp_reg);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001584 // Flip the difference if the `arg` is compressed.
1585 // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
1586 __ Lsrs(temp0, temp0, 1u);
1587 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1588 "Expecting 0=compressed, 1=uncompressed");
1589
Artem Serov0fb37192016-12-06 18:13:40 +00001590 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1591 2 * kMaxInstructionSizeInBytes,
1592 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001593 __ it(cc);
1594 __ rsb(cc, out, out, 0);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001595 }
1596
1597 __ Bind(&end);
1598
1599 if (can_slow_path) {
1600 __ Bind(slow_path->GetExitLabel());
1601 }
1602}
1603
1604void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
1605 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1606 LocationSummary::kNoCall,
1607 kIntrinsified);
1608 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1609 locations->SetInAt(0, Location::RequiresRegister());
1610 locations->SetInAt(1, Location::RequiresRegister());
1611 // Temporary registers to store lengths of strings and for calculations.
 1612 // Using the cbz instruction requires a low register, so explicitly set a temp to be R0.
1613 locations->AddTemp(LocationFrom(r0));
1614 locations->AddTemp(Location::RequiresRegister());
1615 locations->AddTemp(Location::RequiresRegister());
1616
1617 locations->SetOut(Location::RequiresRegister());
1618}
1619
1620void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
1621 ArmVIXLAssembler* assembler = GetAssembler();
1622 LocationSummary* locations = invoke->GetLocations();
1623
1624 vixl32::Register str = InputRegisterAt(invoke, 0);
1625 vixl32::Register arg = InputRegisterAt(invoke, 1);
1626 vixl32::Register out = OutputRegister(invoke);
1627
1628 vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
1629 vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
1630 vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
1631
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001632 vixl32::Label loop;
Anton Kirilov5ec62182016-10-13 20:16:02 +01001633 vixl32::Label end;
1634 vixl32::Label return_true;
1635 vixl32::Label return_false;
1636
1637 // Get offsets of count, value, and class fields within a string object.
1638 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1639 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1640 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1641
1642 // Note that the null check must have been done earlier.
1643 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1644
1645 StringEqualsOptimizations optimizations(invoke);
1646 if (!optimizations.GetArgumentNotNull()) {
1647 // Check if input is null, return false if it is.
xueliang.zhongf51bc622016-11-04 09:23:32 +00001648 __ CompareAndBranchIfZero(arg, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001649 }
1650
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001651 // Reference equality check, return true if same reference.
1652 __ Cmp(str, arg);
Artem Serov517d9f62016-12-12 15:51:15 +00001653 __ B(eq, &return_true, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001654
Anton Kirilov5ec62182016-10-13 20:16:02 +01001655 if (!optimizations.GetArgumentIsString()) {
1656 // Instanceof check for the argument by comparing class fields.
1657 // All string objects must have the same type since String cannot be subclassed.
1658 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1659 // If the argument is a string object, its class field must be equal to receiver's class field.
1660 __ Ldr(temp, MemOperand(str, class_offset));
1661 __ Ldr(temp1, MemOperand(arg, class_offset));
1662 __ Cmp(temp, temp1);
Artem Serov517d9f62016-12-12 15:51:15 +00001663 __ B(ne, &return_false, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001664 }
1665
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001666 // Load `count` fields of this and argument strings.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001667 __ Ldr(temp, MemOperand(str, count_offset));
1668 __ Ldr(temp1, MemOperand(arg, count_offset));
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001669 // Check if `count` fields are equal, return false if they're not.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001670 // This also compares the compression style; if it differs, return false.
1671 __ Cmp(temp, temp1);
Artem Serov517d9f62016-12-12 15:51:15 +00001672 __ B(ne, &return_false, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001673 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1674 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1675 "Expecting 0=compressed, 1=uncompressed");
xueliang.zhongf51bc622016-11-04 09:23:32 +00001676 __ CompareAndBranchIfZero(temp, &return_true, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001677
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001678 // Assertions that must hold in order to compare strings 4 bytes at a time.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001679 DCHECK_ALIGNED(value_offset, 4);
1680 static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
1681
1682 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001683 // For string compression, calculate the number of bytes to compare (not chars).
1684 // This could in theory exceed INT32_MAX, so treat temp as unsigned.
1685 __ Lsrs(temp, temp, 1u); // Extract length and check compression flag.
Artem Serov0fb37192016-12-06 18:13:40 +00001686 ExactAssemblyScope aas(assembler->GetVIXLAssembler(),
1687 2 * kMaxInstructionSizeInBytes,
1688 CodeBufferCheckScope::kMaximumSize);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001689 __ it(cs); // If uncompressed,
1690 __ add(cs, temp, temp, temp); // double the byte count.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001691 }
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001692
1693 // Store offset of string value in preparation for comparison loop.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001694 __ Mov(temp1, value_offset);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001695
1696 // Loop to compare strings 4 bytes at a time starting at the front of the string.
1697 // Ok to do this because strings are zero-padded to kObjectAlignment.
Anton Kirilov5ec62182016-10-13 20:16:02 +01001698 __ Bind(&loop);
1699 __ Ldr(out, MemOperand(str, temp1));
1700 __ Ldr(temp2, MemOperand(arg, temp1));
Scott Wakelingb77051e2016-11-21 19:46:00 +00001701 __ Add(temp1, temp1, Operand::From(sizeof(uint32_t)));
Anton Kirilov5ec62182016-10-13 20:16:02 +01001702 __ Cmp(out, temp2);
Artem Serov517d9f62016-12-12 15:51:15 +00001703 __ B(ne, &return_false, /* far_target */ false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001704 // With string compression, we have compared 4 bytes, otherwise 2 chars.
1705 __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
Artem Serov517d9f62016-12-12 15:51:15 +00001706 __ B(hi, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01001707
1708 // Return true and exit the function.
1709 // If loop does not result in returning false, we return true.
1710 __ Bind(&return_true);
1711 __ Mov(out, 1);
1712 __ B(&end);
1713
1714 // Return false and exit the function.
1715 __ Bind(&return_false);
1716 __ Mov(out, 0);
1717 __ Bind(&end);
1718}
1719
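// Common code for the String.indexOf() intrinsics: dispatches to the slow path for code
// points above 0xFFFF (checked at runtime unless the code point is a constant or a char),
// optionally zeroes the start index, and calls the kQuickIndexOf entrypoint.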
1720static void GenerateVisitStringIndexOf(HInvoke* invoke,
1721 ArmVIXLAssembler* assembler,
1722 CodeGeneratorARMVIXL* codegen,
1723 ArenaAllocator* allocator,
1724 bool start_at_zero) {
1725 LocationSummary* locations = invoke->GetLocations();
1726
1727 // Note that the null check must have been done earlier.
1728 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1729
1730 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1731 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1732 SlowPathCodeARMVIXL* slow_path = nullptr;
1733 HInstruction* code_point = invoke->InputAt(1);
1734 if (code_point->IsIntConstant()) {
Anton Kirilov644032c2016-12-06 17:51:43 +00001735 if (static_cast<uint32_t>(Int32ConstantFrom(code_point)) >
Anton Kirilov5ec62182016-10-13 20:16:02 +01001736 std::numeric_limits<uint16_t>::max()) {
1737 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1738 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1739 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1740 codegen->AddSlowPath(slow_path);
1741 __ B(slow_path->GetEntryLabel());
1742 __ Bind(slow_path->GetExitLabel());
1743 return;
1744 }
1745 } else if (code_point->GetType() != Primitive::kPrimChar) {
1746 vixl32::Register char_reg = InputRegisterAt(invoke, 1);
 1747 // 0xffff is not a modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
1748 __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
1749 slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
1750 codegen->AddSlowPath(slow_path);
1751 __ B(hs, slow_path->GetEntryLabel());
1752 }
1753
1754 if (start_at_zero) {
1755 vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
1756 DCHECK(tmp_reg.Is(r2));
1757 // Start-index = 0.
1758 __ Mov(tmp_reg, 0);
1759 }
1760
1761 codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
1762 CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
1763
1764 if (slow_path != nullptr) {
1765 __ Bind(slow_path->GetExitLabel());
1766 }
1767}
1768
1769void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1770 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1771 LocationSummary::kCallOnMainAndSlowPath,
1772 kIntrinsified);
1773 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1774 // best to align the inputs accordingly.
1775 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1776 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1777 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1778 locations->SetOut(LocationFrom(r0));
1779
1780 // Need to send start-index=0.
1781 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
1782}
1783
1784void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
1785 GenerateVisitStringIndexOf(
1786 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1787}
1788
1789void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1790 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1791 LocationSummary::kCallOnMainAndSlowPath,
1792 kIntrinsified);
1793 // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
1794 // best to align the inputs accordingly.
1795 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1796 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1797 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1798 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1799 locations->SetOut(LocationFrom(r0));
1800}
1801
1802void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
1803 GenerateVisitStringIndexOf(
1804 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1805}
1806
1807void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1808 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1809 LocationSummary::kCallOnMainAndSlowPath,
1810 kIntrinsified);
1811 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1812 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1813 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1814 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1815 locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
1816 locations->SetOut(LocationFrom(r0));
1817}
1818
1819void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
1820 ArmVIXLAssembler* assembler = GetAssembler();
1821 vixl32::Register byte_array = InputRegisterAt(invoke, 0);
1822 __ Cmp(byte_array, 0);
1823 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1824 codegen_->AddSlowPath(slow_path);
1825 __ B(eq, slow_path->GetEntryLabel());
1826
1827 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
1828 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1829 __ Bind(slow_path->GetExitLabel());
1830}
1831
1832void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1833 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1834 LocationSummary::kCallOnMainOnly,
1835 kIntrinsified);
1836 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1837 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1838 locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
1839 locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
1840 locations->SetOut(LocationFrom(r0));
1841}
1842
1843void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
1844 // No need to emit code checking whether `locations->InAt(2)` is a null
1845 // pointer, as callers of the native method
1846 //
1847 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1848 //
1849 // all include a null check on `data` before calling that method.
1850 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1851 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1852}
1853
1854void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1855 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1856 LocationSummary::kCallOnMainAndSlowPath,
1857 kIntrinsified);
1858 InvokeRuntimeCallingConventionARMVIXL calling_convention;
1859 locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
1860 locations->SetOut(LocationFrom(r0));
1861}
1862
1863void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
1864 ArmVIXLAssembler* assembler = GetAssembler();
1865 vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
1866 __ Cmp(string_to_copy, 0);
1867 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
1868 codegen_->AddSlowPath(slow_path);
1869 __ B(eq, slow_path->GetEntryLabel());
1870
1871 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
1872 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1873
1874 __ Bind(slow_path->GetExitLabel());
1875}
1876
1877void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1878 // The only read barrier implementation supporting the
1879 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1880 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
1881 return;
1882 }
1883
1884 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
1885 LocationSummary* locations = invoke->GetLocations();
1886 if (locations == nullptr) {
1887 return;
1888 }
1889
1890 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
1891 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
1892 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
1893
1894 if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
1895 locations->SetInAt(1, Location::RequiresRegister());
1896 }
1897 if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
1898 locations->SetInAt(3, Location::RequiresRegister());
1899 }
1900 if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
1901 locations->SetInAt(4, Location::RequiresRegister());
1902 }
1903 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
1904 // Temporary register IP cannot be used in
1905 // ReadBarrierSystemArrayCopySlowPathARM (because that register
1906 // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
1907 // temporary register from the register allocator.
1908 locations->AddTemp(Location::RequiresRegister());
1909 }
1910}
1911
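// Emits the range checks for one side of System.arraycopy(): verifies that `pos` lies
// within `input` and that at least `length` elements remain starting at `pos`, branching
// to `slow_path` otherwise. With `length_is_input_length`, only `pos == 0` can succeed.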
1912static void CheckPosition(ArmVIXLAssembler* assembler,
1913 Location pos,
1914 vixl32::Register input,
1915 Location length,
1916 SlowPathCodeARMVIXL* slow_path,
1917 vixl32::Register temp,
1918 bool length_is_input_length = false) {
1919 // Where is the length in the Array?
1920 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
1921
1922 if (pos.IsConstant()) {
1923 int32_t pos_const = Int32ConstantFrom(pos);
1924 if (pos_const == 0) {
1925 if (!length_is_input_length) {
1926 // Check that length(input) >= length.
1927 __ Ldr(temp, MemOperand(input, length_offset));
1928 if (length.IsConstant()) {
1929 __ Cmp(temp, Int32ConstantFrom(length));
1930 } else {
1931 __ Cmp(temp, RegisterFrom(length));
1932 }
1933 __ B(lt, slow_path->GetEntryLabel());
1934 }
1935 } else {
1936 // Check that length(input) >= pos.
1937 __ Ldr(temp, MemOperand(input, length_offset));
1938 __ Subs(temp, temp, pos_const);
1939 __ B(lt, slow_path->GetEntryLabel());
1940
1941 // Check that (length(input) - pos) >= length.
1942 if (length.IsConstant()) {
1943 __ Cmp(temp, Int32ConstantFrom(length));
1944 } else {
1945 __ Cmp(temp, RegisterFrom(length));
1946 }
1947 __ B(lt, slow_path->GetEntryLabel());
1948 }
1949 } else if (length_is_input_length) {
1950 // The only way the copy can succeed is if pos is zero.
1951 vixl32::Register pos_reg = RegisterFrom(pos);
xueliang.zhongf51bc622016-11-04 09:23:32 +00001952 __ CompareAndBranchIfNonZero(pos_reg, slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01001953 } else {
1954 // Check that pos >= 0.
1955 vixl32::Register pos_reg = RegisterFrom(pos);
1956 __ Cmp(pos_reg, 0);
1957 __ B(lt, slow_path->GetEntryLabel());
1958
1959 // Check that pos <= length(input).
1960 __ Ldr(temp, MemOperand(input, length_offset));
1961 __ Subs(temp, temp, pos_reg);
1962 __ B(lt, slow_path->GetEntryLabel());
1963
1964 // Check that (length(input) - pos) >= length.
1965 if (length.IsConstant()) {
1966 __ Cmp(temp, Int32ConstantFrom(length));
1967 } else {
1968 __ Cmp(temp, RegisterFrom(length));
1969 }
1970 __ B(lt, slow_path->GetEntryLabel());
1971 }
1972}
1973
1974void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
1975 // The only read barrier implementation supporting the
1976 // SystemArrayCopy intrinsic is the Baker-style read barriers.
1977 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
1978
1979 ArmVIXLAssembler* assembler = GetAssembler();
1980 LocationSummary* locations = invoke->GetLocations();
1981
1982 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
1983 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
1984 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
1985 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
1986 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
1987
1988 vixl32::Register src = InputRegisterAt(invoke, 0);
1989 Location src_pos = locations->InAt(1);
1990 vixl32::Register dest = InputRegisterAt(invoke, 2);
1991 Location dest_pos = locations->InAt(3);
1992 Location length = locations->InAt(4);
1993 Location temp1_loc = locations->GetTemp(0);
1994 vixl32::Register temp1 = RegisterFrom(temp1_loc);
1995 Location temp2_loc = locations->GetTemp(1);
1996 vixl32::Register temp2 = RegisterFrom(temp2_loc);
1997 Location temp3_loc = locations->GetTemp(2);
1998 vixl32::Register temp3 = RegisterFrom(temp3_loc);
1999
2000 SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2001 codegen_->AddSlowPath(intrinsic_slow_path);
2002
2003 vixl32::Label conditions_on_positions_validated;
2004 SystemArrayCopyOptimizations optimizations(invoke);
2005
 2006 // If source and destination are the same, we go to the slow path if we need to do
 2007 // forward copying.
2008 if (src_pos.IsConstant()) {
2009 int32_t src_pos_constant = Int32ConstantFrom(src_pos);
2010 if (dest_pos.IsConstant()) {
2011 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2012 if (optimizations.GetDestinationIsSource()) {
2013 // Checked when building locations.
2014 DCHECK_GE(src_pos_constant, dest_pos_constant);
2015 } else if (src_pos_constant < dest_pos_constant) {
2016 __ Cmp(src, dest);
2017 __ B(eq, intrinsic_slow_path->GetEntryLabel());
2018 }
2019
2020 // Checked when building locations.
2021 DCHECK(!optimizations.GetDestinationIsSource()
2022 || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
2023 } else {
2024 if (!optimizations.GetDestinationIsSource()) {
2025 __ Cmp(src, dest);
Artem Serov517d9f62016-12-12 15:51:15 +00002026 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002027 }
2028 __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
2029 __ B(gt, intrinsic_slow_path->GetEntryLabel());
2030 }
2031 } else {
2032 if (!optimizations.GetDestinationIsSource()) {
2033 __ Cmp(src, dest);
Artem Serov517d9f62016-12-12 15:51:15 +00002034 __ B(ne, &conditions_on_positions_validated, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002035 }
2036 if (dest_pos.IsConstant()) {
2037 int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
2038 __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
2039 } else {
2040 __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
2041 }
2042 __ B(lt, intrinsic_slow_path->GetEntryLabel());
2043 }
2044
2045 __ Bind(&conditions_on_positions_validated);
2046
2047 if (!optimizations.GetSourceIsNotNull()) {
2048 // Bail out if the source is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00002049 __ CompareAndBranchIfZero(src, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002050 }
2051
2052 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2053 // Bail out if the destination is null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00002054 __ CompareAndBranchIfZero(dest, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002055 }
2056
2057 // If the length is negative, bail out.
2058 // We have already checked in the LocationsBuilder for the constant case.
2059 if (!length.IsConstant() &&
2060 !optimizations.GetCountIsSourceLength() &&
2061 !optimizations.GetCountIsDestinationLength()) {
2062 __ Cmp(RegisterFrom(length), 0);
2063 __ B(lt, intrinsic_slow_path->GetEntryLabel());
2064 }
2065
2066 // Validity checks: source.
2067 CheckPosition(assembler,
2068 src_pos,
2069 src,
2070 length,
2071 intrinsic_slow_path,
2072 temp1,
2073 optimizations.GetCountIsSourceLength());
2074
2075 // Validity checks: dest.
2076 CheckPosition(assembler,
2077 dest_pos,
2078 dest,
2079 length,
2080 intrinsic_slow_path,
2081 temp1,
2082 optimizations.GetCountIsDestinationLength());
2083
2084 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2085 // Check whether all elements of the source array are assignable to the component
2086 // type of the destination array. We do two checks: the classes are the same,
2087 // or the destination is Object[]. If none of these checks succeed, we go to the
2088 // slow path.
2089
2090 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2091 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2092 // /* HeapReference<Class> */ temp1 = src->klass_
2093 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2094 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
 2095 // Bail out if the source is not a non-primitive array.
2096 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2097 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2098 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00002099 __ CompareAndBranchIfZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002100 // If heap poisoning is enabled, `temp1` has been unpoisoned
 2101 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2102 // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
2103 __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
2104 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002105 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002106 }
2107
2108 // /* HeapReference<Class> */ temp1 = dest->klass_
2109 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2110 invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
2111
2112 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
 2113 // Bail out if the destination is not a non-primitive array.
2114 //
2115 // Register `temp1` is not trashed by the read barrier emitted
2116 // by GenerateFieldLoadWithBakerReadBarrier below, as that
2117 // method produces a call to a ReadBarrierMarkRegX entry point,
2118 // which saves all potentially live registers, including
2119 // temporaries such a `temp1`.
2120 // /* HeapReference<Class> */ temp2 = temp1->component_type_
2121 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2122 invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00002123 __ CompareAndBranchIfZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002124 // If heap poisoning is enabled, `temp2` has been unpoisoned
 2125 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2126 // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
2127 __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
2128 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002129 __ CompareAndBranchIfNonZero(temp2, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002130 }
2131
2132 // For the same reason given earlier, `temp1` is not trashed by the
2133 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
2134 // /* HeapReference<Class> */ temp2 = src->klass_
2135 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2136 invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
2137 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
2138 __ Cmp(temp1, temp2);
2139
2140 if (optimizations.GetDestinationIsTypedObjectArray()) {
2141 vixl32::Label do_copy;
Artem Serov517d9f62016-12-12 15:51:15 +00002142 __ B(eq, &do_copy, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002143 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2144 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2145 invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
2146 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2147 // We do not need to emit a read barrier for the following
2148 // heap reference load, as `temp1` is only used in a
2149 // comparison with null below, and this reference is not
2150 // kept afterwards.
2151 __ Ldr(temp1, MemOperand(temp1, super_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00002152 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002153 __ Bind(&do_copy);
2154 } else {
2155 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2156 }
2157 } else {
2158 // Non read barrier code.
2159
2160 // /* HeapReference<Class> */ temp1 = dest->klass_
2161 __ Ldr(temp1, MemOperand(dest, class_offset));
2162 // /* HeapReference<Class> */ temp2 = src->klass_
2163 __ Ldr(temp2, MemOperand(src, class_offset));
2164 bool did_unpoison = false;
2165 if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
2166 !optimizations.GetSourceIsNonPrimitiveArray()) {
2167 // One or two of the references need to be unpoisoned. Unpoison them
2168 // both to make the identity check valid.
2169 assembler->MaybeUnpoisonHeapReference(temp1);
2170 assembler->MaybeUnpoisonHeapReference(temp2);
2171 did_unpoison = true;
2172 }
2173
2174 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
 2175 // Bail out if the destination is not a non-primitive array.
2176 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2177 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00002178 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002179 assembler->MaybeUnpoisonHeapReference(temp3);
2180 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2181 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2182 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002183 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002184 }
2185
2186 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
 2187 // Bail out if the source is not a non-primitive array.
2188 // /* HeapReference<Class> */ temp3 = temp2->component_type_
2189 __ Ldr(temp3, MemOperand(temp2, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00002190 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002191 assembler->MaybeUnpoisonHeapReference(temp3);
2192 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2193 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2194 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002195 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002196 }
2197
2198 __ Cmp(temp1, temp2);
2199
2200 if (optimizations.GetDestinationIsTypedObjectArray()) {
2201 vixl32::Label do_copy;
Artem Serov517d9f62016-12-12 15:51:15 +00002202 __ B(eq, &do_copy, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002203 if (!did_unpoison) {
2204 assembler->MaybeUnpoisonHeapReference(temp1);
2205 }
2206 // /* HeapReference<Class> */ temp1 = temp1->component_type_
2207 __ Ldr(temp1, MemOperand(temp1, component_offset));
2208 assembler->MaybeUnpoisonHeapReference(temp1);
2209 // /* HeapReference<Class> */ temp1 = temp1->super_class_
2210 __ Ldr(temp1, MemOperand(temp1, super_offset));
2211 // No need to unpoison the result, we're comparing against null.
xueliang.zhongf51bc622016-11-04 09:23:32 +00002212 __ CompareAndBranchIfNonZero(temp1, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002213 __ Bind(&do_copy);
2214 } else {
2215 __ B(ne, intrinsic_slow_path->GetEntryLabel());
2216 }
2217 }
2218 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2219 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
 2220 // Bail out if the source is not a non-primitive array.
2221 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2222 // /* HeapReference<Class> */ temp1 = src->klass_
2223 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2224 invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
2225 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2226 codegen_->GenerateFieldLoadWithBakerReadBarrier(
2227 invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
xueliang.zhongf51bc622016-11-04 09:23:32 +00002228 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002229 // If heap poisoning is enabled, `temp3` has been unpoisoned
 2230 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
2231 } else {
2232 // /* HeapReference<Class> */ temp1 = src->klass_
2233 __ Ldr(temp1, MemOperand(src, class_offset));
2234 assembler->MaybeUnpoisonHeapReference(temp1);
2235 // /* HeapReference<Class> */ temp3 = temp1->component_type_
2236 __ Ldr(temp3, MemOperand(temp1, component_offset));
xueliang.zhongf51bc622016-11-04 09:23:32 +00002237 __ CompareAndBranchIfZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002238 assembler->MaybeUnpoisonHeapReference(temp3);
2239 }
2240 // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
2241 __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
2242 static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
xueliang.zhongf51bc622016-11-04 09:23:32 +00002243 __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002244 }
2245
2246 int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
2247 uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
2248 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
2249
2250 // Compute the base source address in `temp1`.
2251 if (src_pos.IsConstant()) {
2252 int32_t constant = Int32ConstantFrom(src_pos);
2253 __ Add(temp1, src, element_size * constant + offset);
2254 } else {
2255 __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
2256 __ Add(temp1, temp1, offset);
2257 }
2258
2259 // Compute the end source address in `temp3`.
2260 if (length.IsConstant()) {
2261 int32_t constant = Int32ConstantFrom(length);
2262 __ Add(temp3, temp1, element_size * constant);
2263 } else {
2264 __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
2265 }
2266
2267 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
Roland Levillainba650a42017-03-06 13:52:32 +00002268 // TODO: Also convert this intrinsic to the IsGcMarking strategy?
2269
Anton Kirilov5ec62182016-10-13 20:16:02 +01002270 // The base destination address is computed later, as `temp2` is
2271 // used for intermediate computations.
2272
2273 // SystemArrayCopy implementation for Baker read barriers (see
2274 // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
2275 //
2276 // if (src_ptr != end_ptr) {
2277 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
2278 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002279 // bool is_gray = (rb_state == ReadBarrier::GrayState());
Anton Kirilov5ec62182016-10-13 20:16:02 +01002280 // if (is_gray) {
2281 // // Slow-path copy.
2282 // do {
2283 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
2284 // } while (src_ptr != end_ptr)
2285 // } else {
2286 // // Fast-path copy.
2287 // do {
2288 // *dest_ptr++ = *src_ptr++;
2289 // } while (src_ptr != end_ptr)
2290 // }
2291 // }
2292
2293 vixl32::Label loop, done;
2294
2295 // Don't enter copy loop if `length == 0`.
2296 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002297 __ B(eq, &done, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002298
2299 // /* int32_t */ monitor = src->monitor_
2300 __ Ldr(temp2, MemOperand(src, monitor_offset));
2301 // /* LockWord */ lock_word = LockWord(monitor)
2302 static_assert(sizeof(LockWord) == sizeof(int32_t),
2303 "art::LockWord and int32_t have different sizes.");
2304
2305 // Introduce a dependency on the lock_word including the rb_state,
2306 // which shall prevent load-load reordering without using
2307 // a memory barrier (which would be more expensive).
2308 // `src` is unchanged by this operation, but its value now depends
2309 // on `temp2`.
2310 __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
2311
2312 // Slow path used to copy array when `src` is gray.
2313 SlowPathCodeARMVIXL* read_barrier_slow_path =
2314 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
2315 codegen_->AddSlowPath(read_barrier_slow_path);
2316
2317 // Given the numeric representation, it's enough to check the low bit of the
2318 // rb_state. We do that by shifting the bit out of the lock word with LSRS
2319 // which can be a 16-bit instruction unlike the TST immediate.
Roland Levillain4bbca2a2016-11-03 18:09:18 +00002320 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
2321 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
Anton Kirilov5ec62182016-10-13 20:16:02 +01002322 __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
2323 // Carry flag is the last bit shifted out by LSRS.
2324 __ B(cs, read_barrier_slow_path->GetEntryLabel());
2325
2326 // Fast-path copy.
2327
2328 // Compute the base destination address in `temp2`.
2329 if (dest_pos.IsConstant()) {
2330 int32_t constant = Int32ConstantFrom(dest_pos);
2331 __ Add(temp2, dest, element_size * constant + offset);
2332 } else {
2333 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2334 __ Add(temp2, temp2, offset);
2335 }
2336
2337 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2338 // poison/unpoison.
2339 __ Bind(&loop);
2340
2341 {
2342 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2343 const vixl32::Register temp_reg = temps.Acquire();
2344
2345 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2346 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2347 }
2348
2349 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002350 __ B(ne, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002351
2352 __ Bind(read_barrier_slow_path->GetExitLabel());
2353 __ Bind(&done);
2354 } else {
2355 // Non read barrier code.
2356
2357 // Compute the base destination address in `temp2`.
2358 if (dest_pos.IsConstant()) {
2359 int32_t constant = Int32ConstantFrom(dest_pos);
2360 __ Add(temp2, dest, element_size * constant + offset);
2361 } else {
2362 __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
2363 __ Add(temp2, temp2, offset);
2364 }
2365
2366 // Iterate over the arrays and do a raw copy of the objects. We don't need to
2367 // poison/unpoison.
2368 vixl32::Label loop, done;
2369 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002370 __ B(eq, &done, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002371 __ Bind(&loop);
2372
2373 {
2374 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2375 const vixl32::Register temp_reg = temps.Acquire();
2376
2377 __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
2378 __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
2379 }
2380
2381 __ Cmp(temp1, temp3);
Artem Serov517d9f62016-12-12 15:51:15 +00002382 __ B(ne, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002383 __ Bind(&done);
2384 }
2385
2386 // We only need one card marking on the destination array.
2387 codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
2388
2389 __ Bind(intrinsic_slow_path->GetExitLabel());
2390}
2391
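// Builds locations for a double -> double math intrinsic implemented by a runtime call:
// FPU register in and out, plus two core-register temps for the soft float ABI argument.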
2392static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2393 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2394 // the code generator. Furthermore, the register allocator creates fixed live intervals
2395 // for all caller-saved registers because we are doing a function call. As a result, if
2396 // the input and output locations are unallocated, the register allocator runs out of
2397 // registers and fails; however, a debuggable graph is not the common case.
2398 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2399 return;
2400 }
2401
2402 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2403 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2404 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2405
2406 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2407 LocationSummary::kCallOnMainOnly,
2408 kIntrinsified);
2409 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2410
2411 locations->SetInAt(0, Location::RequiresFpuRegister());
2412 locations->SetOut(Location::RequiresFpuRegister());
2413 // Native code uses the soft float ABI.
2414 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2415 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2416}
2417
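// Builds locations for a (double, double) -> double math intrinsic implemented by a
// runtime call: FPU registers in and out, plus four core-register temps for the
// soft float ABI arguments.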
2418static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
2419 // If the graph is debuggable, all callee-saved floating-point registers are blocked by
2420 // the code generator. Furthermore, the register allocator creates fixed live intervals
2421 // for all caller-saved registers because we are doing a function call. As a result, if
2422 // the input and output locations are unallocated, the register allocator runs out of
2423 // registers and fails; however, a debuggable graph is not the common case.
2424 if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
2425 return;
2426 }
2427
2428 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2429 DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
2430 DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
2431 DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
2432
2433 LocationSummary* const locations = new (arena) LocationSummary(invoke,
2434 LocationSummary::kCallOnMainOnly,
2435 kIntrinsified);
2436 const InvokeRuntimeCallingConventionARMVIXL calling_convention;
2437
2438 locations->SetInAt(0, Location::RequiresFpuRegister());
2439 locations->SetInAt(1, Location::RequiresFpuRegister());
2440 locations->SetOut(Location::RequiresFpuRegister());
2441 // Native code uses the soft float ABI.
2442 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
2443 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
2444 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
2445 locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
2446}
2447
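// Calls a math runtime entrypoint taking one double. Since the native code uses the
// soft float ABI, the argument is moved into a core register pair before the call and
// the result is moved back into the output D register afterwards.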
2448static void GenFPToFPCall(HInvoke* invoke,
2449 ArmVIXLAssembler* assembler,
2450 CodeGeneratorARMVIXL* codegen,
2451 QuickEntrypointEnum entry) {
2452 LocationSummary* const locations = invoke->GetLocations();
2453
2454 DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
2455 DCHECK(locations->WillCall() && locations->Intrinsified());
2456
2457 // Native code uses the soft float ABI.
2458 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2459 RegisterFrom(locations->GetTemp(1)),
2460 InputDRegisterAt(invoke, 0));
2461 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2462 __ Vmov(OutputDRegister(invoke),
2463 RegisterFrom(locations->GetTemp(0)),
2464 RegisterFrom(locations->GetTemp(1)));
2465}
2466
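// As GenFPToFPCall, but for entrypoints taking two doubles; each argument is split
// across its own pair of core registers before the call.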
2467static void GenFPFPToFPCall(HInvoke* invoke,
2468 ArmVIXLAssembler* assembler,
2469 CodeGeneratorARMVIXL* codegen,
2470 QuickEntrypointEnum entry) {
2471 LocationSummary* const locations = invoke->GetLocations();
2472
2473 DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
2474 DCHECK(locations->WillCall() && locations->Intrinsified());
2475
2476 // Native code uses the soft float ABI.
2477 __ Vmov(RegisterFrom(locations->GetTemp(0)),
2478 RegisterFrom(locations->GetTemp(1)),
2479 InputDRegisterAt(invoke, 0));
2480 __ Vmov(RegisterFrom(locations->GetTemp(2)),
2481 RegisterFrom(locations->GetTemp(3)),
2482 InputDRegisterAt(invoke, 1));
2483 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
2484 __ Vmov(OutputDRegister(invoke),
2485 RegisterFrom(locations->GetTemp(0)),
2486 RegisterFrom(locations->GetTemp(1)));
2487}
2488
2489void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
2490 CreateFPToFPCallLocations(arena_, invoke);
2491}
2492
2493void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
2494 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
2495}
2496
2497void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
2498 CreateFPToFPCallLocations(arena_, invoke);
2499}
2500
2501void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
2502 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
2503}
2504
2505void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
2506 CreateFPToFPCallLocations(arena_, invoke);
2507}
2508
2509void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
2510 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
2511}
2512
2513void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
2514 CreateFPToFPCallLocations(arena_, invoke);
2515}
2516
2517void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
2518 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
2519}
2520
2521void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
2522 CreateFPToFPCallLocations(arena_, invoke);
2523}
2524
2525void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
2526 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
2527}
2528
2529void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2530 CreateFPToFPCallLocations(arena_, invoke);
2531}
2532
2533void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
2534 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
2535}
2536
2537void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
2538 CreateFPToFPCallLocations(arena_, invoke);
2539}
2540
2541void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
2542 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
2543}
2544
2545void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
2546 CreateFPToFPCallLocations(arena_, invoke);
2547}
2548
2549void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
2550 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
2551}
2552
2553void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2554 CreateFPToFPCallLocations(arena_, invoke);
2555}
2556
2557void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
2558 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
2559}
2560
2561void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
2562 CreateFPToFPCallLocations(arena_, invoke);
2563}
2564
2565void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
2566 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
2567}
2568
2569void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
2570 CreateFPToFPCallLocations(arena_, invoke);
2571}
2572
2573void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
2574 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
2575}
2576
2577void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
2578 CreateFPToFPCallLocations(arena_, invoke);
2579}
2580
2581void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
2582 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
2583}
2584
2585void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
2586 CreateFPToFPCallLocations(arena_, invoke);
2587}
2588
2589void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
2590 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
2591}
2592
2593void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
2594 CreateFPToFPCallLocations(arena_, invoke);
2595}
2596
2597void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
2598 GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
2599}
2600
2601void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2602 CreateFPFPToFPCallLocations(arena_, invoke);
2603}
2604
2605void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
2606 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
2607}
2608
2609void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
2610 CreateFPFPToFPCallLocations(arena_, invoke);
2611}
2612
2613void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
2614 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
2615}
2616
2617void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2618 CreateFPFPToFPCallLocations(arena_, invoke);
2619}
2620
2621void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
2622 GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
2623}
2624
2625void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2626 CreateIntToIntLocations(arena_, invoke);
2627}
2628
2629void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
2630 ArmVIXLAssembler* assembler = GetAssembler();
2631 __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2632}
2633
2634void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
2635 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2636 LocationSummary::kNoCall,
2637 kIntrinsified);
2638 locations->SetInAt(0, Location::RequiresRegister());
2639 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2640}
2641
2642void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
2643 ArmVIXLAssembler* assembler = GetAssembler();
2644 LocationSummary* locations = invoke->GetLocations();
2645
2646 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2647 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2648 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2649 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2650
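  // Reversing a 64-bit value: bit-reverse each 32-bit half and swap the two halves.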
2651 __ Rbit(out_reg_lo, in_reg_hi);
2652 __ Rbit(out_reg_hi, in_reg_lo);
2653}
2654
2655void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2656 CreateIntToIntLocations(arena_, invoke);
2657}
2658
2659void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
2660 ArmVIXLAssembler* assembler = GetAssembler();
2661 __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2662}
2663
2664void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2665 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2666 LocationSummary::kNoCall,
2667 kIntrinsified);
2668 locations->SetInAt(0, Location::RequiresRegister());
2669 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2670}
2671
2672void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
2673 ArmVIXLAssembler* assembler = GetAssembler();
2674 LocationSummary* locations = invoke->GetLocations();
2675
2676 vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
2677 vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
2678 vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
2679 vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
2680
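  // Reversing the bytes of a 64-bit value: byte-reverse each 32-bit half and swap the two halves.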
2681 __ Rev(out_reg_lo, in_reg_hi);
2682 __ Rev(out_reg_hi, in_reg_lo);
2683}
2684
2685void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2686 CreateIntToIntLocations(arena_, invoke);
2687}
2688
2689void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
2690 ArmVIXLAssembler* assembler = GetAssembler();
2691 __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
2692}
2693
2694static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
2695 DCHECK(Primitive::IsIntOrLongType(type)) << type;
2696 DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
2697 DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
2698
2699 bool is_long = type == Primitive::kPrimLong;
2700 LocationSummary* locations = instr->GetLocations();
2701 Location in = locations->InAt(0);
2702 vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
2703 vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
2704 vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
2705 vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
2706 vixl32::Register out_r = OutputRegister(instr);
2707
2708 // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
2709  // According to the Cortex-A57 and Cortex-A72 optimization guides, compared to transferring to the full D-reg,
2710  // transferring data from a core register to the upper or lower half of a VFP D-reg incurs extra latency.
2711  // That's why, for the integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
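  // Illustrative NEON-intrinsics sketch of the same reduction (not generated code; names from arm_neon.h):
  //   uint8x8_t  bytes = vcnt_u8(vreinterpret_u8_u64(input));  // per-byte population counts
  //   uint16x4_t h     = vpaddl_u8(bytes);                     // pairwise add to 16-bit sums
  //   uint32x2_t w     = vpaddl_u16(h);                        // pairwise add to 32-bit sums
  //   uint64x1_t total = vpaddl_u32(w);                        // long variant only: final sum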
2712 __ Vmov(tmp_d, src_1, src_0); // Temp DReg |--src_1|--src_0|
2713 __ Vcnt(Untyped8, tmp_d, tmp_d); // Temp DReg |c|c|c|c|c|c|c|c|
2714 __ Vpaddl(U8, tmp_d, tmp_d); // Temp DReg |--c|--c|--c|--c|
2715 __ Vpaddl(U16, tmp_d, tmp_d); // Temp DReg |------c|------c|
2716 if (is_long) {
2717 __ Vpaddl(U32, tmp_d, tmp_d); // Temp DReg |--------------c|
2718 }
2719 __ Vmov(out_r, tmp_s);
2720}
2721
2722void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2723 CreateIntToIntLocations(arena_, invoke);
2724 invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
2725}
2726
2727void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
2728 GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
2729}
2730
2731void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2732 VisitIntegerBitCount(invoke);
2733}
2734
2735void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
2736 GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
2737}
2738
2739void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2740 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2741 LocationSummary::kNoCall,
2742 kIntrinsified);
2743 locations->SetInAt(0, Location::RequiresRegister());
2744 locations->SetInAt(1, Location::RequiresRegister());
2745 locations->SetInAt(2, Location::RequiresRegister());
2746 locations->SetInAt(3, Location::RequiresRegister());
2747 locations->SetInAt(4, Location::RequiresRegister());
2748
2749 // Temporary registers to store lengths of strings and for calculations.
2750 locations->AddTemp(Location::RequiresRegister());
2751 locations->AddTemp(Location::RequiresRegister());
2752 locations->AddTemp(Location::RequiresRegister());
2753}
2754
2755void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
2756 ArmVIXLAssembler* assembler = GetAssembler();
2757 LocationSummary* locations = invoke->GetLocations();
2758
2759 // Check assumption that sizeof(Char) is 2 (used in scaling below).
2760 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
2761 DCHECK_EQ(char_size, 2u);
2762
2763 // Location of data in char array buffer.
2764 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
2765
2766 // Location of char array data in string.
2767 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
2768
2769 // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
2770  // Since getChars() calls getCharsNoCheck(), the begin/end/dstBegin arguments are runtime values,
      // so we use registers rather than constants.
2771 vixl32::Register srcObj = InputRegisterAt(invoke, 0);
2772 vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
2773 vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
2774 vixl32::Register dstObj = InputRegisterAt(invoke, 3);
2775 vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
2776
2777 vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
2778 vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
2779 vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
2780
2781 vixl32::Label done, compressed_string_loop;
2782  // Compute the base address of the dst region to be copied to.
2783 __ Add(dst_ptr, dstObj, data_offset);
2784 __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
2785
2786 __ Subs(num_chr, srcEnd, srcBegin);
2787 // Early out for valid zero-length retrievals.
Artem Serov517d9f62016-12-12 15:51:15 +00002788 __ B(eq, &done, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002789
2790 // src range to copy.
2791 __ Add(src_ptr, srcObj, value_offset);
2792
2793 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2794 vixl32::Register temp;
2795 vixl32::Label compressed_string_preloop;
2796 if (mirror::kUseStringCompression) {
2797 // Location of count in string.
2798 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
2799 temp = temps.Acquire();
2800 // String's length.
2801 __ Ldr(temp, MemOperand(srcObj, count_offset));
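  // Under the string-compression scheme assumed here, the low bit of the count field is the
  // compression flag (0: compressed 8-bit chars, 1: uncompressed 16-bit chars); branch to the
  // byte-wise copy path when it is clear.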
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002802 __ Tst(temp, 1);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002803 temps.Release(temp);
Artem Serov517d9f62016-12-12 15:51:15 +00002804 __ B(eq, &compressed_string_preloop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002805 }
2806 __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
2807
2808 // Do the copy.
2809 vixl32::Label loop, remainder;
2810
2811 temp = temps.Acquire();
2812 // Save repairing the value of num_chr on the < 4 character path.
2813  // Subtract into a temp so that num_chr does not need repairing on the < 4 character path.
Artem Serov517d9f62016-12-12 15:51:15 +00002814 __ B(lt, &remainder, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002815
2816 // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
2817 __ Mov(num_chr, temp);
2818
2819  // The main loop, used for longer lengths, loads and stores 4x16-bit characters at a time.
2820 // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
2821 // to rectify these everywhere this intrinsic applies.)
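  // Each iteration copies four characters with two 32-bit load/store pairs; the post-indexed pair
  // also advances src_ptr and dst_ptr by 8 bytes.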
2822 __ Bind(&loop);
2823 __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
2824 __ Subs(num_chr, num_chr, 4);
2825 __ Str(temp, MemOperand(dst_ptr, char_size * 2));
2826 __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
2827 __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
2828 temps.Release(temp);
Artem Serov517d9f62016-12-12 15:51:15 +00002829 __ B(ge, &loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002830
2831 __ Adds(num_chr, num_chr, 4);
Artem Serov517d9f62016-12-12 15:51:15 +00002832 __ B(eq, &done, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002833
2834  // Loop for the < 4 character case and for remainder handling. Loads and stores one
2835 // 16-bit Java character at a time.
2836 __ Bind(&remainder);
2837 temp = temps.Acquire();
2838 __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
2839 __ Subs(num_chr, num_chr, 1);
2840 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2841 temps.Release(temp);
Artem Serov517d9f62016-12-12 15:51:15 +00002842 __ B(gt, &remainder, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002843
2844 if (mirror::kUseStringCompression) {
Vladimir Markofdaf0f42016-10-13 19:29:53 +01002845 __ B(&done);
2846
Anton Kirilov5ec62182016-10-13 20:16:02 +01002847 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
2848 DCHECK_EQ(c_char_size, 1u);
2849 // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
2850 __ Bind(&compressed_string_preloop);
2851 __ Add(src_ptr, src_ptr, srcBegin);
2852 __ Bind(&compressed_string_loop);
2853 temp = temps.Acquire();
2854 __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
2855 __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
2856 temps.Release(temp);
2857 __ Subs(num_chr, num_chr, 1);
Artem Serov517d9f62016-12-12 15:51:15 +00002858 __ B(gt, &compressed_string_loop, /* far_target */ false);
Anton Kirilov5ec62182016-10-13 20:16:02 +01002859 }
2860
2861 __ Bind(&done);
2862}
2863
2864void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2865 CreateFPToIntLocations(arena_, invoke);
2866}
2867
2868void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
2869 ArmVIXLAssembler* const assembler = GetAssembler();
2870 const vixl32::Register out = OutputRegister(invoke);
2871 // Shifting left by 1 bit makes the value encodable as an immediate operand;
2872 // we don't care about the sign bit anyway.
2873 constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
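  // Worked example of the data flow below: +Inf is 0x7f800000, and (0x7f800000 << 1) == 0xff000000 == infinity,
  // so the EOR yields 0, CLZ gives 32 and the final LSR by 5 produces 1. Any other input (including NaNs)
  // leaves a non-zero value, CLZ returns less than 32, and the result is 0.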
2874
2875 __ Vmov(out, InputSRegisterAt(invoke, 0));
2876 // We don't care about the sign bit, so shift left.
2877 __ Lsl(out, out, 1);
2878 __ Eor(out, out, infinity);
2879 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2880 __ Clz(out, out);
2881 // Any number less than 32 logically shifted right by 5 bits results in 0;
2882 // the same operation on 32 yields 1.
2883 __ Lsr(out, out, 5);
2884}
2885
2886void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2887 CreateFPToIntLocations(arena_, invoke);
2888}
2889
2890void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
2891 ArmVIXLAssembler* const assembler = GetAssembler();
2892 const vixl32::Register out = OutputRegister(invoke);
2893 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2894 const vixl32::Register temp = temps.Acquire();
2895 // The highest 32 bits of double precision positive infinity separated into
2896 // two constants encodable as immediate operands.
2897 constexpr uint32_t infinity_high = 0x7f000000U;
2898 constexpr uint32_t infinity_high2 = 0x00f00000U;
2899
2900 static_assert((infinity_high | infinity_high2) ==
2901 static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
2902 "The constants do not add up to the high 32 bits of double "
2903 "precision positive infinity.");
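  // Sketch of the check: for +/-Inf the high word XORed with 0x7ff00000 becomes 0 or 0x80000000; the left
  // shift below drops that sign bit and the ORR with the (all-zero) low word gives 0, so CLZ/LSR produce 1.
  // Any other input leaves a non-zero value and the result is 0.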
2904 __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
2905 __ Eor(out, out, infinity_high);
2906 __ Eor(out, out, infinity_high2);
2907 // We don't care about the sign bit, so shift left.
2908 __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
2909 // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
2910 __ Clz(out, out);
2911 // Any number less than 32 logically shifted right by 5 bits results in 0;
2912 // the same operation on 32 yields 1.
2913 __ Lsr(out, out, 5);
2914}
2915
TatWai Chongd8c052a2016-11-02 16:12:48 +08002916void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2917 if (kEmitCompilerReadBarrier) {
2918 // Do not intrinsify this call with the read barrier configuration.
2919 return;
2920 }
2921 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2922 LocationSummary::kCallOnSlowPath,
2923 kIntrinsified);
2924 locations->SetInAt(0, Location::RequiresRegister());
2925 locations->SetOut(Location::SameAsFirstInput());
2926 locations->AddTemp(Location::RequiresRegister());
2927}
2928
2929void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
2930 DCHECK(!kEmitCompilerReadBarrier);
2931 ArmVIXLAssembler* assembler = GetAssembler();
2932 LocationSummary* locations = invoke->GetLocations();
2933
2934 vixl32::Register obj = InputRegisterAt(invoke, 0);
2935 vixl32::Register out = OutputRegister(invoke);
2936
2937 SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
2938 codegen_->AddSlowPath(slow_path);
2939
2940 // Load ArtMethod first.
2941 HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
2942 DCHECK(invoke_direct != nullptr);
2943 vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
2944 invoke_direct, locations->GetTemp(0)));
2945
2946 // Now get declaring class.
2947 __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
2948
2949 uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
2950 uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
2951 DCHECK_NE(slow_path_flag_offset, 0u);
2952 DCHECK_NE(disable_flag_offset, 0u);
2953 DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
2954
2955  // Check the static flags that prevent using the intrinsic.
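  // The two flags are OR-ed so that a single compare-and-branch covers both the disable-intrinsic and
  // slow-path cases.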
2956 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
2957 vixl32::Register temp1 = temps.Acquire();
2958 __ Ldr(temp1, MemOperand(temp0, disable_flag_offset));
2959 __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset));
2960 __ Orr(temp0, temp1, temp0);
2961 __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel());
2962
2963 // Fast path.
2964 __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
2965 codegen_->MaybeRecordImplicitNullCheck(invoke);
2966 assembler->MaybeUnpoisonHeapReference(out);
2967 __ Bind(slow_path->GetExitLabel());
2968}
2969
Artem Serov9aee2d42017-01-06 15:58:31 +00002970void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
2971 if (features_.HasARMv8AInstructions()) {
2972 CreateFPToFPLocations(arena_, invoke);
2973 }
2974}
2975
2976void IntrinsicCodeGeneratorARMVIXL::VisitMathCeil(HInvoke* invoke) {
2977 ArmVIXLAssembler* assembler = GetAssembler();
2978 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
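  // VRINTP rounds toward plus infinity, which matches Math.ceil; the instruction is only available from
  // ARMv8-A, hence the feature check in the locations builder.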
2979 __ Vrintp(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2980}
2981
2982void IntrinsicLocationsBuilderARMVIXL::VisitMathFloor(HInvoke* invoke) {
2983 if (features_.HasARMv8AInstructions()) {
2984 CreateFPToFPLocations(arena_, invoke);
2985 }
2986}
2987
2988void IntrinsicCodeGeneratorARMVIXL::VisitMathFloor(HInvoke* invoke) {
2989 ArmVIXLAssembler* assembler = GetAssembler();
2990 DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
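  // VRINTM rounds toward minus infinity, matching Math.floor.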
2991 __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
2992}
2993
Nicolas Geoffray331605a2017-03-01 11:01:41 +00002994void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
2995 InvokeRuntimeCallingConventionARMVIXL calling_convention;
2996 IntrinsicVisitor::ComputeIntegerValueOfLocations(
2997 invoke,
2998 codegen_,
2999 LocationFrom(r0),
3000 LocationFrom(calling_convention.GetRegisterAt(0)));
3001}
3002
3003void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
3004 IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
3005 LocationSummary* locations = invoke->GetLocations();
3006 ArmVIXLAssembler* const assembler = GetAssembler();
3007
3008 vixl32::Register out = RegisterFrom(locations->Out());
3009 UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
3010 vixl32::Register temp = temps.Acquire();
3011 InvokeRuntimeCallingConventionARMVIXL calling_convention;
3012 vixl32::Register argument = calling_convention.GetRegisterAt(0);
3013 if (invoke->InputAt(0)->IsConstant()) {
3014 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3015 if (value >= info.low && value <= info.high) {
3016 // Just embed the j.l.Integer in the code.
3017 ScopedObjectAccess soa(Thread::Current());
3018 mirror::Object* boxed = info.cache->Get(value + (-info.low));
3019 DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
3020 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
3021 __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
3022 } else {
3023 // Allocate and initialize a new j.l.Integer.
3024 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3025 // JIT object table.
3026 uint32_t address =
3027 dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3028 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3029 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3030 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3031 __ Mov(temp, value);
3032 assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
3033  // `value` is a final field, so a store-store barrier is required. Ideally, we'd merge this memory
3034  // barrier with the one implied by the allocation.
3035 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3036 }
3037 } else {
3038 vixl32::Register in = RegisterFrom(locations->InAt(0));
3039 // Check bounds of our cache.
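  // The unsigned comparison folds both bounds checks into one: after subtracting info.low, any value
  // outside [info.low, info.high] wraps to something greater than (info.high - info.low), so the single
  // 'hs' branch catches both cases.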
3040 __ Add(out, in, -info.low);
3041 __ Cmp(out, info.high - info.low + 1);
3042 vixl32::Label allocate, done;
3043 __ B(hs, &allocate);
3044 // If the value is within the bounds, load the j.l.Integer directly from the array.
3045 uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3046 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3047 __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
3048 codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
3049 assembler->MaybeUnpoisonHeapReference(out);
3050 __ B(&done);
3051 __ Bind(&allocate);
3052 // Otherwise allocate and initialize a new j.l.Integer.
3053 address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3054 __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
3055 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3056 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3057 assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
3058  // `value` is a final field, so a store-store barrier is required. Ideally, we'd merge this memory
3059  // barrier with the one implied by the allocation.
3060 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
3061 __ Bind(&done);
3062 }
3063}
3064
Anton Kirilov5ec62182016-10-13 20:16:02 +01003065UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble) // Could be done by changing rounding mode, maybe?
3066UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat) // Could be done by changing rounding mode, maybe?
3067UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong) // High register pressure.
3068UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
Anton Kirilov5ec62182016-10-13 20:16:02 +01003069UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
3070UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
3071UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
3072UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
3073
Aart Bikff7d89c2016-11-07 08:49:28 -08003074UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
3075UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
Aart Bik71bf7b42016-11-16 10:17:46 -08003076UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferAppend);
3077UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferLength);
3078UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBufferToString);
3079UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderAppend);
3080UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderLength);
3081UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringBuilderToString);
Aart Bikff7d89c2016-11-07 08:49:28 -08003082
Anton Kirilov5ec62182016-10-13 20:16:02 +01003083// 1.8.
3084UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
3085UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
3086UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
3087UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
3088UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
3089
3090UNREACHABLE_INTRINSICS(ARMVIXL)
3091
3092#undef __
3093
3094} // namespace arm
3095} // namespace art