/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

static constexpr bool kIntrinsified = true;

X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetArena() {
  return codegen_->GetGraph()->GetArena();
}

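// Sketch of the contract, inferred from the code below: Dispatch() runs the
// locations builder for this invoke; if the invoke was recognized as an
// intrinsic, a LocationSummary flagged as Intrinsified() has been attached,
// and TryDispatch() reports that back to the caller.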
bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
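// Copies a call's return value out of the fixed ABI return register (RAX for
// integral and reference results, XMM0 for floating-point results) into the
// target location, skipping the move when the target already is that register.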
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
  if (invoke->InputCount() == 0) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitor calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
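  // Illustration only (not code emitted here, register names hypothetical):
  // if the summary says arg0 lives in RDX but the calling convention wants it
  // in RSI, while arg1 lives in RSI and must go to RDX, two naive moves would
  // clobber one value; the resolver detects such cycles and breaks them, e.g.
  // via a temporary or an exchange.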
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->InputCount(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(new (arena) MoveOperands(actual_loc, cc_loc, nullptr));
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    codegen->SaveLiveRegisters(invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    codegen->RestoreLiveRegisters(invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
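      // Worked example (sketch): with the short 0x1234 sign-extended in the
      // low 32 bits, bswapl gives 0x34120000 and the arithmetic right shift by
      // 16 yields a sign-extended 0x3412, i.e. the two bytes swapped.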
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();

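    // |x| is formed by clearing the IEEE-754 sign bit: the 0x7FFF... mask is
    // materialized in a GPR (SSE has no 64-bit immediate form), moved into an
    // XMM temp, and then ANDed with the value.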
    if (is64bit) {
      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ movd(xmm_temp, cpu_temp);
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
      __ movd(xmm_temp, cpu_temp);
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      // No 64b and with literal.
//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

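  // Branch-free absolute value (sketch of the identity used below): with
  // mask = x >> 63 (or >> 31), i.e. all ones when x is negative and zero
  // otherwise, abs(x) == (x + mask) ^ mask.
  // Example: x = -5 -> mask = -1, x + mask = -6, and -6 ^ -1 == 5.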
  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
                        X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

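  // ucomis{s,d} sets the parity flag when the comparison is unordered, i.e.
  // at least one operand is NaN, so kParityEven routes NaN inputs to the NaN
  // block below.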
  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
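  // At this point the operands compared equal, so they can only differ in the
  // sign bit (+0.0 vs -0.0). OR-ing the bit patterns keeps a set sign bit, so
  // min produces -0.0; AND-ing clears it unless both are negative, so max
  // produces +0.0.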
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
  if (is_double) {
    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
  } else {
    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
  }
  __ movd(out, cpu_temp, is_double);
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input as the output (we could simply swap the inputs).
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());  // Immediate constant.
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate output locations, e.g., output into memory, we would need
    //       to check whether a copy is needed here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

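  // The conditional move keeps the select branch-free: for min, kGreater
  // replaces out with op2 exactly when out > op2; for max, kLess does so when
  // out < op2.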
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array.
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count.
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array.
  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_.
  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  Location temp_loc = locations->GetTemp(0);
  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();

  // Note: The null check has already been done by an HNullCheck before the HInvokeVirtual. If/when
  //       we move to (coalesced) implicit checks, we have to do a null check below.
  DCHECK(!kCoalescedImplicitNullCheck);

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so, unlike Quick, we
  //       will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetArena()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

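  // Bounds check: kAboveEqual is an unsigned comparison, so a negative index
  // wraps to a large unsigned value and takes the same slow path as
  // idx >= count.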
  __ cmpl(idx, Address(obj, count_offset));
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // Get the actual element.
  __ movl(temp, idx);                          // temp := idx.
  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
  __ movl(out, Address(obj, value_offset));    // out := obj.array.
  // out = out[2*temp].
  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));

  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movb(Address(address, 0), value);
      break;
    case Primitive::kPrimShort:
      __ movw(Address(address, 0), value);
      break;
    case Primitive::kPrimInt:
      __ movl(Address(address, 0), value);
      break;
    case Primitive::kPrimLong:
      __ movq(Address(address, 0), value);
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
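  // The current Thread is reachable through the GS segment on x86-64; its
  // managed peer (the java.lang.Thread object) is loaded GS-relative from
  // Thread::PeerOffset as a 32-bit heap reference.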
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, bool is_long,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

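  // Note on is_volatile being unused (reasoning, not emitted code): under the
  // x86-64 memory model a load is not reordered with later loads or stores,
  // which covers the acquire ordering a volatile read needs, so a plain mov
  // suffices.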
  if (is_long) {
    __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
  } else {
    // TODO: Distinguish object. In case we move to an actual compressed heap, retrieving an object
    //       pointer will entail an unpack operation.
    __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
  }
}

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), false, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), false, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), true, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), true, true, GetAssembler());
}

static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                       Primitive::Type type,
                                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// Ordered accesses need no special handling here: they require an AnyStore barrier, which the
// x86 memory model already provides.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

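  // A volatile store additionally needs a StoreLoad barrier: x86 already
  // orders store-store and load-store, but a later load may still be reordered
  // ahead of this store, hence the mfence below.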
  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name) \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
} \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}

UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(MathFloor)
UNIMPLEMENTED_INTRINSIC(MathCeil)
UNIMPLEMENTED_INTRINSIC(MathRint)
UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(MathRoundFloat)
UNIMPLEMENTED_INTRINSIC(StringIsEmpty)  // Might not want to do these two anyway, inlining should
UNIMPLEMENTED_INTRINSIC(StringLength)   // be good enough here.
UNIMPLEMENTED_INTRINSIC(StringCompareTo)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art