/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86_64* codegen) {
  if (invoke->InputCount() == 0) {
    return;
  }

  LocationSummary* locations = invoke->GetLocations();
  InvokeDexCallingConventionVisitor calling_convention_visitor;

  // We're moving potentially two or more locations to locations that could overlap, so we need
  // a parallel move resolver.
  HParallelMove parallel_move(arena);

  for (size_t i = 0; i < invoke->InputCount(); i++) {
    HInstruction* input = invoke->InputAt(i);
    Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
    Location actual_loc = locations->InAt(i);

    parallel_move.AddMove(actual_loc, cc_loc, nullptr);
  }

  codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
//       restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};
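
// Typical use of the slow path above (see e.g. VisitStringCharAt below): the fast path emits a
// check, branches to the slow path's entry label on failure, and the slow path re-dispatches to
// the managed implementation before jumping back to the exit label. A minimal sketch:
//
//   SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
//   codegen_->AddSlowPath(slow_path);
//   __ j(kAboveEqual, slow_path->GetEntryLabel());   // Fast-path check failed.
//   ...                                              // Fast-path code.
//   __ Bind(slow_path->GetExitLabel());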

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}
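
// Worked example for the kPrimShort case above (illustrative values only): for the 16-bit input
// 0x1280, held in the 32-bit register as 0x00001280, bswapl produces 0x80120000 and sarl by 16
// yields 0xFFFF8012, i.e. the byte-swapped short 0x8012 sign-extended to 32 bits, which is what
// Java's Short.reverseBytes requires.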

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());     // Immediate constant.
  locations->AddTemp(Location::RequiresFpuRegister());  // FP version of above.
}

static void MathAbsFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(1).AsFpuRegister<XmmRegister>();

    if (is64bit) {
      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ movd(xmm_temp, cpu_temp);
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movl(cpu_temp, Immediate(INT64_C(0x7FFFFFFF)));
      __ movd(xmm_temp, cpu_temp);
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
    // Once assembler support is available, in-memory operations look like this:
    //   if (is64bit) {
    //     DCHECK(output.IsDoubleStackSlot());
    //     // No 64b and with literal.
    //     __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
    //     __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
    //   } else {
    //     DCHECK(output.IsStackSlot());
    //     // Can use and with a literal directly.
    //     __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
    //   }
  }
}
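
// The masks used above rely on the IEEE-754 layout: the most significant bit of a float/double is
// the sign bit, so and-ing with 0x7FFFFFFF (float) or 0x7FFFFFFFFFFFFFFF (double) clears only that
// bit. For example, the float -2.0f has bit pattern 0xC0000000; masking yields 0x40000000, which
// is 2.0f. This also gives the Java-specified results for -0.0 (becomes +0.0) and NaN (payload
// kept, sign cleared).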

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}
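
// Branch-free abs: the arithmetic shift turns the sign into a mask of all ones (negative input) or
// all zeros (non-negative input), and (x + mask) ^ mask then computes -x or x respectively. For
// example, with x = -5: mask = 0xFFFFFFFF, x + mask = -6, and (-6) ^ 0xFFFFFFFF = 5. As with Java's
// Math.abs, Integer.MIN_VALUE (and Long.MIN_VALUE) map to themselves.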

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
                        X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  CpuRegister cpu_temp = locations->GetTemp(0).AsRegister<CpuRegister>();
  // TODO: Literal pool. Trades 64b immediate in CPU reg for direct memory access.
  if (is_double) {
    __ movq(cpu_temp, Immediate(INT64_C(0x7FF8000000000000)));
  } else {
    __ movl(cpu_temp, Immediate(INT64_C(0x7FC00000)));
  }
  __ movd(out, cpu_temp, is_double);
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}
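
// Note on the 0.0/-0.0 branch above: ucomiss/ucomisd report -0.0 and +0.0 as equal, so neither of
// the above/below jumps is taken and the result has to be picked from the bit patterns instead.
// Or-ing the operands keeps a set sign bit, so min(-0.0, +0.0) correctly yields -0.0; and-ing
// clears it, so max(-0.0, +0.0) yields +0.0, matching Java's Math.min/Math.max. The NaN path loads
// the canonical quiet-NaN pattern (0x7FF8000000000000 / 0x7FC00000) directly.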

static void CreateFPFPToFPPlusTempLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());  // Immediate constant.
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPPlusTempLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    //       a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  //  (out := op1)
  //  out <=? op2
  //  if out is min jmp done
  //  out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}
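
// The conditional move keeps the integer min/max branch-free: out already holds op1, and the cmov
// overwrites it with op2 only when op1 loses the comparison (op1 > op2 for min, op1 < op2 for max).
// For example, min(7, 3): cmpl leaves "greater", so the cmov copies 3 into out; for min(3, 7) the
// condition does not hold and out keeps 3.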

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}
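
// The roundsd immediate selects the rounding mode (with bit 2 clear, the immediate overrides
// MXCSR): 0 = round to nearest even, 1 = round toward negative infinity, 2 = round toward positive
// infinity, 3 = truncate toward zero. The visitors below therefore pass 2 for Math.ceil, 1 for
// Math.floor and 0 for Math.rint.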

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
  __ movd(inPlusPointFive, out, false);

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor to an integral value.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movl(out, Immediate(kPrimIntMax));
  // maxInt = int-to-float(out)
  __ cvtsi2ss(maxInt, out);

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, maxInt);
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}
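
// Beyond the fast path, the sequence above matches Java's Math.round contract: when
// floor(x + 0.5f) reaches Integer.MAX_VALUE the value already loaded into out is kept, NaN takes
// the unordered branch and produces 0, and for very negative inputs cvttss2si returns the
// "integer indefinite" value 0x80000000, which is exactly Integer.MIN_VALUE. The double/long
// variant below relies on the same properties of cvttsd2si.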

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister maxLong = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Generate 0.5 into inPlusPointFive.
  __ movq(out, Immediate(bit_cast<int64_t, double>(0.5)));
  __ movd(inPlusPointFive, out, true);

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor to an integral value.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  __ movq(out, Immediate(kPrimLongMax));
  // maxLong = long-to-double(out)
  __ cvtsi2sd(maxLong, out, true);

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, maxLong);
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorq(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCallOnSlowPath,
                                                             kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
  // Starting offset within data array
  const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
  // Start of char data within array_
  const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  Location temp_loc = locations->GetTemp(0);
  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  //       the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  //       we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // Get the actual element.
  __ movl(temp, idx);                          // temp := idx.
  __ addl(temp, Address(obj, offset_offset));  // temp := offset + idx.
  __ movl(out, Address(obj, value_offset));    // out := obj.array.
  // out = out[2*temp].
  __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));

  __ Bind(slow_path->GetExitLabel());
}
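
// Fast path above: an unsigned compare of idx against String.count doubles as the bounds check
// (a negative index wraps to a large unsigned value), and on failure the slow path simply calls
// the managed String.charAt, which throws the required StringIndexOutOfBoundsException. Strings
// here still reference a separate char[] with a start offset, hence the value/offset/data address
// arithmetic.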

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kCall,
                                                             kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheck());

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
        QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();  // == address, here for clarity.
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movsxb(out, Address(address, 0));
      break;
    case Primitive::kPrimShort:
      __ movsxw(out, Address(address, 0));
      break;
    case Primitive::kPrimInt:
      __ movl(out, Address(address, 0));
      break;
    case Primitive::kPrimLong:
      __ movq(out, Address(address, 0));
      break;
    default:
      LOG(FATAL) << "Type not recognized for peek: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
  GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
}

static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
  CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(1).AsRegister<CpuRegister>();
  // x86 allows unaligned access. We do not have to check the input or use specific instructions
  // to avoid a SIGBUS.
  switch (size) {
    case Primitive::kPrimByte:
      __ movb(Address(address, 0), value);
      break;
    case Primitive::kPrimShort:
      __ movw(Address(address, 0), value);
      break;
    case Primitive::kPrimInt:
      __ movl(Address(address, 0), value);
      break;
    case Primitive::kPrimLong:
      __ movq(Address(address, 0), value);
      break;
    default:
      LOG(FATAL) << "Type not recognized for poke: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  CreateIntIntToVoidLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
  GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
  CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
  GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
}

static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
                         bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister trg = locations->Out().AsRegister<CpuRegister>();

  switch (type) {
    case Primitive::kPrimInt:
    case Primitive::kPrimNot:
      __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    case Primitive::kPrimLong:
      __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
      break;

    default:
      LOG(FATAL) << "Unsupported op size " << type;
      UNREACHABLE();
  }
}
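
// No fence is needed for the volatile variants here: on x86-64, ordinary aligned loads already
// provide the acquire ordering a volatile read requires, so is_volatile is intentionally unused
// and only the store side (GenUnsafePut below) emits a barrier.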

static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntToIntLocations(arena_, invoke);
}


void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
  GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
}


static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
                                                        Primitive::Type type,
                                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());  // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
// memory model.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}
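
// On x86-64 the only hardware reordering is store-load, so ordered puts need no fence at all (see
// the comment above GenUnsafePut), while volatile puts require the mfence to keep the store from
// being reordered with subsequent loads. The card-marking for reference stores is the GC write
// barrier: it records that the base object may now hold a reference the collector has to re-scan.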

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                   \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                       \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}

UNIMPLEMENTED_INTRINSIC(IntegerReverse)
UNIMPLEMENTED_INTRINSIC(LongReverse)
UNIMPLEMENTED_INTRINSIC(StringIndexOf)
UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art