/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86_64.h"

#include <limits>

#include "arch/x86_64/instruction_set_features_x86_64.h"
#include "code_generator_x86_64.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "mirror/array-inl.h"
#include "mirror/art_method.h"
#include "mirror/string.h"
#include "thread.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"

namespace art {

namespace x86_64 {

IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
    : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
}


X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
  return reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  const LocationSummary* res = invoke->GetLocations();
  return res != nullptr && res->Intrinsified();
}

#define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

// TODO: trg as memory.
static void MoveFromReturnRegister(Location trg,
                                   Primitive::Type type,
                                   CodeGeneratorX86_64* codegen) {
  if (!trg.IsValid()) {
    DCHECK(type == Primitive::kPrimVoid);
    return;
  }

  switch (type) {
    case Primitive::kPrimBoolean:
    case Primitive::kPrimByte:
    case Primitive::kPrimChar:
    case Primitive::kPrimShort:
    case Primitive::kPrimInt:
    case Primitive::kPrimNot: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movl(trg_reg, CpuRegister(RAX));
      }
      break;
    }
    case Primitive::kPrimLong: {
      CpuRegister trg_reg = trg.AsRegister<CpuRegister>();
      if (trg_reg.AsRegister() != RAX) {
        __ movq(trg_reg, CpuRegister(RAX));
      }
      break;
    }

    case Primitive::kPrimVoid:
      LOG(FATAL) << "Unexpected void type for valid location " << trg;
      UNREACHABLE();

    case Primitive::kPrimDouble: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movsd(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
    case Primitive::kPrimFloat: {
      XmmRegister trg_reg = trg.AsFpuRegister<XmmRegister>();
      if (trg_reg.AsFloatRegister() != XMM0) {
        __ movss(trg_reg, XmmRegister(XMM0));
      }
      break;
    }
  }
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
// call. This will copy the arguments into the positions for a regular call.
//
// Note: The actual parameters are required to be in the locations given by the invoke's location
// summary. If an intrinsic modifies those locations before a slowpath call, they must be
// restored!
class IntrinsicSlowPathX86_64 : public SlowPathCodeX86_64 {
 public:
  explicit IntrinsicSlowPathX86_64(HInvoke* invoke) : invoke_(invoke) { }

  void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
    CodeGeneratorX86_64* codegen = down_cast<CodeGeneratorX86_64*>(codegen_in);
    __ Bind(GetEntryLabel());

    SaveLiveRegisters(codegen, invoke_->GetLocations());

    MoveArguments(invoke_, codegen);

    if (invoke_->IsInvokeStaticOrDirect()) {
      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), CpuRegister(RDI));
      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
    } else {
      UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
      UNREACHABLE();
    }

    // Copy the result back to the expected output.
    Location out = invoke_->GetLocations()->Out();
    if (out.IsValid()) {
      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      MoveFromReturnRegister(out, invoke_->GetType(), codegen);
    }

    RestoreLiveRegisters(codegen, invoke_->GetLocations());
    __ jmp(GetExitLabel());
  }

 private:
  // The instruction where this slow path is happening.
  HInvoke* const invoke_;

  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86_64);
};

#undef __
#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsRegister<CpuRegister>(), input.AsFpuRegister<XmmRegister>(), is64bit);
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<CpuRegister>(), is64bit);
}

void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
}
void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86_64Assembler* assembler) {
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
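      // bswapl reverses all four bytes of the 32-bit register; the arithmetic shift by 16 then
      // moves the reversed short back into the low half, sign-extended.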
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    case Primitive::kPrimLong:
      __ bswapq(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloatPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  // TODO: Allow x86 to work with memory. This requires assembler support, see below.
  // locations->SetInAt(0, Location::Any());  // X86 can work on memory directly.
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresFpuRegister());  // FP reg to hold mask.
}

static void MathAbsFP(LocationSummary* locations,
                      bool is64bit,
                      X86_64Assembler* assembler,
                      CodeGeneratorX86_64* codegen) {
  Location output = locations->Out();

  if (output.IsFpuRegister()) {
    // In-register
    XmmRegister xmm_temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();

    // TODO: Can mask directly with constant area using pand if we can guarantee
    // that the literal is aligned on a 16 byte boundary. This will avoid a
    // temporary.
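    // Clearing the IEEE-754 sign bit (the most significant bit) yields the absolute value.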
    if (is64bit) {
      __ movsd(xmm_temp, codegen->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
      __ andpd(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    } else {
      __ movss(xmm_temp, codegen->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
      __ andps(output.AsFpuRegister<XmmRegister>(), xmm_temp);
    }
  } else {
    // TODO: update when assembler support is available.
    UNIMPLEMENTED(FATAL) << "Needs assembler support.";
//  Once assembler support is available, in-memory operations look like this:
//    if (is64bit) {
//      DCHECK(output.IsDoubleStackSlot());
//      // No 64b and with literal.
//      __ movq(cpu_temp, Immediate(INT64_C(0x7FFFFFFFFFFFFFFF)));
//      __ andq(Address(CpuRegister(RSP), output.GetStackIndex()), cpu_temp);
//    } else {
//      DCHECK(output.IsStackSlot());
//      // Can use and with a literal directly.
//      __ andl(Address(CpuRegister(RSP), output.GetStackIndex()), Immediate(INT64_C(0x7FFFFFFF)));
//    }
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloatPlusTemps(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke->GetLocations(), false, GetAssembler(), codegen_);
}

static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

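// Branch-free absolute value: mask = x >> 31 (or 63) is zero for non-negative x and all ones for
// negative x, so (x + mask) ^ mask equals |x|.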
static void GenAbsInteger(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
  Location output = locations->Out();
  CpuRegister out = output.AsRegister<CpuRegister>();
  CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();

  if (is64bit) {
    // Create mask.
    __ movq(mask, out);
    __ sarq(mask, Immediate(63));
    // Add mask.
    __ addq(out, mask);
    __ xorq(out, mask);
  } else {
    // Create mask.
    __ movl(mask, out);
    __ sarl(mask, Immediate(31));
    // Add mask.
    __ addl(out, mask);
    __ xorl(out, mask);
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsInt(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathAbsLong(HInvoke* invoke) {
  CreateIntToIntPlusTemp(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), true, GetAssembler());
}

static void GenMinMaxFP(LocationSummary* locations,
                        bool is_min,
                        bool is_double,
                        X86_64Assembler* assembler,
                        CodeGeneratorX86_64* codegen) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  // (out := op1)
  // out <=? op2
  // if NaN jmp Nan_label
  // if out is min jmp done
  // if op2 is min jmp op2_label
  // handle -0/+0
  // jmp done
  // Nan_label:
  // out := NaN
  // op2_label:
  // out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  Label nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
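  // Equal operands may still differ in sign (+0.0 vs -0.0): min keeps the sign bit by OR-ing the
  // operands (yielding -0.0), while max clears it by AND-ing them (yielding +0.0).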
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  if (is_double) {
    __ movsd(out, codegen->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
  } else {
    __ movss(out, codegen->LiteralInt32Address(INT32_C(0x7FC00000)));
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFP(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFP(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler(), codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86_64Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();

  // (out := op1)
  // out <=? op2
  // if out is min jmp done
  // out := op2
  // done:

  if (is_long) {
    __ cmpq(out, op2);
  } else {
    __ cmpl(out, op2);
  }

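  // out already holds op1; conditionally overwrite it with op2 only when op2 is the smaller
  // (for min) or larger (for max) of the two values.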
  __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, is_long);
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
}

void IntrinsicLocationsBuilderX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86_64* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), CpuRegister(RDI));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    MoveFromReturnRegister(out, invoke->GetType(), codegen);
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

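// The SSE4.1 round immediate selects the rounding mode: 0 rounds to the nearest (even) value
// (rint), 1 rounds toward negative infinity (floor), and 2 rounds toward positive infinity (ceil).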
static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86_64* codegen,
                                    HInvoke* invoke,
                                    X86_64Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

static void CreateSSE41FPToIntLocations(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        CodeGeneratorX86_64* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                             LocationSummary::kNoCall,
                                                             kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    locations->SetOut(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCall);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(RAX));
  // Needs to be RDI for the invoke.
  locations->AddTemp(Location::RegisterLocation(RDI));
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movss(inPlusPointFive, codegen_->LiteralFloatAddress(0.5f));

  // Add in the input.
  __ addss(inPlusPointFive, in);

  // And floor it to an integral value.
  __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));

  // if inPlusPointFive >= maxInt goto done
  __ comiss(inPlusPointFive, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = float-to-int-truncate(input)
  __ cvttss2si(out, inPlusPointFive);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  CreateSSE41FPToIntLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  // Implement RoundDouble as t1 = floor(input + 0.5); convert to long.
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
  XmmRegister inPlusPointFive = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  Label done, nan;
  X86_64Assembler* assembler = GetAssembler();

  // Load 0.5 into inPlusPointFive.
  __ movsd(inPlusPointFive, codegen_->LiteralDoubleAddress(0.5));

  // Add in the input.
  __ addsd(inPlusPointFive, in);

  // And floor it to an integral value.
  __ roundsd(inPlusPointFive, inPlusPointFive, Immediate(1));

  // if inPlusPointFive >= maxLong goto done
  __ comisd(inPlusPointFive, codegen_->LiteralDoubleAddress(static_cast<double>(kPrimLongMax)));
  __ j(kAboveEqual, &done);

  // if input == NaN goto nan
  __ j(kUnordered, &nan);

  // output = double-to-long-truncate(input)
  __ cvttsd2si(out, inPlusPointFive, true);
  __ jmp(&done);
  __ Bind(&nan);

  // output = 0
  __ xorl(out, out);
  __ Bind(&done);
}

void IntrinsicLocationsBuilderX86_64::VisitStringCharAt(HInvoke* invoke) {
  // The inputs plus one temp.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86_64::VisitStringCharAt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();

  // Location of reference to data array
  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count
  const int32_t count_offset = mirror::String::CountOffset().Int32Value();

  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister idx = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
  // the cost.
  // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
  // we will not optimize the code for constants (which would save a register).

  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);

  X86_64Assembler* assembler = GetAssembler();

  __ cmpl(idx, Address(obj, count_offset));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ j(kAboveEqual, slow_path->GetEntryLabel());

  // out = out[2*idx].
  __ movzxw(out, Address(out, idx, ScaleFactor::TIMES_2, value_offset));

  __ Bind(slow_path->GetExitLabel());
}

void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCall,
                                                            kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
  locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
  locations->SetOut(Location::RegisterLocation(RAX));
}

void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
  X86_64Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
  __ testl(argument, argument);
  SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
  codegen_->AddSlowPath(slow_path);
  __ j(kEqual, slow_path->GetEntryLabel());

  __ gs()->call(Address::Absolute(
      QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pStringCompareTo), true));
  __ Bind(slow_path->GetExitLabel());
}

static void CreateStringIndexOfLocations(HInvoke* invoke,
                                         ArenaAllocator* allocator,
                                         bool start_at_zero) {
  LocationSummary* locations = new (allocator) LocationSummary(invoke,
                                                               LocationSummary::kCallOnSlowPath,
                                                               kIntrinsified);
  // The data needs to be in RDI for scasw. So request that the string is there, anyways.
  locations->SetInAt(0, Location::RegisterLocation(RDI));
  // If we look for a constant char, we'll still have to copy it into RAX. So just request the
  // allocator to do that, anyways. We can still do the constant check by checking the parameter
  // of the instruction explicitly.
  // Note: This works as we don't clobber RAX anywhere.
  locations->SetInAt(1, Location::RegisterLocation(RAX));
  if (!start_at_zero) {
    locations->SetInAt(2, Location::RequiresRegister());  // The starting index.
  }
  // As we clobber RDI during execution anyways, also use it as the output.
  locations->SetOut(Location::SameAsFirstInput());

  // repne scasw uses RCX as the counter.
  locations->AddTemp(Location::RegisterLocation(RCX));
  // Need another temporary to be able to compute the result.
  locations->AddTemp(Location::RequiresRegister());
}

static void GenerateStringIndexOf(HInvoke* invoke,
                                  X86_64Assembler* assembler,
                                  CodeGeneratorX86_64* codegen,
                                  ArenaAllocator* allocator,
                                  bool start_at_zero) {
  LocationSummary* locations = invoke->GetLocations();

  // Note that the null check must have been done earlier.
  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));

  CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  // Check our assumptions for registers.
  DCHECK_EQ(string_obj.AsRegister(), RDI);
  DCHECK_EQ(search_value.AsRegister(), RAX);
  DCHECK_EQ(counter.AsRegister(), RCX);
  DCHECK_EQ(out.AsRegister(), RDI);

  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
  // or directly dispatch if we have a constant.
  SlowPathCodeX86_64* slow_path = nullptr;
  if (invoke->InputAt(1)->IsIntConstant()) {
    if (static_cast<uint32_t>(invoke->InputAt(1)->AsIntConstant()->GetValue()) >
        std::numeric_limits<uint16_t>::max()) {
      // Always needs the slow-path. We could directly dispatch to it, but this case should be
      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
      slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
      codegen->AddSlowPath(slow_path);
      __ jmp(slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
      return;
    }
  } else {
    __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
    slow_path = new (allocator) IntrinsicSlowPathX86_64(invoke);
    codegen->AddSlowPath(slow_path);
    __ j(kAbove, slow_path->GetEntryLabel());
  }

  // From here down, we know that we are looking for a char that fits in 16 bits.
  // Location of reference to data array within the String object.
  int32_t value_offset = mirror::String::ValueOffset().Int32Value();
  // Location of count within the String object.
  int32_t count_offset = mirror::String::CountOffset().Int32Value();

  // Load string length, i.e., the count field of the string.
  __ movl(string_length, Address(string_obj, count_offset));

  // Do a length check.
  // TODO: Support jecxz.
  Label not_found_label;
  __ testl(string_length, string_length);
  __ j(kEqual, &not_found_label);

  if (start_at_zero) {
    // Number of chars to scan is the same as the string length.
    __ movl(counter, string_length);

    // Move to the start of the string.
    __ addq(string_obj, Immediate(value_offset));
  } else {
    CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();

    // Do a start_index check.
    __ cmpl(start_index, string_length);
    __ j(kGreaterEqual, &not_found_label);

    // Ensure we have a start index >= 0;
    __ xorl(counter, counter);
    __ cmpl(start_index, Immediate(0));
    __ cmov(kGreater, counter, start_index, false);  // 32-bit copy is enough.

    // Move to the start of the string: string_obj + value_offset + 2 * start_index.
    __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));

    // Now update ecx, the work counter: it's gonna be string.length - start_index.
    __ negq(counter);  // Needs to be 64-bit negation, as the address computation is 64-bit.
    __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
  }

  // Everything is set up for repne scasw:
  // * Comparison address in RDI.
  // * Counter in ECX.
  __ repne_scasw();

  // Did we find a match?
  __ j(kNotEqual, &not_found_label);

  // Yes, we matched. Compute the index of the result.
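  // repne scasw decrements RCX once per character compared, including the matching one, so the
  // zero-based index of the match is string_length - counter - 1.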
981 __ subl(string_length, counter);
982 __ leal(out, Address(string_length, -1));
983
984 Label done;
985 __ jmp(&done);
986
987 // Failed to match; return -1.
988 __ Bind(&not_found_label);
989 __ movl(out, Immediate(-1));
990
991 // And join up at the end.
992 __ Bind(&done);
993 if (slow_path != nullptr) {
994 __ Bind(slow_path->GetExitLabel());
995 }
996}
997
998void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
999 CreateStringIndexOfLocations(invoke, arena_, true);
1000}
1001
1002void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1003 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), true);
1004}
1005
1006void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1007 CreateStringIndexOfLocations(invoke, arena_, false);
1008}
1009
1010void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1011 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), false);
1012}
1013
Jeff Hao848f70a2014-01-15 13:49:50 -08001014void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1015 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1016 LocationSummary::kCall,
1017 kIntrinsified);
1018 InvokeRuntimeCallingConvention calling_convention;
1019 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1020 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1021 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1022 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1023 locations->SetOut(Location::RegisterLocation(RAX));
1024}
1025
1026void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1027 X86_64Assembler* assembler = GetAssembler();
1028 LocationSummary* locations = invoke->GetLocations();
1029
1030 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1031 __ testl(byte_array, byte_array);
1032 SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1033 codegen_->AddSlowPath(slow_path);
1034 __ j(kEqual, slow_path->GetEntryLabel());
1035
1036 __ gs()->call(Address::Absolute(
1037 QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromBytes), true));
1038 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1039 __ Bind(slow_path->GetExitLabel());
1040}
1041
1042void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1043 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1044 LocationSummary::kCall,
1045 kIntrinsified);
1046 InvokeRuntimeCallingConvention calling_convention;
1047 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1048 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1049 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1050 locations->SetOut(Location::RegisterLocation(RAX));
1051}
1052
1053void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1054 X86_64Assembler* assembler = GetAssembler();
1055
1056 __ gs()->call(Address::Absolute(
1057 QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromChars), true));
1058 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1059}
1060
1061void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1062 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1063 LocationSummary::kCall,
1064 kIntrinsified);
1065 InvokeRuntimeCallingConvention calling_convention;
1066 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1067 locations->SetOut(Location::RegisterLocation(RAX));
1068}
1069
1070void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1071 X86_64Assembler* assembler = GetAssembler();
1072 LocationSummary* locations = invoke->GetLocations();
1073
1074 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1075 __ testl(string_to_copy, string_to_copy);
1076 SlowPathCodeX86_64* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
1077 codegen_->AddSlowPath(slow_path);
1078 __ j(kEqual, slow_path->GetEntryLabel());
1079
1080 __ gs()->call(Address::Absolute(
1081 QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pAllocStringFromString), true));
1082 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
1083 __ Bind(slow_path->GetExitLabel());
1084}
1085
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001086static void GenPeek(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1087 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1088 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1089 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1090 // to avoid a SIGBUS.
1091 switch (size) {
1092 case Primitive::kPrimByte:
1093 __ movsxb(out, Address(address, 0));
1094 break;
1095 case Primitive::kPrimShort:
1096 __ movsxw(out, Address(address, 0));
1097 break;
1098 case Primitive::kPrimInt:
1099 __ movl(out, Address(address, 0));
1100 break;
1101 case Primitive::kPrimLong:
1102 __ movq(out, Address(address, 0));
1103 break;
1104 default:
1105 LOG(FATAL) << "Type not recognized for peek: " << size;
1106 UNREACHABLE();
1107 }
1108}
1109
1110void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1111 CreateIntToIntLocations(arena_, invoke);
1112}
1113
1114void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1115 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1116}
1117
1118void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1119 CreateIntToIntLocations(arena_, invoke);
1120}
1121
1122void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1123 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1124}
1125
1126void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1127 CreateIntToIntLocations(arena_, invoke);
1128}
1129
1130void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1131 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1132}
1133
1134void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1135 CreateIntToIntLocations(arena_, invoke);
1136}
1137
1138void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1139 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1140}
1141
1142static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
1143 LocationSummary* locations = new (arena) LocationSummary(invoke,
1144 LocationSummary::kNoCall,
1145 kIntrinsified);
1146 locations->SetInAt(0, Location::RequiresRegister());
Mark Mendell40741f32015-04-20 22:10:34 -04001147 locations->SetInAt(1, Location::RegisterOrInt32LongConstant(invoke->InputAt(1)));
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001148}
1149
1150static void GenPoke(LocationSummary* locations, Primitive::Type size, X86_64Assembler* assembler) {
1151 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
Mark Mendell40741f32015-04-20 22:10:34 -04001152 Location value = locations->InAt(1);
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001153 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1154 // to avoid a SIGBUS.
1155 switch (size) {
1156 case Primitive::kPrimByte:
Mark Mendell40741f32015-04-20 22:10:34 -04001157 if (value.IsConstant()) {
1158 __ movb(Address(address, 0),
1159 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1160 } else {
1161 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1162 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001163 break;
1164 case Primitive::kPrimShort:
Mark Mendell40741f32015-04-20 22:10:34 -04001165 if (value.IsConstant()) {
1166 __ movw(Address(address, 0),
1167 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1168 } else {
1169 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1170 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001171 break;
1172 case Primitive::kPrimInt:
Mark Mendell40741f32015-04-20 22:10:34 -04001173 if (value.IsConstant()) {
1174 __ movl(Address(address, 0),
1175 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1176 } else {
1177 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1178 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001179 break;
1180 case Primitive::kPrimLong:
Mark Mendell40741f32015-04-20 22:10:34 -04001181 if (value.IsConstant()) {
1182 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1183 DCHECK(IsInt<32>(v));
1184 int32_t v_32 = v;
1185 __ movq(Address(address, 0), Immediate(v_32));
1186 } else {
1187 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1188 }
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001189 break;
1190 default:
1191 LOG(FATAL) << "Type not recognized for poke: " << size;
1192 UNREACHABLE();
1193 }
1194}
1195
1196void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1197 CreateIntIntToVoidLocations(arena_, invoke);
1198}
1199
1200void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1201 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1202}
1203
1204void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1205 CreateIntIntToVoidLocations(arena_, invoke);
1206}
1207
1208void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1209 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1210}
1211
1212void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1213 CreateIntIntToVoidLocations(arena_, invoke);
1214}
1215
1216void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1217 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1218}
1219
1220void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1221 CreateIntIntToVoidLocations(arena_, invoke);
1222}
1223
1224void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1225 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1226}
1227
1228void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1229 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1230 LocationSummary::kNoCall,
1231 kIntrinsified);
1232 locations->SetOut(Location::RequiresRegister());
1233}
1234
1235void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1236 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
1237 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64WordSize>(), true));
1238}
1239
Andreas Gampe878d58c2015-01-15 23:24:00 -08001240static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001241 bool is_volatile ATTRIBUTE_UNUSED, X86_64Assembler* assembler) {
1242 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
1243 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
1244 CpuRegister trg = locations->Out().AsRegister<CpuRegister>();
1245
Andreas Gampe878d58c2015-01-15 23:24:00 -08001246 switch (type) {
1247 case Primitive::kPrimInt:
1248 case Primitive::kPrimNot:
1249 __ movl(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1250 break;
1251
1252 case Primitive::kPrimLong:
1253 __ movq(trg, Address(base, offset, ScaleFactor::TIMES_1, 0));
1254 break;
1255
1256 default:
1257 LOG(FATAL) << "Unsupported op size " << type;
1258 UNREACHABLE();
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001259 }
1260}
1261
1262static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
1263 LocationSummary* locations = new (arena) LocationSummary(invoke,
1264 LocationSummary::kNoCall,
1265 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001266 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001267 locations->SetInAt(1, Location::RequiresRegister());
1268 locations->SetInAt(2, Location::RequiresRegister());
Andreas Gampe878d58c2015-01-15 23:24:00 -08001269 locations->SetOut(Location::RequiresRegister());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001270}
1271
1272void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1273 CreateIntIntIntToIntLocations(arena_, invoke);
1274}
1275void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1276 CreateIntIntIntToIntLocations(arena_, invoke);
1277}
1278void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1279 CreateIntIntIntToIntLocations(arena_, invoke);
1280}
1281void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1282 CreateIntIntIntToIntLocations(arena_, invoke);
1283}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001284void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1285 CreateIntIntIntToIntLocations(arena_, invoke);
1286}
1287void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1288 CreateIntIntIntToIntLocations(arena_, invoke);
1289}
1290
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001291
1292void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001293 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001294}
1295void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001296 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001297}
1298void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001299 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001300}
1301void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Andreas Gampe878d58c2015-01-15 23:24:00 -08001302 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001303}
Andreas Gampe878d58c2015-01-15 23:24:00 -08001304void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1305 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
1306}
1307void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1308 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
1309}
1310
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001311
1312static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1313 Primitive::Type type,
1314 HInvoke* invoke) {
1315 LocationSummary* locations = new (arena) LocationSummary(invoke,
1316 LocationSummary::kNoCall,
1317 kIntrinsified);
Andreas Gampe878d58c2015-01-15 23:24:00 -08001318 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
Andreas Gampe71fb52f2014-12-29 17:43:08 -08001319 locations->SetInAt(1, Location::RequiresRegister());
1320 locations->SetInAt(2, Location::RequiresRegister());
1321 locations->SetInAt(3, Location::RequiresRegister());
1322 if (type == Primitive::kPrimNot) {
1323 // Need temp registers for card-marking.
1324 locations->AddTemp(Location::RequiresRegister());
1325 locations->AddTemp(Location::RequiresRegister());
1326 }
1327}
1328
void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}
void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke);
}

// We don't need to do anything special for the "ordered" variants: they only require an AnyStore
// barrier, which the x86 memory model already provides.
static void GenUnsafePut(LocationSummary* locations, Primitive::Type type, bool is_volatile,
                         CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler = reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  } else {
    __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
  }

  if (is_volatile) {
    __ mfence();
  }

  if (type == Primitive::kPrimNot) {
    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                        locations->GetTemp(1).AsRegister<CpuRegister>(),
                        base,
                        value);
  }
}
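// Note on the barrier above: on x86-64, ordinary stores already provide store-store (release)
// ordering, which is why the Ordered variants are generated with is_volatile == false and compile
// to a plain store. A volatile store additionally needs store-load ordering, which the mfence
// supplies.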

void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
}
void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
  GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
}

static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
                                       HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetInAt(2, Location::RequiresRegister());
  // The expected value must be in EAX/RAX: cmpxchg implicitly compares against and updates RAX.
  locations->SetInAt(3, Location::RegisterLocation(RAX));
  locations->SetInAt(4, Location::RequiresRegister());

  locations->SetOut(Location::RequiresRegister());
  if (type == Primitive::kPrimNot) {
    // Need temp registers for card-marking.
    locations->AddTemp(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
}

void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
}

static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
  CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
  CpuRegister expected = locations->InAt(3).AsRegister<CpuRegister>();
  DCHECK_EQ(expected.AsRegister(), RAX);
  CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
  CpuRegister out = locations->Out().AsRegister<CpuRegister>();

  if (type == Primitive::kPrimLong) {
    __ LockCmpxchgq(Address(base, offset, TIMES_1, 0), value);
  } else {
    // Integer or object.
    if (type == Primitive::kPrimNot) {
      // Mark card for object assuming new value is stored.
      codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
                          locations->GetTemp(1).AsRegister<CpuRegister>(),
                          base,
                          value);
    }

    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
  }

  // Locked cmpxchg has full barrier semantics, and we don't need scheduling
  // barriers at this time.

  // Convert ZF into the boolean result.
  __ setcc(kZero, out);
  __ movzxb(out, out);
}
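// Note on the sequence above: lock cmpxchg compares RAX (the expected value) against the memory
// operand; on success it stores `value` and sets ZF, on failure it loads the current memory value
// into RAX and clears ZF. The setcc/movzxb pair then materializes ZF as the 0/1 boolean returned
// by Unsafe.compareAndSwap*. For the object case the card is marked before the cmpxchg, i.e. even
// if the swap ends up failing; this is conservative but harmless for the GC.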

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

// Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), clobbering temp.
static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
                     X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, then use 3 rounds of bit
   * swapping to reverse the bits within each byte of x. Using bswap saves instructions
   * compared to the generic libcore (luni) implementation, which needs 5 rounds of bit
   * swapping:
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}
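// For reference, the sequence emitted above computes the same result as this illustrative,
// portable C sketch of Integer.reverse() (kept in a comment; it is not part of the generated
// code):
//
//   uint32_t ReverseBits32(uint32_t x) {
//     x = __builtin_bswap32(x);                                  // Reverse byte order.
//     x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);   // Swap adjacent bits.
//     x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);   // Swap 2-bit groups.
//     x = ((x & 0x0f0f0f0fu) << 4) | ((x >> 4) & 0x0f0f0f0fu);   // Swap nibbles within bytes.
//     return x;
//   }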

void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

// 64-bit variant of SwapBits: computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift),
// clobbering temp and temp_mask.
static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
                       int32_t shift, int64_t mask, X86_64Assembler* assembler) {
  Immediate imm_shift(shift);
  __ movq(temp_mask, Immediate(mask));
  __ movq(temp, reg);
  __ shrq(reg, imm_shift);
  __ andq(temp, temp_mask);
  __ andq(reg, temp_mask);
  __ shlq(temp, imm_shift);
  __ orq(reg, temp);
}
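// Unlike the masks in the 32-bit SwapBits, the 64-bit masks (e.g. 0x5555555555555555) cannot be
// encoded as immediates of andq/orq, which only accept sign-extended 32-bit immediates. The mask
// is therefore materialized into temp_mask with movq first, which is why VisitLongReverse
// reserves a second temporary register.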

void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
  X86_64Assembler* assembler =
      reinterpret_cast<X86_64Assembler*>(codegen_->GetAssembler());
  LocationSummary* locations = invoke->GetLocations();

  CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();

  /*
   * Use one bswap instruction to reverse the byte order first, then use 3 rounds of bit
   * swapping to reverse the bits within each byte of the long value x. Using bswap saves
   * instructions compared to the generic libcore (luni) implementation, which needs 5 rounds
   * of bit swapping:
   * x = bswap x
   * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
   * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
   * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
   */
  __ bswapq(reg);
  SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
  SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
  SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
}

// Unimplemented intrinsics.

#define UNIMPLEMENTED_INTRINSIC(Name)                                                    \
void IntrinsicLocationsBuilderX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
}                                                                                        \
void IntrinsicCodeGeneratorX86_64::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) {    \
}
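// These empty visitors deliberately do not create intrinsified locations, so invokes of the
// listed methods fall back to the regular (non-intrinsic) call path until an x86-64
// implementation is provided.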

UNIMPLEMENTED_INTRINSIC(StringGetCharsNoCheck)
UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)

}  // namespace x86_64
}  // namespace art