1/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "intrinsics_x86.h"
18
19#include "arch/x86/instruction_set_features_x86.h"
20#include "code_generator_x86.h"
21#include "entrypoints/quick/quick_entrypoints.h"
22#include "intrinsics.h"
23#include "mirror/array-inl.h"
24#include "mirror/art_method.h"
25#include "mirror/string.h"
26#include "thread.h"
27#include "utils/x86/assembler_x86.h"
28#include "utils/x86/constants_x86.h"
29
30namespace art {
31
32namespace x86 {
33
34static constexpr int kDoubleNaNHigh = 0x7FF80000;
35static constexpr int kDoubleNaNLow = 0x00000000;
36static constexpr int kFloatNaN = 0x7FC00000;
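// These are the canonical quiet-NaN bit patterns that Math.min/max must return when either
// input is NaN; GenMinMaxFP below pushes them onto the stack to build that result in an XMM
// register. Combined, kDoubleNaNHigh:kDoubleNaNLow is 0x7FF8000000000000.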
37
38IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
39 : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
40}
41
42
43X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
44 return reinterpret_cast<X86Assembler*>(codegen_->GetAssembler());
45}
46
47ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
48 return codegen_->GetGraph()->GetArena();
49}
50
51bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
52 Dispatch(invoke);
53 LocationSummary* res = invoke->GetLocations();
54 return res != nullptr && res->Intrinsified();
55}
56
57#define __ reinterpret_cast<X86Assembler*>(codegen->GetAssembler())->
58
59// TODO: Allow the target to be in memory.
60static void MoveFromReturnRegister(Location target,
61 Primitive::Type type,
62 CodeGeneratorX86* codegen) {
63 if (!target.IsValid()) {
64 DCHECK(type == Primitive::kPrimVoid);
65 return;
66 }
67
68 switch (type) {
69 case Primitive::kPrimBoolean:
70 case Primitive::kPrimByte:
71 case Primitive::kPrimChar:
72 case Primitive::kPrimShort:
73 case Primitive::kPrimInt:
74 case Primitive::kPrimNot: {
75 Register target_reg = target.AsRegister<Register>();
76 if (target_reg != EAX) {
77 __ movl(target_reg, EAX);
78 }
79 break;
80 }
81 case Primitive::kPrimLong: {
82 Register target_reg_lo = target.AsRegisterPairLow<Register>();
83 Register target_reg_hi = target.AsRegisterPairHigh<Register>();
84 if (target_reg_lo != EAX) {
85 __ movl(target_reg_lo, EAX);
86 }
87 if (target_reg_hi != EDX) {
88 __ movl(target_reg_hi, EDX);
89 }
90 break;
91 }
92
93 case Primitive::kPrimVoid:
94 LOG(FATAL) << "Unexpected void type for valid location " << target;
95 UNREACHABLE();
96
97 case Primitive::kPrimDouble: {
98 XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
99 if (target_reg != XMM0) {
100 __ movsd(target_reg, XMM0);
101 }
102 break;
103 }
104 case Primitive::kPrimFloat: {
105 XmmRegister target_reg = target.AsFpuRegister<XmmRegister>();
106 if (target_reg != XMM0) {
107 __ movss(target_reg, XMM0);
108 }
109 break;
110 }
111 }
112}
113
114static void MoveArguments(HInvoke* invoke, ArenaAllocator* arena, CodeGeneratorX86* codegen) {
115 if (invoke->InputCount() == 0) {
116 return;
117 }
118
119 LocationSummary* locations = invoke->GetLocations();
120 InvokeDexCallingConventionVisitor calling_convention_visitor;
121
122 // We're moving potentially two or more locations to locations that could overlap, so we need
123 // a parallel move resolver.
124 HParallelMove parallel_move(arena);
125
126 for (size_t i = 0; i < invoke->InputCount(); i++) {
127 HInstruction* input = invoke->InputAt(i);
128 Location cc_loc = calling_convention_visitor.GetNextLocation(input->GetType());
129 Location actual_loc = locations->InAt(i);
130
131 parallel_move.AddMove(actual_loc, cc_loc, nullptr);
132 }
133
134 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
135}
136
137// Slow-path for fallback (calling the managed code to handle the intrinsic) in an intrinsified
138// call. This will copy the arguments into the positions for a regular call.
139//
140// Note: The actual parameters are required to be in the locations given by the invoke's location
141// summary. If an intrinsic modifies those locations before a slowpath call, they must be
142// restored!
143class IntrinsicSlowPathX86 : public SlowPathCodeX86 {
144 public:
145 explicit IntrinsicSlowPathX86(HInvoke* invoke, Register temp)
146 : invoke_(invoke) {
147 // The temporary register has to be EAX for x86 invokes.
148 DCHECK_EQ(temp, EAX);
149 }
150
151 void EmitNativeCode(CodeGenerator* codegen_in) OVERRIDE {
152 CodeGeneratorX86* codegen = down_cast<CodeGeneratorX86*>(codegen_in);
153 __ Bind(GetEntryLabel());
154
155 SaveLiveRegisters(codegen, invoke_->GetLocations());
156
157 MoveArguments(invoke_, codegen->GetGraph()->GetArena(), codegen);
158
159 if (invoke_->IsInvokeStaticOrDirect()) {
160 codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), EAX);
161      RecordPcInfo(codegen, invoke_, invoke_->GetDexPc());
162    } else {
163 UNIMPLEMENTED(FATAL) << "Non-direct intrinsic slow-path not yet implemented";
164 UNREACHABLE();
165 }
166
167 // Copy the result back to the expected output.
168 Location out = invoke_->GetLocations()->Out();
169 if (out.IsValid()) {
170 DCHECK(out.IsRegister()); // TODO: Replace this when we support output in memory.
171 DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
172 MoveFromReturnRegister(out, invoke_->GetType(), codegen);
173 }
174
175 RestoreLiveRegisters(codegen, invoke_->GetLocations());
176 __ jmp(GetExitLabel());
177 }
178
179 private:
180 // The instruction where this slow path is happening.
181 HInvoke* const invoke_;
182
183 DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathX86);
184};
185
186#undef __
187#define __ assembler->
188
189static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
190 LocationSummary* locations = new (arena) LocationSummary(invoke,
191 LocationSummary::kNoCall,
192 kIntrinsified);
193 locations->SetInAt(0, Location::RequiresFpuRegister());
194 locations->SetOut(Location::RequiresRegister());
195 if (is64bit) {
196 locations->AddTemp(Location::RequiresFpuRegister());
197 }
198}
199
200static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
201 LocationSummary* locations = new (arena) LocationSummary(invoke,
202 LocationSummary::kNoCall,
203 kIntrinsified);
204 locations->SetInAt(0, Location::RequiresRegister());
205 locations->SetOut(Location::RequiresFpuRegister());
206 if (is64bit) {
207 locations->AddTemp(Location::RequiresFpuRegister());
208 locations->AddTemp(Location::RequiresFpuRegister());
209 }
210}
211
212static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
213 Location input = locations->InAt(0);
214 Location output = locations->Out();
215 if (is64bit) {
216 // Need to use the temporary.
217 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
218 __ movsd(temp, input.AsFpuRegister<XmmRegister>());
219 __ movd(output.AsRegisterPairLow<Register>(), temp);
220 __ psrlq(temp, Immediate(32));
221 __ movd(output.AsRegisterPairHigh<Register>(), temp);
222 } else {
223 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
224 }
225}
226
227static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
228 Location input = locations->InAt(0);
229 Location output = locations->Out();
230 if (is64bit) {
231 // Need to use the temporary.
232 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
233 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
234 __ movd(temp1, input.AsRegisterPairLow<Register>());
235 __ movd(temp2, input.AsRegisterPairHigh<Register>());
236 __ punpckldq(temp1, temp2);
237 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
238 } else {
239 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
240 }
241}
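// Sketch of the 64-bit paths above (these are raw bit copies, no int<->float conversion):
//   FP -> int:  temp = in;  out_lo = temp[31:0] (movd);  temp >>= 32 (psrlq);  out_hi = temp[31:0].
//   int -> FP:  temp1 = in_lo;  temp2 = in_hi;  punpckldq makes temp1[63:0] = in_hi:in_lo,
//               which movsd then copies into the output XMM register.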
242
243void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
244 CreateFPToIntLocations(arena_, invoke, true);
245}
246void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
247 CreateIntToFPLocations(arena_, invoke, true);
248}
249
250void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
251 MoveFPToInt(invoke->GetLocations(), true, GetAssembler());
252}
253void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
254 MoveIntToFP(invoke->GetLocations(), true, GetAssembler());
255}
256
257void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
258 CreateFPToIntLocations(arena_, invoke, false);
259}
260void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
261 CreateIntToFPLocations(arena_, invoke, false);
262}
263
264void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
265 MoveFPToInt(invoke->GetLocations(), false, GetAssembler());
266}
267void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
268 MoveIntToFP(invoke->GetLocations(), false, GetAssembler());
269}
270
271static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
272 LocationSummary* locations = new (arena) LocationSummary(invoke,
273 LocationSummary::kNoCall,
274 kIntrinsified);
275 locations->SetInAt(0, Location::RequiresRegister());
276 locations->SetOut(Location::SameAsFirstInput());
277}
278
279static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
280 LocationSummary* locations = new (arena) LocationSummary(invoke,
281 LocationSummary::kNoCall,
282 kIntrinsified);
283 locations->SetInAt(0, Location::RequiresRegister());
284 locations->SetOut(Location::RequiresRegister());
285}
286
287static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
288 LocationSummary* locations = new (arena) LocationSummary(invoke,
289 LocationSummary::kNoCall,
290 kIntrinsified);
291 locations->SetInAt(0, Location::RequiresRegister());
292 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
293}
294
295static void GenReverseBytes(LocationSummary* locations,
296 Primitive::Type size,
297 X86Assembler* assembler) {
298 Register out = locations->Out().AsRegister<Register>();
299
300 switch (size) {
301 case Primitive::kPrimShort:
302      // TODO: Can be done with an xchg of the two 8-bit sub-registers. This is straight from Quick.
303 __ bswapl(out);
304 __ sarl(out, Immediate(16));
305 break;
306 case Primitive::kPrimInt:
307 __ bswapl(out);
308 break;
309 default:
310 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
311 UNREACHABLE();
312 }
313}
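// Worked example for the kPrimShort path: in = 0x0000ABCD, bswapl gives 0xCDAB0000, and the
// arithmetic shift right by 16 gives 0xFFFFCDAB, i.e. the byte-swapped short 0xCDAB correctly
// sign-extended to an int.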
314
315void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
316 CreateIntToIntLocations(arena_, invoke);
317}
318
319void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
320 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
321}
322
323void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
324 CreateIntToIntLocations(arena_, invoke);
325}
326
327void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
328 GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
329}
330
331
332// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
333// need is 64b.
334
335static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
336 // TODO: Enable memory operations when the assembler supports them.
337 LocationSummary* locations = new (arena) LocationSummary(invoke,
338 LocationSummary::kNoCall,
339 kIntrinsified);
340 locations->SetInAt(0, Location::RequiresFpuRegister());
341 // TODO: Allow x86 to work with memory. This requires assembler support, see below.
342 // locations->SetInAt(0, Location::Any()); // X86 can work on memory directly.
343 locations->SetOut(Location::SameAsFirstInput());
344}
345
346static void MathAbsFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
347 Location output = locations->Out();
348
349 if (output.IsFpuRegister()) {
350 // Create the right constant on an aligned stack.
351 if (is64bit) {
352 __ subl(ESP, Immediate(8));
353 __ pushl(Immediate(0x7FFFFFFF));
354 __ pushl(Immediate(0xFFFFFFFF));
355 __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
356 } else {
357 __ subl(ESP, Immediate(12));
358 __ pushl(Immediate(0x7FFFFFFF));
359 __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
360 }
361 __ addl(ESP, Immediate(16));
362 } else {
363    // TODO: Update when assembler support is available.
364 UNIMPLEMENTED(FATAL) << "Needs assembler support.";
365// Once assembler support is available, in-memory operations look like this:
366// if (is64bit) {
367// DCHECK(output.IsDoubleStackSlot());
368// __ andl(Address(Register(RSP), output.GetHighStackIndex(kX86WordSize)),
369// Immediate(0x7FFFFFFF));
370// } else {
371// DCHECK(output.IsStackSlot());
372// // Can use and with a literal directly.
373// __ andl(Address(Register(RSP), output.GetStackIndex()), Immediate(0x7FFFFFFF));
374// }
375 }
376}
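// The constants assembled on the stack above are sign-clearing masks: 0x7FFFFFFFFFFFFFFF for
// double and 0x7FFFFFFF for float. andpd/andps with the mask clears only the IEEE-754 sign bit,
// so abs(-0.0) is +0.0 and NaN payloads pass through unchanged, matching Math.abs semantics.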
377
378void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
379 CreateFloatToFloat(arena_, invoke);
380}
381
382void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
383 MathAbsFP(invoke->GetLocations(), true, GetAssembler());
384}
385
386void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
387 CreateFloatToFloat(arena_, invoke);
388}
389
390void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
391 MathAbsFP(invoke->GetLocations(), false, GetAssembler());
392}
393
394static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
395 LocationSummary* locations = new (arena) LocationSummary(invoke,
396 LocationSummary::kNoCall,
397 kIntrinsified);
398 locations->SetInAt(0, Location::RegisterLocation(EAX));
399 locations->SetOut(Location::SameAsFirstInput());
400 locations->AddTemp(Location::RegisterLocation(EDX));
401}
402
403static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
404 Location output = locations->Out();
405 Register out = output.AsRegister<Register>();
406 DCHECK_EQ(out, EAX);
407 Register temp = locations->GetTemp(0).AsRegister<Register>();
408 DCHECK_EQ(temp, EDX);
409
410 // Sign extend EAX into EDX.
411 __ cdq();
412
413 // XOR EAX with sign.
414 __ xorl(EAX, EDX);
415
416 // Subtract out sign to correct.
417 __ subl(EAX, EDX);
418
419 // The result is in EAX.
420}
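// Branchless abs: with sign = x >> 31 (all ones if x is negative, zero otherwise),
// abs(x) = (x ^ sign) - sign. E.g. x = -5: sign = -1, x ^ sign = 4, 4 - (-1) = 5.
// Integer.MIN_VALUE maps to itself, which matches Math.abs(int) semantics.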
421
422static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
423 LocationSummary* locations = new (arena) LocationSummary(invoke,
424 LocationSummary::kNoCall,
425 kIntrinsified);
426 locations->SetInAt(0, Location::RequiresRegister());
427 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
428 locations->AddTemp(Location::RequiresRegister());
429}
430
431static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
432 Location input = locations->InAt(0);
433 Register input_lo = input.AsRegisterPairLow<Register>();
434 Register input_hi = input.AsRegisterPairHigh<Register>();
435 Location output = locations->Out();
436 Register output_lo = output.AsRegisterPairLow<Register>();
437 Register output_hi = output.AsRegisterPairHigh<Register>();
438 Register temp = locations->GetTemp(0).AsRegister<Register>();
439
440 // Compute the sign into the temporary.
441 __ movl(temp, input_hi);
442 __ sarl(temp, Immediate(31));
443
444 // Store the sign into the output.
445 __ movl(output_lo, temp);
446 __ movl(output_hi, temp);
447
448 // XOR the input to the output.
449 __ xorl(output_lo, input_lo);
450 __ xorl(output_hi, input_hi);
451
452 // Subtract the sign.
453 __ subl(output_lo, temp);
454 __ sbbl(output_hi, temp);
455}
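// This is the same identity as the 32-bit case, widened to a register pair: the sign of the high
// word is replicated into both output halves, the XORs conditionally complement the value, and
// the subl/sbbl pair subtracts the sign with borrow propagation across the two words.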
456
457void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
458 CreateAbsIntLocation(arena_, invoke);
459}
460
461void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
462 GenAbsInteger(invoke->GetLocations(), GetAssembler());
463}
464
465void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
466 CreateAbsLongLocation(arena_, invoke);
467}
468
469void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
470 GenAbsLong(invoke->GetLocations(), GetAssembler());
471}
472
473static void GenMinMaxFP(LocationSummary* locations, bool is_min, bool is_double,
474 X86Assembler* assembler) {
475 Location op1_loc = locations->InAt(0);
476 Location op2_loc = locations->InAt(1);
477 Location out_loc = locations->Out();
478 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
479
480 // Shortcut for same input locations.
481 if (op1_loc.Equals(op2_loc)) {
482 DCHECK(out_loc.Equals(op1_loc));
483 return;
484 }
485
486 // (out := op1)
487 // out <=? op2
488 // if Nan jmp Nan_label
489 // if out is min jmp done
490 // if op2 is min jmp op2_label
491 // handle -0/+0
492 // jmp done
493 // Nan_label:
494 // out := NaN
495 // op2_label:
496 // out := op2
497 // done:
498 //
499 // This removes one jmp, but needs to copy one input (op1) to out.
500 //
501 // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?
502
503 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
504
505 Label nan, done, op2_label;
506 if (is_double) {
507 __ ucomisd(out, op2);
508 } else {
509 __ ucomiss(out, op2);
510 }
511
512 __ j(Condition::kParityEven, &nan);
513
514 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
515 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
516
517 // Handle 0.0/-0.0.
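  // Reaching here means ucomis reported "equal": either both operands have identical bit
  // patterns (the OR/AND below is then a no-op) or they are +0.0 and -0.0. In the signed-zero
  // case only the sign bit differs, so OR yields -0.0 (the min) and AND yields +0.0 (the max).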
518 if (is_min) {
519 if (is_double) {
520 __ orpd(out, op2);
521 } else {
522 __ orps(out, op2);
523 }
524 } else {
525 if (is_double) {
526 __ andpd(out, op2);
527 } else {
528 __ andps(out, op2);
529 }
530 }
531 __ jmp(&done);
532
533 // NaN handling.
534 __ Bind(&nan);
535 if (is_double) {
536 __ pushl(Immediate(kDoubleNaNHigh));
537 __ pushl(Immediate(kDoubleNaNLow));
538 __ movsd(out, Address(ESP, 0));
539 __ addl(ESP, Immediate(8));
540 } else {
541 __ pushl(Immediate(kFloatNaN));
542 __ movss(out, Address(ESP, 0));
543 __ addl(ESP, Immediate(4));
544 }
545 __ jmp(&done);
546
547 // out := op2;
548 __ Bind(&op2_label);
549 if (is_double) {
550 __ movsd(out, op2);
551 } else {
552 __ movss(out, op2);
553 }
554
555 // Done.
556 __ Bind(&done);
557}
558
559static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
560 LocationSummary* locations = new (arena) LocationSummary(invoke,
561 LocationSummary::kNoCall,
562 kIntrinsified);
563 locations->SetInAt(0, Location::RequiresFpuRegister());
564 locations->SetInAt(1, Location::RequiresFpuRegister());
565  // The following is sub-optimal, but it is all we can do for now. It would also be fine to
566  // accept the second input as the output (we could simply swap the inputs).
567 locations->SetOut(Location::SameAsFirstInput());
568}
569
570void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
571 CreateFPFPToFPLocations(arena_, invoke);
572}
573
574void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
575 GenMinMaxFP(invoke->GetLocations(), true, true, GetAssembler());
576}
577
578void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
579 CreateFPFPToFPLocations(arena_, invoke);
580}
581
582void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
583 GenMinMaxFP(invoke->GetLocations(), true, false, GetAssembler());
584}
585
586void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
587 CreateFPFPToFPLocations(arena_, invoke);
588}
589
590void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
591 GenMinMaxFP(invoke->GetLocations(), false, true, GetAssembler());
592}
593
594void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
595 CreateFPFPToFPLocations(arena_, invoke);
596}
597
598void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
599 GenMinMaxFP(invoke->GetLocations(), false, false, GetAssembler());
600}
601
602static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
603 X86Assembler* assembler) {
604 Location op1_loc = locations->InAt(0);
605 Location op2_loc = locations->InAt(1);
606
607 // Shortcut for same input locations.
608 if (op1_loc.Equals(op2_loc)) {
609 // Can return immediately, as op1_loc == out_loc.
610    // Note: if we ever support separate output locations, e.g., output in memory, we will need
611    // to insert a copy here.
612 DCHECK(locations->Out().Equals(op1_loc));
613 return;
614 }
615
616 if (is_long) {
617 // Need to perform a subtract to get the sign right.
618 // op1 is already in the same location as the output.
619 Location output = locations->Out();
620 Register output_lo = output.AsRegisterPairLow<Register>();
621 Register output_hi = output.AsRegisterPairHigh<Register>();
622
623 Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
624 Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();
625
626 // Spare register to compute the subtraction to set condition code.
627 Register temp = locations->GetTemp(0).AsRegister<Register>();
628
629 // Subtract off op2_low.
630 __ movl(temp, output_lo);
631 __ subl(temp, op2_lo);
632
633    // Now use the same temp and the borrow to finish the subtraction of op2_hi.
634 __ movl(temp, output_hi);
635 __ sbbl(temp, op2_hi);
636
637 // Now the condition code is correct.
638 Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
639 __ cmovl(cond, output_lo, op2_lo);
640 __ cmovl(cond, output_hi, op2_hi);
641 } else {
642 Register out = locations->Out().AsRegister<Register>();
643 Register op2 = op2_loc.AsRegister<Register>();
644
645 // (out := op1)
646 // out <=? op2
647 // if out is min jmp done
648 // out := op2
649 // done:
650
651 __ cmpl(out, op2);
652 Condition cond = is_min ? Condition::kGreater : Condition::kLess;
653 __ cmovl(cond, out, op2);
654 }
655}
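// Conceptual sketch of the long path above (the temp exists only to set the flags):
//   temp = out_lo - op2_lo;                 // subl, may produce a borrow
//   temp = out_hi - op2_hi - borrow;        // sbbl; flags now encode the signed 64-bit out - op2
//   if (is_min ? out >= op2 : out < op2)    // kGreaterEqual / kLess
//     out = op2;                            // two cmovl, one per register-pair half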
656
657static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
658 LocationSummary* locations = new (arena) LocationSummary(invoke,
659 LocationSummary::kNoCall,
660 kIntrinsified);
661 locations->SetInAt(0, Location::RequiresRegister());
662 locations->SetInAt(1, Location::RequiresRegister());
663 locations->SetOut(Location::SameAsFirstInput());
664}
665
666static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
667 LocationSummary* locations = new (arena) LocationSummary(invoke,
668 LocationSummary::kNoCall,
669 kIntrinsified);
670 locations->SetInAt(0, Location::RequiresRegister());
671 locations->SetInAt(1, Location::RequiresRegister());
672 locations->SetOut(Location::SameAsFirstInput());
673 // Register to use to perform a long subtract to set cc.
674 locations->AddTemp(Location::RequiresRegister());
675}
676
677void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
678 CreateIntIntToIntLocations(arena_, invoke);
679}
680
681void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
682 GenMinMax(invoke->GetLocations(), true, false, GetAssembler());
683}
684
685void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
686 CreateLongLongToLongLocations(arena_, invoke);
687}
688
689void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
690 GenMinMax(invoke->GetLocations(), true, true, GetAssembler());
691}
692
693void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
694 CreateIntIntToIntLocations(arena_, invoke);
695}
696
697void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
698 GenMinMax(invoke->GetLocations(), false, false, GetAssembler());
699}
700
701void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
702 CreateLongLongToLongLocations(arena_, invoke);
703}
704
705void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
706 GenMinMax(invoke->GetLocations(), false, true, GetAssembler());
707}
708
709static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
710 LocationSummary* locations = new (arena) LocationSummary(invoke,
711 LocationSummary::kNoCall,
712 kIntrinsified);
713 locations->SetInAt(0, Location::RequiresFpuRegister());
714 locations->SetOut(Location::RequiresFpuRegister());
715}
716
717void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
718 CreateFPToFPLocations(arena_, invoke);
719}
720
721void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
722 LocationSummary* locations = invoke->GetLocations();
723 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
724 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
725
726 GetAssembler()->sqrtsd(out, in);
727}
728
729static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
730 MoveArguments(invoke, codegen->GetGraph()->GetArena(), codegen);
731
732 DCHECK(invoke->IsInvokeStaticOrDirect());
733 codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(), EAX);
734  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
735
736 // Copy the result back to the expected output.
737 Location out = invoke->GetLocations()->Out();
738 if (out.IsValid()) {
739 DCHECK(out.IsRegister());
740 MoveFromReturnRegister(out, invoke->GetType(), codegen);
741 }
742}
743
744static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
745 HInvoke* invoke,
746 CodeGeneratorX86* codegen) {
747 // Do we have instruction support?
748 if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
749 CreateFPToFPLocations(arena, invoke);
750 return;
751 }
752
753 // We have to fall back to a call to the intrinsic.
754 LocationSummary* locations = new (arena) LocationSummary(invoke,
755 LocationSummary::kCall);
756 InvokeRuntimeCallingConvention calling_convention;
757 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
758 locations->SetOut(Location::FpuRegisterLocation(XMM0));
759 // Needs to be EAX for the invoke.
760 locations->AddTemp(Location::RegisterLocation(EAX));
761}
762
763static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
764 HInvoke* invoke,
765 X86Assembler* assembler,
766 int round_mode) {
767 LocationSummary* locations = invoke->GetLocations();
768 if (locations->WillCall()) {
769 InvokeOutOfLineIntrinsic(codegen, invoke);
770 } else {
771 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
772 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
773 __ roundsd(out, in, Immediate(round_mode));
774 }
775}
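// The round_mode immediate selects the SSE4.1 rounding mode: 0 rounds to nearest even (rint),
// 1 rounds toward negative infinity (floor), and 2 rounds toward positive infinity (ceil).
// The Visit methods below pass the mode matching each Math intrinsic.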
776
777void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
778 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
779}
780
781void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
782 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
783}
784
785void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
786 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
787}
788
789void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
790 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
791}
792
793void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
794 CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
795}
796
797void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
798 GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
799}
800
801// Note that 32-bit x86 cannot inline MathRoundDouble, as doing so
802// would require 64-bit instructions.
803void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
804 // Do we have instruction support?
805 if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
806 LocationSummary* locations = new (arena_) LocationSummary(invoke,
807 LocationSummary::kNoCall,
808 kIntrinsified);
809 locations->SetInAt(0, Location::RequiresFpuRegister());
810 locations->SetOut(Location::RequiresFpuRegister());
811 locations->AddTemp(Location::RequiresFpuRegister());
812 locations->AddTemp(Location::RequiresFpuRegister());
813 return;
814 }
815
816 // We have to fall back to a call to the intrinsic.
817 LocationSummary* locations = new (arena_) LocationSummary(invoke,
818 LocationSummary::kCall);
819 InvokeRuntimeCallingConvention calling_convention;
820 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
821 locations->SetOut(Location::RegisterLocation(EAX));
822 // Needs to be EAX for the invoke.
823 locations->AddTemp(Location::RegisterLocation(EAX));
824}
825
826void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
827 LocationSummary* locations = invoke->GetLocations();
828 if (locations->WillCall()) {
829 InvokeOutOfLineIntrinsic(codegen_, invoke);
830 return;
831 }
832
833 // Implement RoundFloat as t1 = floor(input + 0.5f); convert to int.
834 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
835 Register out = locations->Out().AsRegister<Register>();
836 XmmRegister maxInt = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
837 XmmRegister inPlusPointFive = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
838 Label done, nan;
839 X86Assembler* assembler = GetAssembler();
840
841 // Generate 0.5 into inPlusPointFive.
842 __ movl(out, Immediate(bit_cast<int32_t, float>(0.5f)));
843 __ movd(inPlusPointFive, out);
844
845 // Add in the input.
846 __ addss(inPlusPointFive, in);
847
848  // And round down (floor) to an integer.
849 __ roundss(inPlusPointFive, inPlusPointFive, Immediate(1));
850
851 __ movl(out, Immediate(kPrimIntMax));
852 // maxInt = int-to-float(out)
853 __ cvtsi2ss(maxInt, out);
854
855 // if inPlusPointFive >= maxInt goto done
856 __ comiss(inPlusPointFive, maxInt);
857 __ j(kAboveEqual, &done);
858
859 // if input == NaN goto nan
860 __ j(kUnordered, &nan);
861
862 // output = float-to-int-truncate(input)
863 __ cvttss2si(out, inPlusPointFive);
864 __ jmp(&done);
865 __ Bind(&nan);
866
867 // output = 0
868 __ xorl(out, out);
869 __ Bind(&done);
870}
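// Summary of the edge cases handled above, following Math.round(float) semantics:
//   - floor(input + 0.5f) at or above (float)Integer.MAX_VALUE: out already holds kPrimIntMax.
//   - input is NaN: the unordered branch zeroes out, since Math.round(NaN) is 0.
//   - otherwise: cvttss2si truncates the already-integral floor(input + 0.5f).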
871
872void IntrinsicLocationsBuilderX86::VisitStringCharAt(HInvoke* invoke) {
873 // The inputs plus one temp.
874 LocationSummary* locations = new (arena_) LocationSummary(invoke,
875 LocationSummary::kCallOnSlowPath,
876 kIntrinsified);
877 locations->SetInAt(0, Location::RequiresRegister());
878 locations->SetInAt(1, Location::RequiresRegister());
879 locations->SetOut(Location::SameAsFirstInput());
880 // Needs to be EAX for the invoke.
881 locations->AddTemp(Location::RegisterLocation(EAX));
882}
883
884void IntrinsicCodeGeneratorX86::VisitStringCharAt(HInvoke* invoke) {
885 LocationSummary* locations = invoke->GetLocations();
886
887 // Location of reference to data array
888 const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
889 // Location of count
890 const int32_t count_offset = mirror::String::CountOffset().Int32Value();
891 // Starting offset within data array
892 const int32_t offset_offset = mirror::String::OffsetOffset().Int32Value();
893  // Start of char data within the array.
894 const int32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Int32Value();
895
896 Register obj = locations->InAt(0).AsRegister<Register>();
897 Register idx = locations->InAt(1).AsRegister<Register>();
898 Register out = locations->Out().AsRegister<Register>();
899 Location temp_loc = locations->GetTemp(0);
900 Register temp = temp_loc.AsRegister<Register>();
901
902 // TODO: Maybe we can support range check elimination. Overall, though, I think it's not worth
903 // the cost.
904 // TODO: For simplicity, the index parameter is requested in a register, so different from Quick
905 // we will not optimize the code for constants (which would save a register).
906
907 SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke, temp);
908 codegen_->AddSlowPath(slow_path);
909
910 X86Assembler* assembler = GetAssembler();
911
912 __ cmpl(idx, Address(obj, count_offset));
913 codegen_->MaybeRecordImplicitNullCheck(invoke);
914 __ j(kAboveEqual, slow_path->GetEntryLabel());
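  // A single unsigned comparison covers both bounds: a negative index, interpreted as unsigned,
  // is larger than any possible count and therefore also takes the kAboveEqual branch.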
915
916 // Get the actual element.
917 __ movl(temp, idx); // temp := idx.
918 __ addl(temp, Address(obj, offset_offset)); // temp := offset + idx.
919  __ movl(out, Address(obj, value_offset));    // out := obj.array.
920 // out = out[2*temp].
921 __ movzxw(out, Address(out, temp, ScaleFactor::TIMES_2, data_offset));
922
923 __ Bind(slow_path->GetExitLabel());
924}
925
926void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
927 // The inputs plus one temp.
928 LocationSummary* locations = new (arena_) LocationSummary(invoke,
929 LocationSummary::kCall,
930 kIntrinsified);
931 InvokeRuntimeCallingConvention calling_convention;
932 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
933 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
934 locations->SetOut(Location::RegisterLocation(EAX));
935 // Needs to be EAX for the invoke.
936 locations->AddTemp(Location::RegisterLocation(EAX));
937}
938
939void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
940 X86Assembler* assembler = GetAssembler();
941 LocationSummary* locations = invoke->GetLocations();
942
943  // Note that the null check must have been done earlier.
944  DCHECK(!invoke->CanDoImplicitNullCheck());
945
946 Register argument = locations->InAt(1).AsRegister<Register>();
947 __ testl(argument, argument);
948 SlowPathCodeX86* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(
949 invoke, locations->GetTemp(0).AsRegister<Register>());
950 codegen_->AddSlowPath(slow_path);
951 __ j(kEqual, slow_path->GetEntryLabel());
952
953 __ fs()->call(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pStringCompareTo)));
954 __ Bind(slow_path->GetExitLabel());
955}
956
957static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
958 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
959 Location out_loc = locations->Out();
960 // x86 allows unaligned access. We do not have to check the input or use specific instructions
961 // to avoid a SIGBUS.
962 switch (size) {
963 case Primitive::kPrimByte:
964 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
965 break;
966 case Primitive::kPrimShort:
967 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
968 break;
969 case Primitive::kPrimInt:
970 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
971 break;
972 case Primitive::kPrimLong:
973 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
974 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
975 break;
976 default:
977 LOG(FATAL) << "Type not recognized for peek: " << size;
978 UNREACHABLE();
979 }
980}
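// Note: the kPrimLong case is read as two separate 32-bit loads and is therefore not atomic;
// the same holds for the two 32-bit stores in GenPoke below.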
981
982void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
983 CreateLongToIntLocations(arena_, invoke);
984}
985
986void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
987 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
988}
989
990void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
991 CreateLongToIntLocations(arena_, invoke);
992}
993
994void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
995 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
996}
997
998void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
999 CreateLongToLongLocations(arena_, invoke);
1000}
1001
1002void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1003 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1004}
1005
1006void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1007 CreateLongToIntLocations(arena_, invoke);
1008}
1009
1010void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1011 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1012}
1013
1014static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1015 HInvoke* invoke) {
1016 LocationSummary* locations = new (arena) LocationSummary(invoke,
1017 LocationSummary::kNoCall,
1018 kIntrinsified);
1019 locations->SetInAt(0, Location::RequiresRegister());
1020  HInstruction* value = invoke->InputAt(1);
1021 if (size == Primitive::kPrimByte) {
1022 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1023 } else {
1024 locations->SetInAt(1, Location::RegisterOrConstant(value));
1025 }
1026}
1027
1028static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1029 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1030 Location value_loc = locations->InAt(1);
1031 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1032 // to avoid a SIGBUS.
1033 switch (size) {
1034 case Primitive::kPrimByte:
1035 if (value_loc.IsConstant()) {
1036 __ movb(Address(address, 0),
1037 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1038 } else {
1039 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1040 }
1041 break;
1042 case Primitive::kPrimShort:
1043 if (value_loc.IsConstant()) {
1044 __ movw(Address(address, 0),
1045 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1046 } else {
1047 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1048 }
1049 break;
1050 case Primitive::kPrimInt:
1051 if (value_loc.IsConstant()) {
1052 __ movl(Address(address, 0),
1053 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1054 } else {
1055 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1056 }
1057 break;
1058 case Primitive::kPrimLong:
1059 if (value_loc.IsConstant()) {
1060 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1061 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1062 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1063 } else {
1064 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1065 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1066 }
1067 break;
1068 default:
1069 LOG(FATAL) << "Type not recognized for poke: " << size;
1070 UNREACHABLE();
1071 }
1072}
1073
1074void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1075 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1076}
1077
1078void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1079 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1080}
1081
1082void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1083 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1084}
1085
1086void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1087 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1088}
1089
1090void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1091 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1092}
1093
1094void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1095 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1096}
1097
1098void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1099 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1100}
1101
1102void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1103 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1104}
1105
1106void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1107 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1108 LocationSummary::kNoCall,
1109 kIntrinsified);
1110 locations->SetOut(Location::RequiresRegister());
1111}
1112
1113void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1114 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1115 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86WordSize>()));
1116}
1117
1118static void GenUnsafeGet(LocationSummary* locations, Primitive::Type type,
1119 bool is_volatile, X86Assembler* assembler) {
1120 Register base = locations->InAt(1).AsRegister<Register>();
1121 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1122 Location output = locations->Out();
1123
1124 switch (type) {
1125 case Primitive::kPrimInt:
1126 case Primitive::kPrimNot:
1127 __ movl(output.AsRegister<Register>(), Address(base, offset, ScaleFactor::TIMES_1, 0));
1128 break;
1129
1130 case Primitive::kPrimLong: {
1131 Register output_lo = output.AsRegisterPairLow<Register>();
1132 Register output_hi = output.AsRegisterPairHigh<Register>();
1133 if (is_volatile) {
1134 // Need to use a XMM to read atomically.
1135 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1136 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1137 __ movd(output_lo, temp);
1138 __ psrlq(temp, Immediate(32));
1139 __ movd(output_hi, temp);
1140 } else {
1141 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1142 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1143 }
1144 }
1145 break;
1146
1147 default:
1148 LOG(FATAL) << "Unsupported op size " << type;
1149 UNREACHABLE();
1150 }
1151}
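// The volatile long path goes through an XMM register because an aligned 8-byte SSE load is
// expected to be a single atomic memory access on the CPUs ART targets, giving the atomicity a
// Java volatile long read requires; a pair of 32-bit loads could observe a torn value.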
1152
1153static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke,
1154 bool is_long, bool is_volatile) {
1155 LocationSummary* locations = new (arena) LocationSummary(invoke,
1156 LocationSummary::kNoCall,
1157 kIntrinsified);
1158 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1159 locations->SetInAt(1, Location::RequiresRegister());
1160 locations->SetInAt(2, Location::RequiresRegister());
1161 if (is_long) {
1162 if (is_volatile) {
1163 // Need to use XMM to read volatile.
1164 locations->AddTemp(Location::RequiresFpuRegister());
1165 locations->SetOut(Location::RequiresRegister());
1166 } else {
1167 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1168 }
1169 } else {
1170 locations->SetOut(Location::RequiresRegister());
1171 }
1172}
1173
1174void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1175 CreateIntIntIntToIntLocations(arena_, invoke, false, false);
1176}
1177void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1178 CreateIntIntIntToIntLocations(arena_, invoke, false, true);
1179}
1180void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1181 CreateIntIntIntToIntLocations(arena_, invoke, false, false);
1182}
1183void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1184 CreateIntIntIntToIntLocations(arena_, invoke, true, true);
1185}
1186void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1187 CreateIntIntIntToIntLocations(arena_, invoke, false, false);
1188}
1189void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1190 CreateIntIntIntToIntLocations(arena_, invoke, false, true);
1191}
1192
1193
1194void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1195 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, false, GetAssembler());
1196}
1197void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1198 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimInt, true, GetAssembler());
1199}
1200void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1201 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, false, GetAssembler());
1202}
1203void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1204 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimLong, true, GetAssembler());
1205}
1206void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1207 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, false, GetAssembler());
1208}
1209void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1210 GenUnsafeGet(invoke->GetLocations(), Primitive::kPrimNot, true, GetAssembler());
1211}
1212
1213
1214static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
1215 Primitive::Type type,
1216 HInvoke* invoke,
1217 bool is_volatile) {
1218 LocationSummary* locations = new (arena) LocationSummary(invoke,
1219 LocationSummary::kNoCall,
1220 kIntrinsified);
1221 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1222 locations->SetInAt(1, Location::RequiresRegister());
1223 locations->SetInAt(2, Location::RequiresRegister());
1224 locations->SetInAt(3, Location::RequiresRegister());
1225 if (type == Primitive::kPrimNot) {
1226 // Need temp registers for card-marking.
1227 locations->AddTemp(Location::RequiresRegister());
1228 // Ensure the value is in a byte register.
1229 locations->AddTemp(Location::RegisterLocation(ECX));
1230 } else if (type == Primitive::kPrimLong && is_volatile) {
1231 locations->AddTemp(Location::RequiresFpuRegister());
1232 locations->AddTemp(Location::RequiresFpuRegister());
1233 }
1234}
1235
1236void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1237 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
1238}
1239void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1240 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, false);
1241}
1242void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1243 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimInt, invoke, true);
1244}
1245void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1246 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
1247}
1248void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1249 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, false);
1250}
1251void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1252 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimNot, invoke, true);
1253}
1254void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1255 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
1256}
1257void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1258 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, false);
1259}
1260void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1261 CreateIntIntIntIntToVoidPlusTempsLocations(arena_, Primitive::kPrimLong, invoke, true);
1262}
1263
1264// Ordered writes need no special handling: they only require an AnyStore barrier, which the x86
1265// memory model already provides.
1266static void GenUnsafePut(LocationSummary* locations,
1267 Primitive::Type type,
1268 bool is_volatile,
1269 CodeGeneratorX86* codegen) {
1270 X86Assembler* assembler = reinterpret_cast<X86Assembler*>(codegen->GetAssembler());
1271 Register base = locations->InAt(1).AsRegister<Register>();
1272 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1273 Location value_loc = locations->InAt(3);
1274
1275 if (type == Primitive::kPrimLong) {
1276 Register value_lo = value_loc.AsRegisterPairLow<Register>();
1277 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
1278 if (is_volatile) {
1279 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1280 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
1281 __ movd(temp1, value_lo);
1282 __ movd(temp2, value_hi);
1283 __ punpckldq(temp1, temp2);
1284 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
1285 } else {
1286 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
1287 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
1288 }
1289 } else {
1290 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
1291 }
1292
1293 if (is_volatile) {
1294 __ mfence();
1295 }
1296
1297 if (type == Primitive::kPrimNot) {
1298 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
1299 locations->GetTemp(1).AsRegister<Register>(),
1300 base,
1301 value_loc.AsRegister<Register>());
1302 }
1303}
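// The trailing mfence provides the StoreLoad barrier required after a volatile store: x86-TSO
// already keeps stores ordered with respect to earlier loads and stores, so only a volatile
// write followed by a volatile read of another location needs the explicit fence.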
1304
1305void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
1306 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
1307}
1308void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1309 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, false, codegen_);
1310}
1311void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1312 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, true, codegen_);
1313}
1314void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
1315 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
1316}
1317void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1318 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, false, codegen_);
1319}
1320void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1321 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, true, codegen_);
1322}
1323void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
1324 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
1325}
1326void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1327 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, false, codegen_);
1328}
1329void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1330 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, true, codegen_);
1331}
1332
1333// Unimplemented intrinsics.
1334
1335#define UNIMPLEMENTED_INTRINSIC(Name) \
1336void IntrinsicLocationsBuilderX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
1337} \
1338void IntrinsicCodeGeneratorX86::Visit ## Name(HInvoke* invoke ATTRIBUTE_UNUSED) { \
1339}
1340
1341UNIMPLEMENTED_INTRINSIC(IntegerReverse)
1342UNIMPLEMENTED_INTRINSIC(LongReverse)
1343UNIMPLEMENTED_INTRINSIC(LongReverseBytes)
1344UNIMPLEMENTED_INTRINSIC(MathRoundDouble)
1345UNIMPLEMENTED_INTRINSIC(StringIndexOf)
1346UNIMPLEMENTED_INTRINSIC(StringIndexOfAfter)
1347UNIMPLEMENTED_INTRINSIC(SystemArrayCopyChar)
1348UNIMPLEMENTED_INTRINSIC(UnsafeCASInt)
1349UNIMPLEMENTED_INTRINSIC(UnsafeCASLong)
1350UNIMPLEMENTED_INTRINSIC(UnsafeCASObject)
1351UNIMPLEMENTED_INTRINSIC(ReferenceGetReferent)
1352
1353} // namespace x86
1354} // namespace art