/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
  : arena_(codegen->GetGraph()->GetArena()),
    codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    Register src = locations->InAt(0).AsRegister<Register>();
    Location src_pos = locations->InAt(1);
    Register dest = locations->InAt(2).AsRegister<Register>();
    Location dest_pos = locations->InAt(3);
    Location length = locations->InAt(4);
    Location temp1_loc = locations->GetTemp(0);
    Register temp1 = temp1_loc.AsRegister<Register>();
    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    Register temp3 = locations->GetTemp(2).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // In this code path, registers `temp1`, `temp2`, and `temp3` are not used
    // for the base source address, the base destination address, and the end
    // source address (respectively), as they are in the other SystemArrayCopy
    // intrinsic code paths. Instead they are used for:
    // - the loop index (`i`);
    // - the source index (`src_index`) and the loaded (source)
    //   reference (`value`); and
    // - the destination index (`dest_index`).
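    //
    // In pseudocode, the emitted loop is (a sketch; `value` lives in `temp2`):
    //   for (i = 0; i != length; ++i) {
    //     value = src_array[i + src_pos];
    //     value = ReadBarrier::Mark(value);
    //     dest_array[i + dest_pos] = value;
    //   }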

    // i = 0
    __ xorl(temp1, temp1);
    NearLabel loop;
    __ Bind(&loop);
    // value = src_array[i + src_pos]
    if (src_pos.IsConstant()) {
      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    } else {
      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    }
    __ MaybeUnpoisonHeapReference(temp2);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(temp2, ESP);
    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(temp2);
    // dest_array[i + dest_pos] = value
    if (dest_pos.IsConstant()) {
      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    } else {
      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    }
    // ++i
    __ addl(temp1, Immediate(1));
    // if (i != length) goto loop
    x86_codegen->GenerateIntCompare(temp1_loc, length);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
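    // movd extracts the low doubleword; psrlq then shifts the upper half of
    // the 64-bit value down so a second movd can extract the high 32 bits.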
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
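    // punpckldq interleaves the low doublewords of its operands, so temp1
    // ends up with the full 64-bit value {lo, hi} in its low quadword.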
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
                            Primitive::Type size,
                            X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
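      // For example (a sketch of the data flow): a register holding the short
      // 0x0180 becomes 0x80010000 after bswapl, and the arithmetic shift then
      // yields 0xFFFF8001, i.e. the byte-reversed value 0x8001 sign-extended
      // from bit 15 as a short result requires.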
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
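  // E.g., for input 0x1122334455667788 the two halves swap registers and each
  // is then byte-reversed, producing 0x8877665544332211.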
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
//       need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    // We need addressability for the constant area.
    locations->SetInAt(1, Location::RequiresRegister());
    // We need a temporary to hold the constant.
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MathAbsFP(HInvoke* invoke,
                      bool is64bit,
                      X86Assembler* assembler,
                      CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(1).IsRegister());
    // We also have a constant area pointer.
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    if (is64bit) {
      __ movsd(temp, codegen->LiteralInt64Address(
          INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    } else {
      __ movss(temp, codegen->LiteralInt32Address(
          INT32_C(0x7FFFFFFF), method_address, constant_area));
      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    }
  } else {
    // Create the right constant on an aligned stack.
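    // The explicit subl plus the pushes are sized so that 16 bytes are used
    // in total, keeping ESP 16-byte aligned: andpd/andps with a memory
    // operand require an aligned address, and the single addl below reclaims
    // the full 16 bytes in one step.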
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
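  //
  // A minimal sketch of the same identity in C (an illustration, not the
  // emitted code):
  //   int32_t sign = x >> 31;             // cdq: sign is 0 or -1
  //   int32_t abs_x = (x ^ sign) - sign;  // xorl + subl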
}

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
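  // subl/sbbl subtract the replicated sign with borrow propagation across the
  // pair, completing abs(x) = (x ^ sign) - sign on the full 64-bit value.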
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(HInvoke* invoke,
                        bool is_min,
                        bool is_double,
                        X86Assembler* assembler,
                        CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if NaN jmp NaN_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // NaN_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
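  // Both operands compared equal here, so the remaining distinction is
  // -0.0 vs +0.0: OR of the bit patterns keeps a set sign bit (min yields
  // -0.0), while AND clears it unless both signs are set (max yields +0.0).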
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  // Do we have a constant area pointer?
  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(2).IsRegister());
    Register constant_area = locations->InAt(2).AsRegister<Register>();
    if (is_double) {
      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
    } else {
      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
    }
  } else {
    if (is_double) {
      __ pushl(Immediate(kDoubleNaNHigh));
      __ pushl(Immediate(kDoubleNaNLow));
      __ movsd(out, Address(ESP, 0));
      __ addl(ESP, Immediate(8));
    } else {
      __ pushl(Immediate(kFloatNaN));
      __ movss(out, Address(ESP, 0));
      __ addl(ESP, Immediate(4));
    }
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ true,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ true,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ false,
              /* is_double */ true,
              GetAssembler(),
              codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
              /* is_min */ false,
              /* is_double */ false,
              GetAssembler(),
              codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
                      X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temporary and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}
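
// A sketch of the 64-bit path above in C (only the flags of the subtraction
// are used; the difference itself is discarded):
//   bool take_op2 = is_min ? (op1 - op2 >= 0)   // kGreaterEqual
//                          : (op1 - op2 < 0);   // kLess
//   if (take_op2) out = op2;  // done branch-free by the cmovl pair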

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                      Location::RegisterLocation(EAX));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
                                       HInvoke* invoke,
                                       CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
                                    HInvoke* invoke,
                                    X86Assembler* assembler,
                                    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}
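
// The round_mode immediate selects the SSE4.1 rounding control: 0 rounds to
// nearest (even) for rint, 1 toward negative infinity for floor, and 2 toward
// positive infinity for ceil, matching the three visitors below.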

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    DCHECK(static_or_direct != nullptr);
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                              LocationSummary::kNoCall,
                                                              kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    if (static_or_direct->HasSpecialInput() &&
        invoke->InputAt(
            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(1, Location::RequiresRegister());
    }
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {  // TODO: can we reach this?
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
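  // (roundss with immediate 1 rounds toward negative infinity, i.e. floor;
  // the half-way adjustment below then gives round-half-up semantics.)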
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
                                                method_address,
                                                constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
                                               method_address,
                                               constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
                                      HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
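  // (The native 32-bit calling convention returns floating-point values in
  // st(0), so the result is spilled to memory and reloaded into XMM0.)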
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
                                        HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kCallOnMainOnly,
                                                           kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyway.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
                          Location pos,
                          Register input,
                          Location length,
                          SlowPathCode* slow_path,
                          Register temp,
                          bool length_is_input_length = false) {
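  // Bails out to slow_path unless [pos, pos + length) fits within input's
  // length; when length_is_input_length is set the caller copies the whole
  // array, so the copy can only succeed if pos is exactly zero.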
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
                  Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<Register>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<Register>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }
1295
1296 // We need the count in ECX.
1297 if (length.IsConstant()) {
1298 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1299 } else {
1300 __ movl(count, length.AsRegister<Register>());
1301 }
1302
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01001303 // Validity checks: source. Use src_base as a temporary register.
1304 CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);
Mark Mendell6bc53a92015-07-01 14:26:52 -04001305
Nicolas Geoffrayfea1abd2016-07-06 12:09:12 +01001306 // Validity checks: dest. Use src_base as a temporary register.
1307 CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);
Mark Mendell6bc53a92015-07-01 14:26:52 -04001308
1309 // Okay, everything checks out. Finally time to do the copy.
1310 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1311 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1312 DCHECK_EQ(char_size, 2u);
1313
1314 const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
1315
1316 if (srcPos.IsConstant()) {
1317 int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
1318 __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
1319 } else {
1320 __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
1321 ScaleFactor::TIMES_2, data_offset));
1322 }
1323 if (destPos.IsConstant()) {
1324 int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();
1325
1326 __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
1327 } else {
1328 __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
1329 ScaleFactor::TIMES_2, data_offset));
1330 }
1331
1332 // Do the move.
1333 __ rep_movsw();
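  // REP MOVSW copies ECX 16-bit words from [ESI] to [EDI], advancing both
  // pointers as it goes, which is why the base addresses were materialized
  // in ESI/EDI and the element count in ECX above.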
1334
1335 __ Bind(slow_path->GetExitLabel());
1336}
1337
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001338void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1339 // The inputs plus one temp.
1340 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu806f0122016-03-09 11:10:16 +00001341 LocationSummary::kCallOnMainAndSlowPath,
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001342 kIntrinsified);
1343 InvokeRuntimeCallingConvention calling_convention;
1344 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1345 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1346 locations->SetOut(Location::RegisterLocation(EAX));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001347}
1348
1349void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1350 X86Assembler* assembler = GetAssembler();
1351 LocationSummary* locations = invoke->GetLocations();
1352
Nicolas Geoffray512e04d2015-03-27 17:21:24 +00001353 // Note that the null check must have been done earlier.
Calin Juravle641547a2015-04-21 22:08:51 +01001354 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001355
1356 Register argument = locations->InAt(1).AsRegister<Register>();
1357 __ testl(argument, argument);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001358 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001359 codegen_->AddSlowPath(slow_path);
1360 __ j(kEqual, slow_path->GetEntryLabel());
1361
Serban Constantinescuba45db02016-07-12 22:53:02 +01001362 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
Nicolas Geoffrayd75948a2015-03-27 09:53:16 +00001363 __ Bind(slow_path->GetExitLabel());
1364}
1365
Agi Csakid7138c82015-08-13 17:46:44 -07001366void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1367 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1368 LocationSummary::kNoCall,
1369 kIntrinsified);
1370 locations->SetInAt(0, Location::RequiresRegister());
1371 locations->SetInAt(1, Location::RequiresRegister());
1372
1373 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1374 locations->AddTemp(Location::RegisterLocation(ECX));
1375 locations->AddTemp(Location::RegisterLocation(EDI));
1376
1377  // Set output; ESI is needed for the repe_cmpsl instruction anyway.
1378 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1379}
1380
1381void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1382 X86Assembler* assembler = GetAssembler();
1383 LocationSummary* locations = invoke->GetLocations();
1384
1385 Register str = locations->InAt(0).AsRegister<Register>();
1386 Register arg = locations->InAt(1).AsRegister<Register>();
1387 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1388 Register edi = locations->GetTemp(1).AsRegister<Register>();
1389 Register esi = locations->Out().AsRegister<Register>();
1390
Mark Mendell0c9497d2015-08-21 09:30:05 -04001391 NearLabel end, return_true, return_false;
Agi Csakid7138c82015-08-13 17:46:44 -07001392
1393 // Get offsets of count, value, and class fields within a string object.
1394 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1395 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1396 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1397
1398 // Note that the null check must have been done earlier.
1399 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1400
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001401 StringEqualsOptimizations optimizations(invoke);
1402 if (!optimizations.GetArgumentNotNull()) {
1403 // Check if input is null, return false if it is.
1404 __ testl(arg, arg);
1405 __ j(kEqual, &return_false);
1406 }
Agi Csakid7138c82015-08-13 17:46:44 -07001407
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001408 if (!optimizations.GetArgumentIsString()) {
Vladimir Marko53b52002016-05-24 19:30:45 +01001409 // Instanceof check for the argument by comparing class fields.
1410 // All string objects must have the same type since String cannot be subclassed.
1411 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1412 // If the argument is a string object, its class field must be equal to receiver's class field.
Nicolas Geoffraya83a54d2015-10-02 17:30:26 +01001413 __ movl(ecx, Address(str, class_offset));
1414 __ cmpl(ecx, Address(arg, class_offset));
1415 __ j(kNotEqual, &return_false);
1416 }
Agi Csakid7138c82015-08-13 17:46:44 -07001417
1418 // Reference equality check, return true if same reference.
1419 __ cmpl(str, arg);
1420 __ j(kEqual, &return_true);
1421
jessicahandojo4877b792016-09-08 19:49:13 -07001422 // Load length and compression flag of receiver string.
Agi Csakid7138c82015-08-13 17:46:44 -07001423 __ movl(ecx, Address(str, count_offset));
jessicahandojo4877b792016-09-08 19:49:13 -07001424 // Check if lengths and compression flags are equal, return false if they're not.
1425  // Two identical strings will always have the same compression style since
1426  // the compression style is decided at allocation.
Agi Csakid7138c82015-08-13 17:46:44 -07001427 __ cmpl(ecx, Address(arg, count_offset));
1428 __ j(kNotEqual, &return_false);
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001429 // Return true if strings are empty. Even with string compression `count == 0` means empty.
1430 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1431 "Expecting 0=compressed, 1=uncompressed");
1432 __ jecxz(&return_true);
Agi Csakid7138c82015-08-13 17:46:44 -07001433
jessicahandojo4877b792016-09-08 19:49:13 -07001434 if (mirror::kUseStringCompression) {
1435 NearLabel string_uncompressed;
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001436  // Extract the length; at this point both strings are compressed or both are
1437  // uncompressed, since the mixed-compression case was already rejected above.
1438 __ shrl(ecx, Immediate(1));
1439 __ j(kCarrySet, &string_uncompressed);
jessicahandojo4877b792016-09-08 19:49:13 -07001440 // Divide string length by 2, rounding up, and continue as if uncompressed.
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001441 __ addl(ecx, Immediate(1));
jessicahandojo4877b792016-09-08 19:49:13 -07001442 __ shrl(ecx, Immediate(1));
1443 __ Bind(&string_uncompressed);
1444 }
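  // At this point ECX holds a character count normalized so that the shared
  // divide-by-two-rounding-up below yields the number of 4-byte units to
  // compare in both cases: 2 chars per unit uncompressed, 4 per unit
  // compressed (whose count was already halved above).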
Agi Csakid7138c82015-08-13 17:46:44 -07001445 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1446 __ leal(esi, Address(str, value_offset));
1447 __ leal(edi, Address(arg, value_offset));
1448
jessicahandojo4877b792016-09-08 19:49:13 -07001449 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1450 // divisible by 2.
Agi Csakid7138c82015-08-13 17:46:44 -07001451 __ addl(ecx, Immediate(1));
1452 __ shrl(ecx, Immediate(1));
1453
jessicahandojo4877b792016-09-08 19:49:13 -07001454 // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1455 // or 4 characters (compressed) at a time.
Agi Csakid7138c82015-08-13 17:46:44 -07001456 DCHECK_ALIGNED(value_offset, 4);
1457 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1458
1459 // Loop to compare strings two characters at a time starting at the beginning of the string.
1460 __ repe_cmpsl();
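  // REPE CMPSL compares one 4-byte unit from [ESI] and [EDI] per iteration,
  // decrementing ECX each time, and stops early on the first mismatch or
  // once ECX reaches zero.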
1461 // If strings are not equal, zero flag will be cleared.
1462 __ j(kNotEqual, &return_false);
1463
1464 // Return true and exit the function.
1465 // If loop does not result in returning false, we return true.
1466 __ Bind(&return_true);
1467 __ movl(esi, Immediate(1));
1468 __ jmp(&end);
1469
1470 // Return false and exit the function.
1471 __ Bind(&return_false);
1472 __ xorl(esi, esi);
1473 __ Bind(&end);
1474}
1475
Andreas Gampe21030dd2015-05-07 14:46:15 -07001476static void CreateStringIndexOfLocations(HInvoke* invoke,
1477 ArenaAllocator* allocator,
1478 bool start_at_zero) {
1479 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1480 LocationSummary::kCallOnSlowPath,
1481 kIntrinsified);
1482  // The data needs to be in EDI for scasw, so request that the string be there anyway.
1483 locations->SetInAt(0, Location::RegisterLocation(EDI));
1484  // If we look for a constant char, we'll still have to copy it into EAX, so just request
1485  // that the allocator do that anyway. We can still do the constant check by checking the parameter
1486 // of the instruction explicitly.
1487 // Note: This works as we don't clobber EAX anywhere.
1488 locations->SetInAt(1, Location::RegisterLocation(EAX));
1489 if (!start_at_zero) {
1490 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1491 }
1492  // As we clobber EDI during execution anyway, also use it as the output.
1493 locations->SetOut(Location::SameAsFirstInput());
1494
1495 // repne scasw uses ECX as the counter.
1496 locations->AddTemp(Location::RegisterLocation(ECX));
1497 // Need another temporary to be able to compute the result.
1498 locations->AddTemp(Location::RequiresRegister());
jessicahandojo4877b792016-09-08 19:49:13 -07001499 if (mirror::kUseStringCompression) {
1500 // Need another temporary to be able to save unflagged string length.
1501 locations->AddTemp(Location::RequiresRegister());
1502 }
Andreas Gampe21030dd2015-05-07 14:46:15 -07001503}
1504
1505static void GenerateStringIndexOf(HInvoke* invoke,
1506 X86Assembler* assembler,
1507 CodeGeneratorX86* codegen,
1508 ArenaAllocator* allocator,
1509 bool start_at_zero) {
1510 LocationSummary* locations = invoke->GetLocations();
1511
1512 // Note that the null check must have been done earlier.
1513 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1514
1515 Register string_obj = locations->InAt(0).AsRegister<Register>();
1516 Register search_value = locations->InAt(1).AsRegister<Register>();
1517 Register counter = locations->GetTemp(0).AsRegister<Register>();
1518 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1519 Register out = locations->Out().AsRegister<Register>();
jessicahandojo4877b792016-09-08 19:49:13 -07001520 // Only used when string compression feature is on.
1521 Register string_length_flagged;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001522
1523 // Check our assumptions for registers.
1524 DCHECK_EQ(string_obj, EDI);
1525 DCHECK_EQ(search_value, EAX);
1526 DCHECK_EQ(counter, ECX);
1527 DCHECK_EQ(out, EDI);
1528
1529 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001530 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
Andreas Gampe85b62f22015-09-09 13:15:38 -07001531 SlowPathCode* slow_path = nullptr;
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001532 HInstruction* code_point = invoke->InputAt(1);
1533 if (code_point->IsIntConstant()) {
Vladimir Markoda051082016-05-17 16:10:20 +01001534 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
Andreas Gampe21030dd2015-05-07 14:46:15 -07001535 std::numeric_limits<uint16_t>::max()) {
1536 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1537 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1538 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1539 codegen->AddSlowPath(slow_path);
1540 __ jmp(slow_path->GetEntryLabel());
1541 __ Bind(slow_path->GetExitLabel());
1542 return;
1543 }
Vladimir Markofb6c90a2016-05-06 15:52:12 +01001544 } else if (code_point->GetType() != Primitive::kPrimChar) {
Andreas Gampe21030dd2015-05-07 14:46:15 -07001545 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1546 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1547 codegen->AddSlowPath(slow_path);
1548 __ j(kAbove, slow_path->GetEntryLabel());
1549 }
1550
1551 // From here down, we know that we are looking for a char that fits in 16 bits.
1552 // Location of reference to data array within the String object.
1553 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1554 // Location of count within the String object.
1555 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1556
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001557 // Load the count field of the string containing the length and compression flag.
Andreas Gampe21030dd2015-05-07 14:46:15 -07001558 __ movl(string_length, Address(string_obj, count_offset));
1559
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001560 // Do a zero-length check. Even with string compression `count == 0` means empty.
1561 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1562 "Expecting 0=compressed, 1=uncompressed");
Andreas Gampe21030dd2015-05-07 14:46:15 -07001563 // TODO: Support jecxz.
Mark Mendell0c9497d2015-08-21 09:30:05 -04001564 NearLabel not_found_label;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001565 __ testl(string_length, string_length);
1566 __ j(kEqual, &not_found_label);
1567
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001568 if (mirror::kUseStringCompression) {
1569 string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1570 __ movl(string_length_flagged, string_length);
1571 // Extract the length and shift out the least significant bit used as compression flag.
1572 __ shrl(string_length, Immediate(1));
1573 }
1574
Andreas Gampe21030dd2015-05-07 14:46:15 -07001575 if (start_at_zero) {
1576 // Number of chars to scan is the same as the string length.
1577 __ movl(counter, string_length);
1578
1579 // Move to the start of the string.
1580 __ addl(string_obj, Immediate(value_offset));
1581 } else {
1582 Register start_index = locations->InAt(2).AsRegister<Register>();
1583
1584 // Do a start_index check.
1585 __ cmpl(start_index, string_length);
1586 __ j(kGreaterEqual, &not_found_label);
1587
1588    // Ensure we have a start index >= 0.
1589 __ xorl(counter, counter);
1590 __ cmpl(start_index, Immediate(0));
1591 __ cmovl(kGreater, counter, start_index);
1592
jessicahandojo4877b792016-09-08 19:49:13 -07001593 if (mirror::kUseStringCompression) {
1594 NearLabel modify_counter, offset_uncompressed_label;
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001595 __ testl(string_length_flagged, Immediate(1));
1596 __ j(kNotZero, &offset_uncompressed_label);
jessicahandojo4877b792016-09-08 19:49:13 -07001597 // Move to the start of the string: string_obj + value_offset + start_index.
1598 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1599 __ jmp(&modify_counter);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001600
jessicahandojo4877b792016-09-08 19:49:13 -07001601 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1602 __ Bind(&offset_uncompressed_label);
1603 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1604
1605 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1606 // compare.
1607 __ Bind(&modify_counter);
1608 } else {
1609 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1610 }
Andreas Gampe21030dd2015-05-07 14:46:15 -07001611 __ negl(counter);
1612 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
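    // NEG plus LEA compute counter = string_length - start_index in place
    // (a plain SUB would compute the difference in the wrong direction).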
1613 }
1614
jessicahandojo4877b792016-09-08 19:49:13 -07001615 if (mirror::kUseStringCompression) {
1616 NearLabel uncompressed_string_comparison;
1617 NearLabel comparison_done;
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001618 __ testl(string_length_flagged, Immediate(1));
1619 __ j(kNotZero, &uncompressed_string_comparison);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001620
jessicahandojo4877b792016-09-08 19:49:13 -07001621 // Check if EAX (search_value) is ASCII.
1622 __ cmpl(search_value, Immediate(127));
1623 __ j(kGreater, &not_found_label);
1624 // Comparing byte-per-byte.
1625 __ repne_scasb();
1626 __ jmp(&comparison_done);
1627
1628 // Everything is set up for repne scasw:
1629 // * Comparison address in EDI.
1630 // * Counter in ECX.
1631 __ Bind(&uncompressed_string_comparison);
1632 __ repne_scasw();
1633 __ Bind(&comparison_done);
1634 } else {
1635 __ repne_scasw();
1636 }
Andreas Gampe21030dd2015-05-07 14:46:15 -07001637 // Did we find a match?
1638 __ j(kNotEqual, &not_found_label);
1639
1640 // Yes, we matched. Compute the index of the result.
1641 __ subl(string_length, counter);
1642 __ leal(out, Address(string_length, -1));
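  // REPNE SCASW also decrements ECX for the element that matched, so
  // string_length - ECX overshoots the match index by exactly one; the
  // LEA above subtracts that one.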
1643
Mark Mendell0c9497d2015-08-21 09:30:05 -04001644 NearLabel done;
Andreas Gampe21030dd2015-05-07 14:46:15 -07001645 __ jmp(&done);
1646
1647 // Failed to match; return -1.
1648 __ Bind(&not_found_label);
1649 __ movl(out, Immediate(-1));
1650
1651 // And join up at the end.
1652 __ Bind(&done);
1653 if (slow_path != nullptr) {
1654 __ Bind(slow_path->GetExitLabel());
1655 }
1656}
1657
1658void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001659 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001660}
1661
1662void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001663 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001664}
1665
1666void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001667 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001668}
1669
1670void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00001671 GenerateStringIndexOf(
1672 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
Andreas Gampe21030dd2015-05-07 14:46:15 -07001673}
1674
Jeff Hao848f70a2014-01-15 13:49:50 -08001675void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1676 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu806f0122016-03-09 11:10:16 +00001677 LocationSummary::kCallOnMainAndSlowPath,
Jeff Hao848f70a2014-01-15 13:49:50 -08001678 kIntrinsified);
1679 InvokeRuntimeCallingConvention calling_convention;
1680 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1681 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1682 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1683 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1684 locations->SetOut(Location::RegisterLocation(EAX));
Jeff Hao848f70a2014-01-15 13:49:50 -08001685}
1686
1687void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1688 X86Assembler* assembler = GetAssembler();
1689 LocationSummary* locations = invoke->GetLocations();
1690
1691 Register byte_array = locations->InAt(0).AsRegister<Register>();
1692 __ testl(byte_array, byte_array);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001693 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001694 codegen_->AddSlowPath(slow_path);
1695 __ j(kEqual, slow_path->GetEntryLabel());
1696
Serban Constantinescuba45db02016-07-12 22:53:02 +01001697 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
Roland Levillainf969a202016-03-09 16:14:00 +00001698 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001699 __ Bind(slow_path->GetExitLabel());
1700}
1701
1702void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1703 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu54ff4822016-07-07 18:03:19 +01001704 LocationSummary::kCallOnMainOnly,
Jeff Hao848f70a2014-01-15 13:49:50 -08001705 kIntrinsified);
1706 InvokeRuntimeCallingConvention calling_convention;
1707 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1708 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1709 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1710 locations->SetOut(Location::RegisterLocation(EAX));
1711}
1712
1713void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
Roland Levillaincc3839c2016-02-29 16:23:48 +00001714 // No need to emit code checking whether `locations->InAt(2)` is a null
1715 // pointer, as callers of the native method
1716 //
1717 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1718 //
1719 // all include a null check on `data` before calling that method.
Serban Constantinescuba45db02016-07-12 22:53:02 +01001720 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
Roland Levillainf969a202016-03-09 16:14:00 +00001721 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001722}
1723
1724void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1725 LocationSummary* locations = new (arena_) LocationSummary(invoke,
Serban Constantinescu806f0122016-03-09 11:10:16 +00001726 LocationSummary::kCallOnMainAndSlowPath,
Jeff Hao848f70a2014-01-15 13:49:50 -08001727 kIntrinsified);
1728 InvokeRuntimeCallingConvention calling_convention;
1729 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1730 locations->SetOut(Location::RegisterLocation(EAX));
Jeff Hao848f70a2014-01-15 13:49:50 -08001731}
1732
1733void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1734 X86Assembler* assembler = GetAssembler();
1735 LocationSummary* locations = invoke->GetLocations();
1736
1737 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1738 __ testl(string_to_copy, string_to_copy);
Andreas Gampe85b62f22015-09-09 13:15:38 -07001739 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
Jeff Hao848f70a2014-01-15 13:49:50 -08001740 codegen_->AddSlowPath(slow_path);
1741 __ j(kEqual, slow_path->GetEntryLabel());
1742
Serban Constantinescuba45db02016-07-12 22:53:02 +01001743 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
Roland Levillainf969a202016-03-09 16:14:00 +00001744 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
Jeff Hao848f70a2014-01-15 13:49:50 -08001745 __ Bind(slow_path->GetExitLabel());
1746}
1747
Mark Mendell8f8926a2015-08-17 11:39:06 -04001748void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1749 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1750 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1751 LocationSummary::kNoCall,
1752 kIntrinsified);
1753 locations->SetInAt(0, Location::RequiresRegister());
1754 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1755 // Place srcEnd in ECX to save a move below.
1756 locations->SetInAt(2, Location::RegisterLocation(ECX));
1757 locations->SetInAt(3, Location::RequiresRegister());
1758 locations->SetInAt(4, Location::RequiresRegister());
1759
1760 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1761 // We don't have enough registers to also grab ECX, so handle below.
1762 locations->AddTemp(Location::RegisterLocation(ESI));
1763 locations->AddTemp(Location::RegisterLocation(EDI));
1764}
1765
1766void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1767 X86Assembler* assembler = GetAssembler();
1768 LocationSummary* locations = invoke->GetLocations();
1769
1770 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1771 // Location of data in char array buffer.
1772 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1773 // Location of char array data in string.
1774 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1775
1776 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1777 Register obj = locations->InAt(0).AsRegister<Register>();
1778 Location srcBegin = locations->InAt(1);
1779 int srcBegin_value =
1780 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1781 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1782 Register dst = locations->InAt(3).AsRegister<Register>();
1783 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1784
1785 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1786 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1787 DCHECK_EQ(char_size, 2u);
1788
Mark Mendell8f8926a2015-08-17 11:39:06 -04001789 // Compute the number of chars (words) to move.
jessicahandojo4877b792016-09-08 19:49:13 -07001790 // Save ECX, since we don't know if it will be used later.
Mark Mendell8f8926a2015-08-17 11:39:06 -04001791 __ pushl(ECX);
1792 int stack_adjust = kX86WordSize;
1793 __ cfi().AdjustCFAOffset(stack_adjust);
1794 DCHECK_EQ(srcEnd, ECX);
1795 if (srcBegin.IsConstant()) {
jessicahandojo4877b792016-09-08 19:49:13 -07001796 __ subl(ECX, Immediate(srcBegin_value));
Mark Mendell8f8926a2015-08-17 11:39:06 -04001797 } else {
1798 DCHECK(srcBegin.IsRegister());
1799 __ subl(ECX, srcBegin.AsRegister<Register>());
1800 }
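  // ECX now holds srcEnd - srcBegin, the number of chars to copy; getChars
  // copies the half-open range [srcBegin, srcEnd).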
1801
jessicahandojo4877b792016-09-08 19:49:13 -07001802 NearLabel done;
1803 if (mirror::kUseStringCompression) {
1804    // Location of count in string.
1805 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1806 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1807 DCHECK_EQ(c_char_size, 1u);
1808 __ pushl(EAX);
1809 __ cfi().AdjustCFAOffset(stack_adjust);
1810
1811 NearLabel copy_loop, copy_uncompressed;
Vladimir Markofdaf0f42016-10-13 19:29:53 +01001812 __ testl(Address(obj, count_offset), Immediate(1));
1813 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1814 "Expecting 0=compressed, 1=uncompressed");
1815 __ j(kNotZero, &copy_uncompressed);
jessicahandojo4877b792016-09-08 19:49:13 -07001816 // Compute the address of the source string by adding the number of chars from
1817 // the source beginning to the value offset of a string.
1818 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1819
1820 // Start the loop to copy String's value to Array of Char.
1821 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1822 __ Bind(&copy_loop);
1823 __ jecxz(&done);
1824 // Use EAX temporary (convert byte from ESI to word).
1825 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1826 __ movzxb(EAX, Address(ESI, 0));
1827 __ movw(Address(EDI, 0), EAX);
1828 __ leal(EDI, Address(EDI, char_size));
1829 __ leal(ESI, Address(ESI, c_char_size));
1830 // TODO: Add support for LOOP to X86Assembler.
1831 __ subl(ECX, Immediate(1));
1832 __ jmp(&copy_loop);
1833 __ Bind(&copy_uncompressed);
1834 }
1835
1836 // Do the copy for uncompressed string.
1837 // Compute the address of the destination buffer.
1838 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1839 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
Mark Mendell8f8926a2015-08-17 11:39:06 -04001840 __ rep_movsw();
1841
jessicahandojo4877b792016-09-08 19:49:13 -07001842 __ Bind(&done);
1843 if (mirror::kUseStringCompression) {
1844 // Restore EAX.
1845 __ popl(EAX);
1846 __ cfi().AdjustCFAOffset(-stack_adjust);
1847 }
1848 // Restore ECX.
Mark Mendell8f8926a2015-08-17 11:39:06 -04001849 __ popl(ECX);
1850 __ cfi().AdjustCFAOffset(-stack_adjust);
1851}
1852
Mark Mendell09ed1a32015-03-25 08:30:06 -04001853static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1854 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1855 Location out_loc = locations->Out();
1856 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1857 // to avoid a SIGBUS.
1858 switch (size) {
1859 case Primitive::kPrimByte:
1860 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1861 break;
1862 case Primitive::kPrimShort:
1863 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1864 break;
1865 case Primitive::kPrimInt:
1866 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1867 break;
1868 case Primitive::kPrimLong:
1869 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1870 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
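      // The 64-bit peek is two 32-bit loads and therefore not atomic; the
      // Memory.peekLong* API presumably makes no atomicity promise for raw
      // addresses, so plain loads are assumed to be sufficient here.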
1871 break;
1872 default:
1873 LOG(FATAL) << "Type not recognized for peek: " << size;
1874 UNREACHABLE();
1875 }
1876}
1877
1878void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1879 CreateLongToIntLocations(arena_, invoke);
1880}
1881
1882void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1883 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1884}
1885
1886void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1887 CreateLongToIntLocations(arena_, invoke);
1888}
1889
1890void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1891 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1892}
1893
1894void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1895 CreateLongToLongLocations(arena_, invoke);
1896}
1897
1898void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1899 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1900}
1901
1902void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1903 CreateLongToIntLocations(arena_, invoke);
1904}
1905
1906void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1907 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1908}
1909
1910static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1911 HInvoke* invoke) {
1912 LocationSummary* locations = new (arena) LocationSummary(invoke,
1913 LocationSummary::kNoCall,
1914 kIntrinsified);
1915 locations->SetInAt(0, Location::RequiresRegister());
Roland Levillain4c0eb422015-04-24 16:43:49 +01001916 HInstruction* value = invoke->InputAt(1);
Mark Mendell09ed1a32015-03-25 08:30:06 -04001917 if (size == Primitive::kPrimByte) {
1918 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1919 } else {
1920 locations->SetInAt(1, Location::RegisterOrConstant(value));
1921 }
1922}
1923
1924static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1925 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1926 Location value_loc = locations->InAt(1);
1927 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1928 // to avoid a SIGBUS.
1929 switch (size) {
1930 case Primitive::kPrimByte:
1931 if (value_loc.IsConstant()) {
1932 __ movb(Address(address, 0),
1933 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1934 } else {
1935 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1936 }
1937 break;
1938 case Primitive::kPrimShort:
1939 if (value_loc.IsConstant()) {
1940 __ movw(Address(address, 0),
1941 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1942 } else {
1943 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1944 }
1945 break;
1946 case Primitive::kPrimInt:
1947 if (value_loc.IsConstant()) {
1948 __ movl(Address(address, 0),
1949 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1950 } else {
1951 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1952 }
1953 break;
1954 case Primitive::kPrimLong:
1955 if (value_loc.IsConstant()) {
1956 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1957 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1958 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1959 } else {
1960 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1961 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1962 }
1963 break;
1964 default:
1965 LOG(FATAL) << "Type not recognized for poke: " << size;
1966 UNREACHABLE();
1967 }
1968}
1969
1970void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1971 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1972}
1973
1974void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1975 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1976}
1977
1978void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1979 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1980}
1981
1982void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1983 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1984}
1985
1986void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1987 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1988}
1989
1990void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1991 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1992}
1993
1994void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1995 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1996}
1997
1998void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1999 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
2000}
2001
2002void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
2003 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2004 LocationSummary::kNoCall,
2005 kIntrinsified);
2006 locations->SetOut(Location::RequiresRegister());
2007}
2008
2009void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
2010 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
Andreas Gampe542451c2016-07-26 09:02:02 -07002011 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
Mark Mendell09ed1a32015-03-25 08:30:06 -04002012}
2013
Roland Levillain0d5a2812015-11-13 10:07:31 +00002014static void GenUnsafeGet(HInvoke* invoke,
2015 Primitive::Type type,
2016 bool is_volatile,
2017 CodeGeneratorX86* codegen) {
2018 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2019 LocationSummary* locations = invoke->GetLocations();
2020 Location base_loc = locations->InAt(1);
2021 Register base = base_loc.AsRegister<Register>();
2022 Location offset_loc = locations->InAt(2);
2023 Register offset = offset_loc.AsRegisterPairLow<Register>();
2024 Location output_loc = locations->Out();
Mark Mendell09ed1a32015-03-25 08:30:06 -04002025
2026 switch (type) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00002027 case Primitive::kPrimInt: {
Roland Levillain0d5a2812015-11-13 10:07:31 +00002028 Register output = output_loc.AsRegister<Register>();
2029 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
Roland Levillain7c1559a2015-12-15 10:55:36 +00002030 break;
2031 }
2032
2033 case Primitive::kPrimNot: {
2034 Register output = output_loc.AsRegister<Register>();
2035 if (kEmitCompilerReadBarrier) {
2036 if (kUseBakerReadBarrier) {
Sang, Chunlei0fcd2b82016-04-05 17:12:59 +08002037 Address src(base, offset, ScaleFactor::TIMES_1, 0);
2038 codegen->GenerateReferenceLoadWithBakerReadBarrier(
Vladimir Marko953437b2016-08-24 08:30:46 +00002039 invoke, output_loc, base, src, /* needs_null_check */ false);
Roland Levillain7c1559a2015-12-15 10:55:36 +00002040 } else {
2041 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2042 codegen->GenerateReadBarrierSlow(
2043 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
2044 }
2045 } else {
2046 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2047 __ MaybeUnpoisonHeapReference(output);
Roland Levillain4d027112015-07-01 15:41:14 +01002048 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04002049 break;
Roland Levillain4d027112015-07-01 15:41:14 +01002050 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04002051
2052 case Primitive::kPrimLong: {
Roland Levillain0d5a2812015-11-13 10:07:31 +00002053 Register output_lo = output_loc.AsRegisterPairLow<Register>();
2054 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
Mark Mendell09ed1a32015-03-25 08:30:06 -04002055 if (is_volatile) {
2056 // Need to use a XMM to read atomically.
2057 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2058 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
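        // The 8-byte SSE2 load above is a single memory access, which,
        // assuming the field is naturally aligned, is atomic on x86 and so
        // satisfies a volatile 64-bit read on 32-bit targets; the MOVD/PSRLQ
        // sequence below merely splits the value into the output pair.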
2059 __ movd(output_lo, temp);
2060 __ psrlq(temp, Immediate(32));
2061 __ movd(output_hi, temp);
2062 } else {
2063 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
2064 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
2065 }
2066 }
2067 break;
2068
2069 default:
2070 LOG(FATAL) << "Unsupported op size " << type;
2071 UNREACHABLE();
2072 }
2073}
2074
Roland Levillain7c1559a2015-12-15 10:55:36 +00002075static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
2076 HInvoke* invoke,
2077 Primitive::Type type,
2078 bool is_volatile) {
Roland Levillain0d5a2812015-11-13 10:07:31 +00002079 bool can_call = kEmitCompilerReadBarrier &&
2080 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2081 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002082 LocationSummary* locations = new (arena) LocationSummary(invoke,
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002083 (can_call
2084 ? LocationSummary::kCallOnSlowPath
2085 : LocationSummary::kNoCall),
Mark Mendell09ed1a32015-03-25 08:30:06 -04002086 kIntrinsified);
Vladimir Marko70e97462016-08-09 11:04:26 +01002087 if (can_call && kUseBakerReadBarrier) {
Vladimir Marko804b03f2016-09-14 16:26:36 +01002088 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
Vladimir Marko70e97462016-08-09 11:04:26 +01002089 }
Mark Mendell09ed1a32015-03-25 08:30:06 -04002090 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2091 locations->SetInAt(1, Location::RequiresRegister());
2092 locations->SetInAt(2, Location::RequiresRegister());
Roland Levillain7c1559a2015-12-15 10:55:36 +00002093 if (type == Primitive::kPrimLong) {
Mark Mendell09ed1a32015-03-25 08:30:06 -04002094 if (is_volatile) {
2095 // Need to use XMM to read volatile.
2096 locations->AddTemp(Location::RequiresFpuRegister());
Roland Levillain3d312422016-06-23 13:53:42 +01002097 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002098 } else {
2099 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2100 }
2101 } else {
Roland Levillain3d312422016-06-23 13:53:42 +01002102 locations->SetOut(Location::RequiresRegister(),
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002103 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
Mark Mendell09ed1a32015-03-25 08:30:06 -04002104 }
2105}
2106
2107void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00002108 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002109}
2110void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00002111 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002112}
2113void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00002114 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002115}
2116void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00002117 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002118}
2119void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00002120 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002121}
2122void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillain7c1559a2015-12-15 10:55:36 +00002123 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002124}
2125
2126
2127void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002128 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002129}
2130void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002131 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002132}
2133void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002134 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002135}
2136void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002137 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002138}
2139void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002140 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002141}
2142void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002143 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002144}
2145
2146
2147static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
2148 Primitive::Type type,
2149 HInvoke* invoke,
2150 bool is_volatile) {
2151 LocationSummary* locations = new (arena) LocationSummary(invoke,
2152 LocationSummary::kNoCall,
2153 kIntrinsified);
2154 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2155 locations->SetInAt(1, Location::RequiresRegister());
2156 locations->SetInAt(2, Location::RequiresRegister());
2157 locations->SetInAt(3, Location::RequiresRegister());
2158 if (type == Primitive::kPrimNot) {
2159 // Need temp registers for card-marking.
Roland Levillain4d027112015-07-01 15:41:14 +01002160 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Mark Mendell09ed1a32015-03-25 08:30:06 -04002161 // Ensure the value is in a byte register.
2162 locations->AddTemp(Location::RegisterLocation(ECX));
2163 } else if (type == Primitive::kPrimLong && is_volatile) {
2164 locations->AddTemp(Location::RequiresFpuRegister());
2165 locations->AddTemp(Location::RequiresFpuRegister());
2166 }
2167}
2168
2169void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002170 CreateIntIntIntIntToVoidPlusTempsLocations(
2171 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002172}
2173void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002174 CreateIntIntIntIntToVoidPlusTempsLocations(
2175 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002176}
2177void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002178 CreateIntIntIntIntToVoidPlusTempsLocations(
2179 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002180}
2181void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002182 CreateIntIntIntIntToVoidPlusTempsLocations(
2183 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002184}
2185void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002186 CreateIntIntIntIntToVoidPlusTempsLocations(
2187 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002188}
2189void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002190 CreateIntIntIntIntToVoidPlusTempsLocations(
2191 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002192}
2193void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002194 CreateIntIntIntIntToVoidPlusTempsLocations(
2195 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002196}
2197void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002198 CreateIntIntIntIntToVoidPlusTempsLocations(
2199 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002200}
2201void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002202 CreateIntIntIntIntToVoidPlusTempsLocations(
2203 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002204}
2205
2206// We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2207// memory model.
2208static void GenUnsafePut(LocationSummary* locations,
2209 Primitive::Type type,
2210 bool is_volatile,
2211 CodeGeneratorX86* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002212 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
Mark Mendell09ed1a32015-03-25 08:30:06 -04002213 Register base = locations->InAt(1).AsRegister<Register>();
2214 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2215 Location value_loc = locations->InAt(3);
2216
2217 if (type == Primitive::kPrimLong) {
2218 Register value_lo = value_loc.AsRegisterPairLow<Register>();
2219 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2220 if (is_volatile) {
2221 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2222 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2223 __ movd(temp1, value_lo);
2224 __ movd(temp2, value_hi);
2225 __ punpckldq(temp1, temp2);
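      // temp1 now holds value_hi:value_lo, so the MOVSD below emits the
      // volatile write as a single 8-byte store, mirroring the XMM-based
      // atomic volatile load in GenUnsafeGet.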
2226 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2227 } else {
2228 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2229 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2230 }
Roland Levillain4d027112015-07-01 15:41:14 +01002231 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2232 Register temp = locations->GetTemp(0).AsRegister<Register>();
2233 __ movl(temp, value_loc.AsRegister<Register>());
2234 __ PoisonHeapReference(temp);
2235 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002236 } else {
2237 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2238 }
2239
2240 if (is_volatile) {
Mark P Mendell17077d82015-12-16 19:15:59 +00002241 codegen->MemoryFence();
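    // On x86 a volatile store only has to guard against StoreLoad
    // reordering, so one fence after the store suffices; plain and ordered
    // puts need no fence at all, per the x86 memory model noted above.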
Mark Mendell09ed1a32015-03-25 08:30:06 -04002242 }
2243
2244 if (type == Primitive::kPrimNot) {
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002245 bool value_can_be_null = true; // TODO: Worth finding out this information?
Mark Mendell09ed1a32015-03-25 08:30:06 -04002246 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2247 locations->GetTemp(1).AsRegister<Register>(),
2248 base,
Nicolas Geoffray07276db2015-05-18 14:22:09 +01002249 value_loc.AsRegister<Register>(),
2250 value_can_be_null);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002251 }
2252}
2253
2254void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002255 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002256}
2257void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002258 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002259}
2260void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002261 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002262}
2263void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002264 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002265}
2266void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002267 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002268}
2269void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002270 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002271}
2272void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002273 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002274}
2275void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002276 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002277}
2278void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
Roland Levillainbf84a3d2015-12-04 14:33:02 +00002279 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
Mark Mendell09ed1a32015-03-25 08:30:06 -04002280}
2281
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002282static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
2283 Primitive::Type type,
Mark Mendell58d25fd2015-04-03 14:52:31 -04002284 HInvoke* invoke) {
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002285 bool can_call = kEmitCompilerReadBarrier &&
2286 kUseBakerReadBarrier &&
2287 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002288 LocationSummary* locations = new (arena) LocationSummary(invoke,
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002289 (can_call
2290 ? LocationSummary::kCallOnSlowPath
2291 : LocationSummary::kNoCall),
Mark Mendell58d25fd2015-04-03 14:52:31 -04002292 kIntrinsified);
2293 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2294 locations->SetInAt(1, Location::RequiresRegister());
2295  // Offset is a long, but in 32-bit mode, we only need the low word.
2296 // Can we update the invoke here to remove a TypeConvert to Long?
2297 locations->SetInAt(2, Location::RequiresRegister());
2298 // Expected value must be in EAX or EDX:EAX.
2299 // For long, new value must be in ECX:EBX.
2300 if (type == Primitive::kPrimLong) {
2301 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2302 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2303 } else {
2304 locations->SetInAt(3, Location::RegisterLocation(EAX));
2305 locations->SetInAt(4, Location::RequiresRegister());
2306 }
2307
2308 // Force a byte register for the output.
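  // (SETcc can only write a byte register such as AL, and EAX provides one.)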
2309 locations->SetOut(Location::RegisterLocation(EAX));
2310 if (type == Primitive::kPrimNot) {
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002311 // Need temporary registers for card-marking, and possibly for
2312 // (Baker) read barrier.
Roland Levillainb488b782015-10-22 11:38:49 +01002313 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002314 // Need a byte register for marking.
2315 locations->AddTemp(Location::RegisterLocation(ECX));
2316 }
2317}
2318
2319void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2320 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2321}
2322
2323void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2324 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2325}
2326
2327void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002328 // The only read barrier implementation supporting the
2329 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2330 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
Roland Levillain391b8662015-12-18 11:43:38 +00002331 return;
2332 }
2333
Mark Mendell58d25fd2015-04-03 14:52:31 -04002334 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2335}
2336
2337static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
Roland Levillainb488b782015-10-22 11:38:49 +01002338 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
Mark Mendell58d25fd2015-04-03 14:52:31 -04002339 LocationSummary* locations = invoke->GetLocations();
2340
2341 Register base = locations->InAt(1).AsRegister<Register>();
2342 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2343 Location out = locations->Out();
2344 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2345
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002346 // The address of the field within the holding object.
2347 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2348
Roland Levillainb488b782015-10-22 11:38:49 +01002349 if (type == Primitive::kPrimNot) {
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002350 // The only read barrier implementation supporting the
2351 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2352 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2353
2354 Location temp1_loc = locations->GetTemp(0);
2355 Register temp1 = temp1_loc.AsRegister<Register>();
2356 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2357
Roland Levillain4d027112015-07-01 15:41:14 +01002358 Register expected = locations->InAt(3).AsRegister<Register>();
Roland Levillainb488b782015-10-22 11:38:49 +01002359 // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
Roland Levillain4d027112015-07-01 15:41:14 +01002360 DCHECK_EQ(expected, EAX);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002361 Register value = locations->InAt(4).AsRegister<Register>();
Roland Levillain4d027112015-07-01 15:41:14 +01002362
Roland Levillainb488b782015-10-22 11:38:49 +01002363 // Mark card for object assuming new value is stored.
2364 bool value_can_be_null = true; // TODO: Worth finding out this information?
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002365 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2366
2367 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2368 // Need to make sure the reference stored in the field is a to-space
2369 // one before attempting the CAS or the CAS could fail incorrectly.
2370 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2371 invoke,
2372 temp1_loc, // Unused, used only as a "temporary" within the read barrier.
2373 base,
2374 field_addr,
2375 /* needs_null_check */ false,
2376 /* always_update_field */ true,
2377 &temp2);
2378 }
Roland Levillainb488b782015-10-22 11:38:49 +01002379
2380 bool base_equals_value = (base == value);
2381 if (kPoisonHeapReferences) {
2382 if (base_equals_value) {
2383 // If `base` and `value` are the same register location, move
2384 // `value` to a temporary register. This way, poisoning
2385 // `value` won't invalidate `base`.
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002386 value = temp1;
Roland Levillainb488b782015-10-22 11:38:49 +01002387 __ movl(value, base);
Roland Levillain4d027112015-07-01 15:41:14 +01002388 }
Roland Levillainb488b782015-10-22 11:38:49 +01002389
2390 // Check that the register allocator did not assign the location
2391 // of `expected` (EAX) to `value` nor to `base`, so that heap
2392 // poisoning (when enabled) works as intended below.
2393 // - If `value` were equal to `expected`, both references would
2394 // be poisoned twice, meaning they would not be poisoned at
2395 // all, as heap poisoning uses address negation.
2396 // - If `base` were equal to `expected`, poisoning `expected`
2397 // would invalidate `base`.
2398 DCHECK_NE(value, expected);
2399 DCHECK_NE(base, expected);
2400
2401 __ PoisonHeapReference(expected);
2402 __ PoisonHeapReference(value);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002403 }
2404
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002405 __ LockCmpxchgl(field_addr, value);
Mark Mendell58d25fd2015-04-03 14:52:31 -04002406
Roland Levillain0d5a2812015-11-13 10:07:31 +00002407 // LOCK CMPXCHG has full barrier semantics, and we don't need
Roland Levillainb488b782015-10-22 11:38:49 +01002408 // scheduling barriers at this time.
Mark Mendell58d25fd2015-04-03 14:52:31 -04002409
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002410 // Convert ZF into the Boolean result.
Roland Levillainb488b782015-10-22 11:38:49 +01002411 __ setb(kZero, out.AsRegister<Register>());
2412 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
Roland Levillain4d027112015-07-01 15:41:14 +01002413
Roland Levillain391b8662015-12-18 11:43:38 +00002414 // If heap poisoning is enabled, we need to unpoison the values
2415 // that were poisoned earlier.
Roland Levillainb488b782015-10-22 11:38:49 +01002416 if (kPoisonHeapReferences) {
2417 if (base_equals_value) {
2418 // `value` has been moved to a temporary register, no need to
2419 // unpoison it.
2420 } else {
2421 // Ensure `value` is different from `out`, so that unpoisoning
2422 // the former does not invalidate the latter.
2423 DCHECK_NE(value, out.AsRegister<Register>());
2424 __ UnpoisonHeapReference(value);
2425 }
2426 // Do not unpoison the reference contained in register
2427 // `expected`, as it is the same as register `out` (EAX).
2428 }
2429 } else {
2430 if (type == Primitive::kPrimInt) {
2431 // Ensure the expected value is in EAX (required by the CMPXCHG
2432 // instruction).
2433 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002434 __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
Roland Levillainb488b782015-10-22 11:38:49 +01002435 } else if (type == Primitive::kPrimLong) {
2436 // Ensure the expected value is in EAX:EDX and that the new
2437 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2438 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2439 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2440 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2441 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002442 __ LockCmpxchg8b(field_addr);
Roland Levillainb488b782015-10-22 11:38:49 +01002443 } else {
2444 LOG(FATAL) << "Unexpected CAS type " << type;
2445 }
2446
Roland Levillain0d5a2812015-11-13 10:07:31 +00002447 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2448 // don't need scheduling barriers at this time.
Roland Levillainb488b782015-10-22 11:38:49 +01002449
Roland Levillaina1aa3b12016-10-26 13:03:38 +01002450 // Convert ZF into the Boolean result.
Roland Levillainb488b782015-10-22 11:38:49 +01002451 __ setb(kZero, out.AsRegister<Register>());
2452 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
Roland Levillain4d027112015-07-01 15:41:14 +01002453 }
Mark Mendell58d25fd2015-04-03 14:52:31 -04002454}
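
// A reference-only C++ sketch (illustrative, not used by the compiler) of the
// int-width semantics GenCAS emits: LOCK CMPXCHG performs the atomic
// compare-and-exchange, and setb/movzxb materialize ZF as the 0/1 boolean
// that Unsafe.compareAndSwapInt returns. The function name is hypothetical;
// assumes a GCC/Clang toolchain providing __sync_bool_compare_and_swap.
inline int32_t UnsafeCasIntSketch(int32_t* addr, int32_t expected, int32_t new_value) {
  // On x86 this builtin compiles to LOCK CMPXCHG and yields the ZF outcome
  // as a bool, matching the setb + movzxb sequence above.
  return __sync_bool_compare_and_swap(addr, expected, new_value) ? 1 : 0;
}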

void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
  GenCAS(Primitive::kPrimInt, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
  GenCAS(Primitive::kPrimLong, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // UnsafeCASObject intrinsic is the Baker-style read barrier.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  GenCAS(Primitive::kPrimNot, invoke, codegen_);
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
                     X86Assembler* assembler) {
  Immediate imm_shift(shift);
  Immediate imm_mask(mask);
  __ movl(temp, reg);
  __ shrl(reg, imm_shift);
  __ andl(temp, imm_mask);
  __ andl(reg, imm_mask);
  __ shll(temp, imm_shift);
  __ orl(reg, temp);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register reg = locations->InAt(0).AsRegister<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  /*
   * Use one bswap instruction to reverse byte order first, and then use 3 rounds
   * of swapping bits to reverse the bits in a number x. Using bswap saves
   * instructions compared to the generic libcore (luni) implementation, which
   * needs 5 rounds of swapping bits.
   * x = bswap x
   * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
   * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
   * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
   */
  __ bswapl(reg);
  SwapBits(reg, temp, 1, 0x55555555, assembler);
  SwapBits(reg, temp, 2, 0x33333333, assembler);
  SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
}
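
// A reference-only C++ sketch of the exact sequence emitted above: one byte
// swap followed by the three SwapBits rounds. Illustrative, not called by the
// compiler; assumes a GCC/Clang toolchain providing __builtin_bswap32.
inline uint32_t ReverseBits32Sketch(uint32_t x) {
  x = __builtin_bswap32(x);                                 // bswapl: reverse byte order.
  x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);  // Swap adjacent bits.
  x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);  // Swap bit pairs.
  x = ((x & 0x0f0f0f0fu) << 4) | ((x >> 4) & 0x0f0f0f0fu);  // Swap nibbles.
  return x;
}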

void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kNoCall,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
  Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // We want to swap high/low, then bswap each one, and then do the same
  // as a 32-bit reverse.
  // Exchange high and low.
  __ movl(temp, reg_low);
  __ movl(reg_low, reg_high);
  __ movl(reg_high, temp);

  // Bit-reverse low.
  __ bswapl(reg_low);
  SwapBits(reg_low, temp, 1, 0x55555555, assembler);
  SwapBits(reg_low, temp, 2, 0x33333333, assembler);
  SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);

  // Bit-reverse high.
  __ bswapl(reg_high);
  SwapBits(reg_high, temp, 1, 0x55555555, assembler);
  SwapBits(reg_high, temp, 2, 0x33333333, assembler);
  SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
}

static void CreateBitCountLocations(
    ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
  if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
    // Do nothing if there is no popcnt support. This results in generating
    // a call for the intrinsic rather than direct code.
    return;
  }
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
    locations->AddTemp(Location::RequiresRegister());
  }
  locations->SetInAt(0, Location::Any());
  locations->SetOut(Location::RequiresRegister());
}

static void GenBitCount(X86Assembler* assembler,
                        CodeGeneratorX86* codegen,
                        HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    int32_t result = is_long
        ? POPCOUNT(static_cast<uint64_t>(value))
        : POPCOUNT(static_cast<uint32_t>(value));
    codegen->Load32BitValue(out, result);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ popcntl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ popcntl(out, Address(ESP, src.GetStackIndex()));
    }
  } else {
    // The 64-bit case needs to worry about two parts.
    Register temp = locations->GetTemp(0).AsRegister<Register>();
    if (src.IsRegisterPair()) {
      __ popcntl(temp, src.AsRegisterPairLow<Register>());
      __ popcntl(out, src.AsRegisterPairHigh<Register>());
    } else {
      DCHECK(src.IsDoubleStackSlot());
      __ popcntl(temp, Address(ESP, src.GetStackIndex()));
      __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
    }
    __ addl(out, temp);
  }
}
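
// A reference-only sketch of the 64-bit path above: x86-32 has no 64-bit
// POPCNT, so the count is the sum of two 32-bit counts (popcntl twice, then
// addl). Illustrative only; assumes __builtin_popcount (GCC/Clang).
inline int32_t PopCount64Sketch(uint32_t lo, uint32_t hi) {
  return __builtin_popcount(lo) + __builtin_popcount(hi);
}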

void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
  CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
  GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenLeadingZeros(X86Assembler* assembler,
                            CodeGeneratorX86* codegen,
                            HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsrl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsrl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSR sets ZF if the input was zero, and the output is undefined.
    NearLabel all_zeroes, done;
    __ j(kEqual, &all_zeroes);

    // Correct the result from BSR to get the final CLZ result.
    __ xorl(out, Immediate(31));
    __ jmp(&done);

    // Fix the zero case with the expected result.
    __ Bind(&all_zeroes);
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both parts of the register.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel handle_low, done, all_zeroes;

  // Is the high word zero?
  __ testl(src_hi, src_hi);
  __ j(kEqual, &handle_low);

  // High word is not zero. We know that the BSR result is defined in this case.
  __ bsrl(out, src_hi);

  // Correct the result from BSR to get the final CLZ result.
  __ xorl(out, Immediate(31));
  __ jmp(&done);

  // High word was zero. We have to compute the low word count and add 32.
  __ Bind(&handle_low);
  __ bsrl(out, src_lo);
  __ j(kEqual, &all_zeroes);

  // We had a valid result. Use an XOR to both correct the result and add 32.
  __ xorl(out, Immediate(63));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}
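
// A reference-only sketch of the BSR-based CLZ above. For a non-zero input,
// BSR yields the index of the highest set bit, so CLZ(x) == 31 - BSR(x); as
// both operands lie in [0, 31], the subtraction is borrow-free and can be
// done with XOR 31 (and XOR 63 in the high-word-zero case, which also folds
// in the +32). Zero must be special-cased because BSR leaves its output
// undefined. Illustrative only; assumes __builtin_clz (GCC/Clang).
inline int32_t Clz32Sketch(uint32_t x) {
  if (x == 0) {
    return 32;  // The all_zeroes path above.
  }
  int32_t bsr = 31 - __builtin_clz(x);  // Index of the highest set bit, as BSR computes it.
  return bsr ^ 31;                      // Same as 31 - bsr for values in [0, 31].
}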

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
  GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
                                                           LocationSummary::kNoCall,
                                                           kIntrinsified);
  if (is_long) {
    locations->SetInAt(0, Location::RequiresRegister());
  } else {
    locations->SetInAt(0, Location::Any());
  }
  locations->SetOut(Location::RequiresRegister());
}

static void GenTrailingZeros(X86Assembler* assembler,
                             CodeGeneratorX86* codegen,
                             HInvoke* invoke, bool is_long) {
  LocationSummary* locations = invoke->GetLocations();
  Location src = locations->InAt(0);
  Register out = locations->Out().AsRegister<Register>();

  if (invoke->InputAt(0)->IsConstant()) {
    // Evaluate this at compile time.
    int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
    if (value == 0) {
      value = is_long ? 64 : 32;
    } else {
      value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
    }
    codegen->Load32BitValue(out, value);
    return;
  }

  // Handle the non-constant cases.
  if (!is_long) {
    if (src.IsRegister()) {
      __ bsfl(out, src.AsRegister<Register>());
    } else {
      DCHECK(src.IsStackSlot());
      __ bsfl(out, Address(ESP, src.GetStackIndex()));
    }

    // BSF sets ZF if the input was zero, and the output is undefined.
    NearLabel done;
    __ j(kNotEqual, &done);

    // Fix the zero case with the expected result.
    __ movl(out, Immediate(32));

    __ Bind(&done);
    return;
  }

  // The 64-bit case needs to worry about both parts of the register.
  DCHECK(src.IsRegisterPair());
  Register src_lo = src.AsRegisterPairLow<Register>();
  Register src_hi = src.AsRegisterPairHigh<Register>();
  NearLabel done, all_zeroes;

  // If the low word is zero, then ZF will be set. If not, we have the answer.
  __ bsfl(out, src_lo);
  __ j(kNotEqual, &done);

  // Low word was zero. We have to compute the high word count and add 32.
  __ bsfl(out, src_hi);
  __ j(kEqual, &all_zeroes);

  // We had a valid result. Add 32 to account for the low word being zero.
  __ addl(out, Immediate(32));
  __ jmp(&done);

  // All zero case.
  __ Bind(&all_zeroes);
  __ movl(out, Immediate(64));

  __ Bind(&done);
}
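
// A reference-only sketch of the BSF-based CTZ above: BSF yields the
// trailing-zero count of a non-zero input directly, and the 64-bit case scans
// the low word first, falling back to the high word plus 32. Illustrative
// only; assumes __builtin_ctz (GCC/Clang).
inline int32_t Ctz64Sketch(uint32_t lo, uint32_t hi) {
  if (lo != 0) {
    return __builtin_ctz(lo);       // bsfl on the low word found a set bit.
  }
  if (hi != 0) {
    return __builtin_ctz(hi) + 32;  // Low word was zero: high word count + 32.
  }
  return 64;                        // The all_zeroes path above.
}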

void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
}

void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
}

void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
}

void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
  GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
}

void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
  if (kEmitCompilerReadBarrier) {
    // Do not intrinsify this call with the read barrier configuration.
    return;
  }
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
                                                            LocationSummary::kCallOnSlowPath,
                                                            kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
  DCHECK(!kEmitCompilerReadBarrier);
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register obj = locations->InAt(0).AsRegister<Register>();
  Register out = locations->Out().AsRegister<Register>();

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Load ArtMethod first.
  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(invoke_direct != nullptr);
  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
      invoke_direct, locations->GetTemp(0));
  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
  Register temp = temp_loc.AsRegister<Register>();

  // Now get the declaring class.
  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));

  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
  DCHECK_NE(slow_path_flag_offset, 0u);
  DCHECK_NE(disable_flag_offset, 0u);
  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);

  // Check the static flags that prevent us from using the intrinsic.
  if (slow_path_flag_offset == disable_flag_offset + 1) {
    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
    __ j(kNotEqual, slow_path->GetEntryLabel());
  }

  // Fast path.
  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
  codegen_->MaybeRecordImplicitNullCheck(invoke);
  __ MaybeUnpoisonHeapReference(out);
  __ Bind(slow_path->GetExitLabel());
}
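
// A reference-only sketch of the flag check above. When the slow-path flag
// byte immediately follows the disable flag byte, the generated code folds
// the two byte tests into a single 16-bit compare (cmpw) against zero.
// Illustrative only; the function name and the raw-byte view of the class
// are hypothetical.
inline bool ReferenceFlagsClearSketch(const uint8_t* declaring_class,
                                      uint32_t disable_flag_offset,
                                      uint32_t slow_path_flag_offset) {
  if (slow_path_flag_offset == disable_flag_offset + 1) {
    // Equivalent to the single cmpw over the two adjacent flag bytes.
    return (declaring_class[disable_flag_offset] |
            declaring_class[slow_path_flag_offset]) == 0;
  }
  // Otherwise two separate byte compares, as in the else branch above.
  return declaring_class[disable_flag_offset] == 0 &&
         declaring_class[slow_path_flag_offset] == 0;
}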

static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
  return instruction->InputAt(input0) == instruction->InputAt(input1);
}

// Compute the base address for the System.arraycopy intrinsic in `base`.
static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
                                          Primitive::Type type,
                                          const Register& array,
                                          const Location& pos,
                                          const Register& base) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We could allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();

  if (pos.IsConstant()) {
    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(base, Address(array, element_size * constant + data_offset));
  } else {
    __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
  }
}

// Compute the end source address for the System.arraycopy intrinsic in `end`.
static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
                                         Primitive::Type type,
                                         const Location& copy_length,
                                         const Register& base,
                                         const Register& end) {
  // This routine is only used by the SystemArrayCopy intrinsic at the
  // moment. We could allow Primitive::kPrimChar as `type` to implement
  // the SystemArrayCopyChar intrinsic.
  DCHECK_EQ(type, Primitive::kPrimNot);
  const int32_t element_size = Primitive::ComponentSize(type);
  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));

  if (copy_length.IsConstant()) {
    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
    __ leal(end, Address(base, element_size * constant));
  } else {
    __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
  }
}
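
// A reference-only sketch of the address arithmetic the two helpers above
// fold into single LEA instructions:
//   base = array + data_offset + pos * element_size
//   end  = base + length * element_size
// Illustrative only; the function names are hypothetical.
inline uintptr_t ArrayCopyBaseSketch(uintptr_t array,
                                     uint32_t data_offset,
                                     int32_t pos,
                                     int32_t element_size) {
  return array + data_offset + static_cast<uintptr_t>(pos) * element_size;
}

inline uintptr_t ArrayCopyEndSketch(uintptr_t base, int32_t length, int32_t element_size) {
  return base + static_cast<uintptr_t>(length) * element_size;
}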

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barrier.
  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
    return;
  }

  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
  if (invoke->GetLocations() != nullptr) {
    // Need a byte register for marking.
    invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));

    static constexpr size_t kSrc = 0;
    static constexpr size_t kSrcPos = 1;
    static constexpr size_t kDest = 2;
    static constexpr size_t kDestPos = 3;
    static constexpr size_t kLength = 4;

    if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
        !invoke->InputAt(kDestPos)->IsIntConstant() &&
        !invoke->InputAt(kLength)->IsIntConstant()) {
      if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
          !IsSameInput(invoke, kSrcPos, kLength) &&
          !IsSameInput(invoke, kDestPos, kLength) &&
          !IsSameInput(invoke, kSrc, kDest)) {
        // Not enough registers, make the length also take a stack slot.
        invoke->GetLocations()->SetInAt(kLength, Location::Any());
      }
    }
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
  // The only read barrier implementation supporting the
  // SystemArrayCopy intrinsic is the Baker-style read barrier.
  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);

  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location src_pos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location dest_pos = locations->InAt(3);
  Location length_arg = locations->InAt(4);
  Location length = length_arg;
  Location temp1_loc = locations->GetTemp(0);
  Register temp1 = temp1_loc.AsRegister<Register>();
  Location temp2_loc = locations->GetTemp(1);
  Register temp2 = temp2_loc.AsRegister<Register>();

  SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(intrinsic_slow_path);

  NearLabel conditions_on_positions_validated;
  SystemArrayCopyOptimizations optimizations(invoke);

  // If source and destination are the same, we go to the slow path if we need
  // to do forward copying.
  if (src_pos.IsConstant()) {
    int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      if (optimizations.GetDestinationIsSource()) {
        // Checked when building locations.
        DCHECK_GE(src_pos_constant, dest_pos_constant);
      } else if (src_pos_constant < dest_pos_constant) {
        __ cmpl(src, dest);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      if (!optimizations.GetDestinationIsSource()) {
        __ cmpl(src, dest);
        __ j(kNotEqual, &conditions_on_positions_validated);
      }
      __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
      __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
    }
  } else {
    if (!optimizations.GetDestinationIsSource()) {
      __ cmpl(src, dest);
      __ j(kNotEqual, &conditions_on_positions_validated);
    }
    if (dest_pos.IsConstant()) {
      int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    } else {
      __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
      __ j(kLess, intrinsic_slow_path->GetEntryLabel());
    }
  }

  __ Bind(&conditions_on_positions_validated);

  if (!optimizations.GetSourceIsNotNull()) {
    // Bail out if the source is null.
    __ testl(src, src);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
    // Bail out if the destination is null.
    __ testl(dest, dest);
    __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
  }

  Location temp3_loc = locations->GetTemp(2);
  Register temp3 = temp3_loc.AsRegister<Register>();
  if (length.IsStackSlot()) {
    __ movl(temp3, Address(ESP, length.GetStackIndex()));
    length = Location::RegisterLocation(temp3);
  }

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant() &&
      !optimizations.GetCountIsSourceLength() &&
      !optimizations.GetCountIsDestinationLength()) {
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, intrinsic_slow_path->GetEntryLabel());
  }

  // Validity checks: source.
  CheckPosition(assembler,
                src_pos,
                src,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsSourceLength());

  // Validity checks: dest.
  CheckPosition(assembler,
                dest_pos,
                dest,
                length,
                intrinsic_slow_path,
                temp1,
                optimizations.GetCountIsDestinationLength());

  if (!optimizations.GetDoesNotNeedTypeCheck()) {
    // Check whether all elements of the source array are assignable to the component
    // type of the destination array. We do two checks: the classes are the same,
    // or the destination is Object[]. If none of these checks succeed, we go to the
    // slow path.

    if (!optimizations.GetSourceIsNonPrimitiveArray()) {
      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
        // /* HeapReference<Class> */ temp1 = src->klass_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
        // Bail out if the source is not a non-primitive array.
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp1` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
      } else {
        // /* HeapReference<Class> */ temp1 = src->klass_
        __ movl(temp1, Address(src, class_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the source is not a non-primitive array.
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ testl(temp1, temp1);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp1);
      }
      __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
      __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
    }

    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      if (length.Equals(Location::RegisterLocation(temp3))) {
        // When Baker read barriers are enabled, register `temp3`,
        // which in the present case contains the `length` parameter,
        // will be overwritten below. Make the `length` location
        // reference the original stack location; it will be moved
        // back to `temp3` later if necessary.
        DCHECK(length_arg.IsStackSlot());
        length = length_arg;
      }

      // /* HeapReference<Class> */ temp1 = dest->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);

      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        // Bail out if the destination is not a non-primitive array.
        //
        // Register `temp1` is not trashed by the read barrier emitted
        // by GenerateFieldLoadWithBakerReadBarrier below, as that
        // method produces a call to a ReadBarrierMarkRegX entry point,
        // which saves all potentially live registers, including
        // temporaries such as `temp1`.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        // If heap poisoning is enabled, `temp2` has been unpoisoned
        // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }

      // For the same reason given earlier, `temp1` is not trashed by the
      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
      // /* HeapReference<Class> */ temp2 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
      __ cmpl(temp1, temp2);

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        codegen_->GenerateFieldLoadWithBakerReadBarrier(
            invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
        // We do not need to emit a read barrier for the following
        // heap reference load, as `temp1` is only used in a
        // comparison with null below, and this reference is not
        // kept afterwards.
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    } else {
      // Non read barrier code.

      // /* HeapReference<Class> */ temp1 = dest->klass_
      __ movl(temp1, Address(dest, class_offset));
      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
        __ MaybeUnpoisonHeapReference(temp1);
        // Bail out if the destination is not a non-primitive array.
        // /* HeapReference<Class> */ temp2 = temp1->component_type_
        __ movl(temp2, Address(temp1, component_offset));
        __ testl(temp2, temp2);
        __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
        __ MaybeUnpoisonHeapReference(temp2);
        __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        // Re-poison the heap reference to make the compare instruction below
        // compare two poisoned references.
        __ PoisonHeapReference(temp1);
      }

      // Note: if heap poisoning is on, we are comparing two poisoned references here.
      __ cmpl(temp1, Address(src, class_offset));

      if (optimizations.GetDestinationIsTypedObjectArray()) {
        NearLabel do_copy;
        __ j(kEqual, &do_copy);
        __ MaybeUnpoisonHeapReference(temp1);
        // /* HeapReference<Class> */ temp1 = temp1->component_type_
        __ movl(temp1, Address(temp1, component_offset));
        __ MaybeUnpoisonHeapReference(temp1);
        __ cmpl(Address(temp1, super_offset), Immediate(0));
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
        __ Bind(&do_copy);
      } else {
        __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
      }
    }
  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
    // Bail out if the source is not a non-primitive array.
    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
      // /* HeapReference<Class> */ temp1 = src->klass_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      // If heap poisoning is enabled, `temp1` has been unpoisoned
      // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
    } else {
      // /* HeapReference<Class> */ temp1 = src->klass_
      __ movl(temp1, Address(src, class_offset));
      __ MaybeUnpoisonHeapReference(temp1);
      // /* HeapReference<Class> */ temp1 = temp1->component_type_
      __ movl(temp1, Address(temp1, component_offset));
      __ testl(temp1, temp1);
      __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
      __ MaybeUnpoisonHeapReference(temp1);
    }
    __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
    __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
  }

  const Primitive::Type type = Primitive::kPrimNot;
  const int32_t element_size = Primitive::ComponentSize(type);

  // Compute the base source address in `temp1`.
  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);

  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
    // If it is needed (in the case of the fast-path loop), the base
    // destination address is computed later, as `temp2` is used for
    // intermediate computations.

    // Compute the end source address in `temp3`.
    if (length.IsStackSlot()) {
      // Location `length` is again pointing at a stack slot, as
      // register `temp3` (which was containing the length parameter
      // earlier) has been overwritten; restore it now.
      DCHECK(length.Equals(length_arg));
      __ movl(temp3, Address(ESP, length.GetStackIndex()));
      length = Location::RegisterLocation(temp3);
    }
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);

    // SystemArrayCopy implementation for Baker read barriers (see
    // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
    //
    //   if (src_ptr != end_ptr) {
    //     uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
    //     if (is_gray) {
    //       // Slow-path copy.
    //       for (size_t i = 0; i != length; ++i) {
    //         dest_array[dest_pos + i] =
    //             MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
    //       }
    //     } else {
    //       // Fast-path copy.
    //       do {
    //         *dest_ptr++ = *src_ptr++;
    //       } while (src_ptr != end_ptr)
    //     }
    //   }

    NearLabel loop, done;

    // Don't enter the copy loop if `length == 0`.
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);

    // Given the numeric representation, it's enough to check the low bit of the rb_state.
    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
    constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
    constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
    constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);

    // if (rb_state == ReadBarrier::GrayState())
    //   goto slow_path;
    // At this point, just do the "if" and make sure that flags are preserved until the branch.
    __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));

    // Load fence to prevent load-load reordering.
    // Note that this is a no-op, thanks to the x86 memory model.
    codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

    // Slow path used to copy the array when `src` is gray.
    SlowPathCode* read_barrier_slow_path =
        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
    codegen_->AddSlowPath(read_barrier_slow_path);

    // We have done the "if" of the gray bit check above, now branch based on the flags.
    __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());

    // Fast-path copy.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);

    __ Bind(read_barrier_slow_path->GetExitLabel());
    __ Bind(&done);
  } else {
    // Non read barrier code.
    // Compute the base destination address in `temp2`.
    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
    // Compute the end source address in `temp3`.
    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
    // Iterate over the arrays and do a raw copy of the objects. We don't need to
    // poison/unpoison.
    NearLabel loop, done;
    __ cmpl(temp1, temp3);
    __ j(kEqual, &done);
    __ Bind(&loop);
    __ pushl(Address(temp1, 0));
    __ cfi().AdjustCFAOffset(4);
    __ popl(Address(temp2, 0));
    __ cfi().AdjustCFAOffset(-4);
    __ addl(temp1, Immediate(element_size));
    __ addl(temp2, Immediate(element_size));
    __ cmpl(temp1, temp3);
    __ j(kNotEqual, &loop);
    __ Bind(&done);
  }

  // We only need one card marking on the destination array.
  codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);

  __ Bind(intrinsic_slow_path->GetExitLabel());
}
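
// A reference-only sketch of the gray-bit probe in the Baker path above: the
// read barrier state lives in the object's lock word, and with
// GrayState() == 1 a single testb of the byte containing that bit decides
// between the fast-path copy and the read barrier slow path. Illustrative
// only; the function name is hypothetical.
inline bool IsGraySketch(uint32_t monitor_word) {
  return ((monitor_word >> LockWord::kReadBarrierStateShift) & 1u) != 0u;
}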

void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
  InvokeRuntimeCallingConvention calling_convention;
  IntrinsicVisitor::ComputeIntegerValueOfLocations(
      invoke,
      codegen_,
      Location::RegisterLocation(EAX),
      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
}

void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
  LocationSummary* locations = invoke->GetLocations();
  X86Assembler* assembler = GetAssembler();

  Register out = locations->Out().AsRegister<Register>();
  InvokeRuntimeCallingConvention calling_convention;
  if (invoke->InputAt(0)->IsConstant()) {
    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
    if (value >= info.low && value <= info.high) {
      // Just embed the j.l.Integer in the code.
      ScopedObjectAccess soa(Thread::Current());
      mirror::Object* boxed = info.cache->Get(value + (-info.low));
      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
      __ movl(out, Immediate(address));
    } else {
      // Allocate and initialize a new j.l.Integer.
      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
      // JIT object table.
      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
      __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
      __ movl(Address(out, info.value_offset), Immediate(value));
    }
  } else {
    Register in = locations->InAt(0).AsRegister<Register>();
    // Check the bounds of our cache.
    __ leal(out, Address(in, -info.low));
    __ cmpl(out, Immediate(info.high - info.low + 1));
    NearLabel allocate, done;
    __ j(kAboveEqual, &allocate);
    // If the value is within the bounds, load the j.l.Integer directly from the array.
    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
    __ movl(out, Address(out, TIMES_4, data_offset + address));
    __ MaybeUnpoisonHeapReference(out);
    __ jmp(&done);
    __ Bind(&allocate);
    // Otherwise allocate and initialize a new j.l.Integer.
    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
    __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
    __ movl(Address(out, info.value_offset), in);
    __ Bind(&done);
  }
}
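
// A reference-only sketch of the non-constant fast path generated above,
// mirroring Integer.valueOf's boxed-value cache: values in [low, high] load
// the boxed object straight from the boot-image cache array (one unsigned
// compare covers both bounds), anything else calls into the runtime
// allocator. Illustrative only; names and the allocator callback are
// hypothetical.
inline uintptr_t IntegerValueOfSketch(int32_t v,
                                      int32_t low,
                                      int32_t high,
                                      const uintptr_t* cache,
                                      uintptr_t (*allocate)(int32_t)) {
  uint32_t index = static_cast<uint32_t>(v - low);  // leal(out, Address(in, -low)).
  if (index < static_cast<uint32_t>(high - low + 1)) {
    return cache[index];  // In range: load the cached j.l.Integer.
  }
  return allocate(v);     // Out of range: allocate and initialize a new one.
}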

UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)

UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf)
UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength)
UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength)
UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString)

// 1.8 (Java 8) methods.
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)

UNREACHABLE_INTRINSICS(X86)

#undef __

}  // namespace x86
}  // namespace art