blob: dbaf9ea105c041944c7b446c6e268ee332ae5d30 [file] [log] [blame]
buzbee67bf8852011-08-17 17:51:35 -07001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/*
 * This file contains codegen for the Thumb2 ISA and is intended to be
 * included by:
 *
 *        Codegen-$(TARGET_ARCH_VARIANT).c
 *
 */
24
25/*
26 * Construct an s4 from two consecutive half-words of switch data.
27 * This needs to check endianness because the DEX optimizer only swaps
28 * half-words in instruction stream.
29 *
30 * "switchData" must be 32-bit aligned.
31 */
32#if __BYTE_ORDER == __LITTLE_ENDIAN
33static inline s4 s4FromSwitchData(const void* switchData) {
34 return *(s4*) switchData;
35}
36#else
37static inline s4 s4FromSwitchData(const void* switchData) {
38 u2* data = switchData;
39 return data[0] | (((s4) data[1]) << 16);
40}
41#endif
42
buzbee1b4c8592011-08-31 10:43:51 -070043/* Generate unconditional branch instructions */
44static ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
45{
46 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
47 branch->generic.target = (LIR*) target;
48 return branch;
49}
50
buzbee67bf8852011-08-17 17:51:35 -070051/*
52 * Generate a Thumb2 IT instruction, which can nullify up to
53 * four subsequent instructions based on a condition and its
54 * inverse. The condition applies to the first instruction, which
55 * is executed if the condition is met. The string "guide" consists
56 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
57 * A "T" means the instruction is executed if the condition is
58 * met, and an "E" means the instruction is executed if the condition
59 * is not met.
60 */
61static ArmLIR* genIT(CompilationUnit* cUnit, ArmConditionCode code,
62 const char* guide)
63{
64 int mask;
65 int condBit = code & 1;
66 int altBit = condBit ^ 1;
67 int mask3 = 0;
68 int mask2 = 0;
69 int mask1 = 0;
70
71 //Note: case fallthroughs intentional
72 switch(strlen(guide)) {
73 case 3:
74 mask1 = (guide[2] == 'T') ? condBit : altBit;
75 case 2:
76 mask2 = (guide[1] == 'T') ? condBit : altBit;
77 case 1:
78 mask3 = (guide[0] == 'T') ? condBit : altBit;
79 break;
80 case 0:
81 break;
82 default:
83 LOG(FATAL) << "OAT: bad case in genIT";
84 }
85 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
86 (1 << (3 - strlen(guide)));
87 return newLIR2(cUnit, kThumb2It, code, mask);
88}
89
90/*
91 * Insert a kArmPseudoCaseLabel at the beginning of the Dalvik
92 * offset vaddr. This label will be used to fix up the case
93 * branch table during the assembly phase. Be sure to set
94 * all resource flags on this to prevent code motion across
95 * target boundaries. KeyVal is just there for debugging.
96 */
97static ArmLIR* insertCaseLabel(CompilationUnit* cUnit, int vaddr, int keyVal)
98{
99 ArmLIR* lir;
100 for (lir = (ArmLIR*)cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
101 if ((lir->opcode == kArmPseudoDalvikByteCodeBoundary) &&
102 (lir->generic.dalvikOffset == vaddr)) {
103 ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
104 newLabel->generic.dalvikOffset = vaddr;
105 newLabel->opcode = kArmPseudoCaseLabel;
106 newLabel->operands[0] = keyVal;
107 oatInsertLIRAfter((LIR*)lir, (LIR*)newLabel);
108 return newLabel;
109 }
110 }
111 oatCodegenDump(cUnit);
112 LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
113 return NULL; // Quiet gcc
114}
115
116static void markPackedCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
117{
118 const u2* table = tabRec->table;
119 int baseVaddr = tabRec->vaddr;
120 int *targets = (int*)&table[4];
121 int entries = table[1];
122 int lowKey = s4FromSwitchData(&table[2]);
123 for (int i = 0; i < entries; i++) {
124 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
125 i + lowKey);
126 }
127}
128
129static void markSparseCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
130{
131 const u2* table = tabRec->table;
132 int baseVaddr = tabRec->vaddr;
133 int entries = table[1];
134 int* keys = (int*)&table[2];
135 int* targets = &keys[entries];
136 for (int i = 0; i < entries; i++) {
137 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
138 keys[i]);
139 }
140}
141
142void oatProcessSwitchTables(CompilationUnit* cUnit)
143{
144 GrowableListIterator iterator;
145 oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
146 while (true) {
147 SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext(
148 &iterator);
149 if (tabRec == NULL) break;
150 if (tabRec->table[0] == kPackedSwitchSignature)
151 markPackedCaseLabels(cUnit, tabRec);
152 else if (tabRec->table[0] == kSparseSwitchSignature)
153 markSparseCaseLabels(cUnit, tabRec);
154 else {
155 LOG(FATAL) << "Invalid switch table";
156 }
157 }
158}
159
160static void dumpSparseSwitchTable(const u2* table)
161 /*
162 * Sparse switch data format:
163 * ushort ident = 0x0200 magic value
164 * ushort size number of entries in the table; > 0
165 * int keys[size] keys, sorted low-to-high; 32-bit aligned
166 * int targets[size] branch targets, relative to switch opcode
167 *
168 * Total size is (2+size*4) 16-bit code units.
169 */
170{
171 u2 ident = table[0];
172 int entries = table[1];
173 int* keys = (int*)&table[2];
174 int* targets = &keys[entries];
175 LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident <<
176 ", entries: " << std::dec << entries;
177 for (int i = 0; i < entries; i++) {
178 LOG(INFO) << " Key[" << keys[i] << "] -> 0x" << std::hex <<
179 targets[i];
180 }
181}
182
183static void dumpPackedSwitchTable(const u2* table)
184 /*
185 * Packed switch data format:
186 * ushort ident = 0x0100 magic value
187 * ushort size number of entries in the table
188 * int first_key first (and lowest) switch case value
189 * int targets[size] branch targets, relative to switch opcode
190 *
191 * Total size is (4+size*2) 16-bit code units.
192 */
193{
194 u2 ident = table[0];
195 int* targets = (int*)&table[4];
196 int entries = table[1];
197 int lowKey = s4FromSwitchData(&table[2]);
198 LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident <<
199 ", entries: " << std::dec << entries << ", lowKey: " << lowKey;
200 for (int i = 0; i < entries; i++) {
201 LOG(INFO) << " Key[" << (i + lowKey) << "] -> 0x" << std::hex <<
202 targets[i];
203 }
204}
205
206/*
207 * The sparse table in the literal pool is an array of <key,displacement>
208 * pairs. For each set, we'll load them as a pair using ldmia.
209 * This means that the register number of the temp we use for the key
210 * must be lower than the reg for the displacement.
211 *
212 * The test loop will look something like:
213 *
214 * adr rBase, <table>
215 * ldr rVal, [rSP, vRegOff]
216 * mov rIdx, #tableSize
217 * lp:
218 * ldmia rBase!, {rKey, rDisp}
219 * sub rIdx, #1
220 * cmp rVal, rKey
221 * ifeq
222 * add rPC, rDisp ; This is the branch from which we compute displacement
223 * cbnz rIdx, lp
224 */
225static void genSparseSwitch(CompilationUnit* cUnit, MIR* mir,
226 RegLocation rlSrc)
227{
228 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
229 if (cUnit->printMe) {
230 dumpSparseSwitchTable(table);
231 }
232 // Add the table to the list - we'll process it later
233 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
234 true);
235 tabRec->table = table;
236 tabRec->vaddr = mir->offset;
237 int size = table[1];
238 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
239 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
240
241 // Get the switch value
242 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
243 int rBase = oatAllocTemp(cUnit);
244 /* Allocate key and disp temps */
245 int rKey = oatAllocTemp(cUnit);
246 int rDisp = oatAllocTemp(cUnit);
247 // Make sure rKey's register number is less than rDisp's number for ldmia
248 if (rKey > rDisp) {
249 int tmp = rDisp;
250 rDisp = rKey;
251 rKey = tmp;
252 }
253 // Materialize a pointer to the switch table
254 newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec);
255 // Set up rIdx
256 int rIdx = oatAllocTemp(cUnit);
257 loadConstant(cUnit, rIdx, size);
258 // Establish loop branch target
259 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
260 target->defMask = ENCODE_ALL;
261 // Load next key/disp
262 newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
263 opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
264 // Go if match. NOTE: No instruction set switch here - must stay Thumb2
265 genIT(cUnit, kArmCondEq, "");
266 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
267 tabRec->bxInst = switchBranch;
268 // Needs to use setflags encoding here
269 newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
270 ArmLIR* branch = opCondBranch(cUnit, kArmCondNe);
271 branch->generic.target = (LIR*)target;
272}
273
274
275static void genPackedSwitch(CompilationUnit* cUnit, MIR* mir,
276 RegLocation rlSrc)
277{
278 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
279 if (cUnit->printMe) {
280 dumpPackedSwitchTable(table);
281 }
282 // Add the table to the list - we'll process it later
283 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
284 true);
285 tabRec->table = table;
286 tabRec->vaddr = mir->offset;
287 int size = table[1];
288 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
289 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
290
291 // Get the switch value
292 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
293 int tableBase = oatAllocTemp(cUnit);
294 // Materialize a pointer to the switch table
295 newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec);
296 int lowKey = s4FromSwitchData(&table[2]);
297 int keyReg;
298 // Remove the bias, if necessary
299 if (lowKey == 0) {
300 keyReg = rlSrc.lowReg;
301 } else {
302 keyReg = oatAllocTemp(cUnit);
303 opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
304 }
305 // Bounds check - if < 0 or >= size continue following switch
306 opRegImm(cUnit, kOpCmp, keyReg, size-1);
307 ArmLIR* branchOver = opCondBranch(cUnit, kArmCondHi);
308
309 // Load the displacement from the switch table
310 int dispReg = oatAllocTemp(cUnit);
311 loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
312
313 // ..and go! NOTE: No instruction set switch here - must stay Thumb2
314 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
315 tabRec->bxInst = switchBranch;
316
317 /* branchOver target here */
318 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
319 target->defMask = ENCODE_ALL;
320 branchOver->generic.target = (LIR*)target;
321}
322
323/*
324 * Array data table format:
325 * ushort ident = 0x0300 magic value
326 * ushort width width of each element in the table
327 * uint size number of elements in the table
328 * ubyte data[size*width] table of data values (may contain a single-byte
329 * padding at the end)
330 *
331 * Total size is 4+(width * size + 1)/2 16-bit code units.
332 */
333static void genFillArrayData(CompilationUnit* cUnit, MIR* mir,
334 RegLocation rlSrc)
335{
336 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
337 // Add the table to the list - we'll process it later
338 FillArrayData *tabRec = (FillArrayData *)
339 oatNew(sizeof(FillArrayData), true);
340 tabRec->table = table;
341 tabRec->vaddr = mir->offset;
342 u2 width = tabRec->table[1];
343 u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16);
344 tabRec->size = (size * width) + 8;
345
346 oatInsertGrowableList(&cUnit->fillArrayData, (intptr_t)tabRec);
347
348 // Making a call - use explicit registers
349 oatFlushAllRegs(cUnit); /* Everything to home location */
350 loadValueDirectFixed(cUnit, rlSrc, r0);
351 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -0700352 OFFSETOF_MEMBER(Thread, pHandleFillArrayDataFromCode), rLR);
buzbeee6d61962011-08-27 11:58:19 -0700353 // Materialize a pointer to the fill data image
buzbee67bf8852011-08-17 17:51:35 -0700354 newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec);
355 opReg(cUnit, kOpBlx, rLR);
356 oatClobberCallRegs(cUnit);
357}
358
359/*
360 * Mark garbage collection card. Skip if the value we're storing is null.
361 */
362static void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
363{
Elliott Hughes0f4c41d2011-09-04 14:58:03 -0700364#if 1
365 UNIMPLEMENTED(WARNING);
366#else
buzbee67bf8852011-08-17 17:51:35 -0700367 int regCardBase = oatAllocTemp(cUnit);
368 int regCardNo = oatAllocTemp(cUnit);
369 ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
buzbeec143c552011-08-20 17:38:58 -0700370 loadWordDisp(cUnit, rSELF, Thread::CardTableOffset().Int32Value(),
buzbee67bf8852011-08-17 17:51:35 -0700371 regCardBase);
372 opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
373 storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
374 kUnsignedByte);
375 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
376 target->defMask = ENCODE_ALL;
377 branchOver->generic.target = (LIR*)target;
378 oatFreeTemp(cUnit, regCardBase);
379 oatFreeTemp(cUnit, regCardNo);
Elliott Hughes0f4c41d2011-09-04 14:58:03 -0700380#endif
buzbee67bf8852011-08-17 17:51:35 -0700381}
382
383static void genIGetX(CompilationUnit* cUnit, MIR* mir, OpSize size,
384 RegLocation rlDest, RegLocation rlObj)
385{
buzbeec143c552011-08-20 17:38:58 -0700386 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
387 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700388 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700389 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700390 }
391#if ANDROID_SMP != 0
392 bool isVolatile = dvmIsVolatileField(fieldPtr);
393#else
394 bool isVolatile = false;
395#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700396 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700397 RegLocation rlResult;
398 RegisterClass regClass = oatRegClassBySize(size);
399 rlObj = loadValue(cUnit, rlObj, kCoreReg);
400 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
401 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
402 NULL);/* null object? */
403 loadBaseDisp(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg,
404 size, rlObj.sRegLow);
405 if (isVolatile) {
406 oatGenMemBarrier(cUnit, kSY);
407 }
408
409 storeValue(cUnit, rlDest, rlResult);
410}
411
412static void genIPutX(CompilationUnit* cUnit, MIR* mir, OpSize size,
413 RegLocation rlSrc, RegLocation rlObj, bool isObject)
414{
buzbeec143c552011-08-20 17:38:58 -0700415 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
416 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700417 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700418 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700419 }
420#if ANDROID_SMP != 0
421 bool isVolatile = dvmIsVolatileField(fieldPtr);
422#else
423 bool isVolatile = false;
424#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700425 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700426 RegisterClass regClass = oatRegClassBySize(size);
427 rlObj = loadValue(cUnit, rlObj, kCoreReg);
428 rlSrc = loadValue(cUnit, rlSrc, regClass);
429 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
430 NULL);/* null object? */
431
432 if (isVolatile) {
433 oatGenMemBarrier(cUnit, kSY);
434 }
435 storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
436 if (isObject) {
437 /* NOTE: marking card based on object head */
438 markGCCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
439 }
440}
441
442static void genIGetWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
443 RegLocation rlObj)
444{
buzbeec143c552011-08-20 17:38:58 -0700445 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
446 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700447 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700448 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700449 }
450#if ANDROID_SMP != 0
451 bool isVolatile = dvmIsVolatileField(fieldPtr);
452#else
453 bool isVolatile = false;
454#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700455 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700456 RegLocation rlResult;
457 rlObj = loadValue(cUnit, rlObj, kCoreReg);
458 int regPtr = oatAllocTemp(cUnit);
459
460 assert(rlDest.wide);
461
462 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
463 NULL);/* null object? */
464 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
465 rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
466
467 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
468
469 if (isVolatile) {
470 oatGenMemBarrier(cUnit, kSY);
471 }
472
473 oatFreeTemp(cUnit, regPtr);
474 storeValueWide(cUnit, rlDest, rlResult);
475}
476
477static void genIPutWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
478 RegLocation rlObj)
479{
buzbeec143c552011-08-20 17:38:58 -0700480 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
481 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700482 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700483 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700484 }
485#if ANDROID_SMP != 0
486 bool isVolatile = dvmIsVolatileField(fieldPtr);
487#else
488 bool isVolatile = false;
489#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700490 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700491
492 rlObj = loadValue(cUnit, rlObj, kCoreReg);
493 int regPtr;
494 rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
495 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
496 NULL);/* null object? */
497 regPtr = oatAllocTemp(cUnit);
498 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
499
500 if (isVolatile) {
501 oatGenMemBarrier(cUnit, kSY);
502 }
503 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
504
505 oatFreeTemp(cUnit, regPtr);
506}
507
508static void genConstClass(CompilationUnit* cUnit, MIR* mir,
509 RegLocation rlDest, RegLocation rlSrc)
510{
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700511 art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
buzbee1b4c8592011-08-31 10:43:51 -0700512 Get(mir->dalvikInsn.vB);
513 int mReg = loadCurrMethod(cUnit);
514 int resReg = oatAllocTemp(cUnit);
buzbee67bf8852011-08-17 17:51:35 -0700515 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
buzbee2a475e72011-09-07 17:19:17 -0700516 loadWordDisp(cUnit, mReg, Method::DexCacheResolvedTypesOffset().Int32Value(),
buzbee1b4c8592011-08-31 10:43:51 -0700517 resReg);
518 loadWordDisp(cUnit, resReg, Array::DataOffset().Int32Value() +
519 (sizeof(String*) * mir->dalvikInsn.vB), rlResult.lowReg);
520 if (classPtr != NULL) {
521 // Fast path, we're done - just store result
522 storeValue(cUnit, rlDest, rlResult);
523 } else {
524 // Slow path. Must test at runtime
525 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, rlResult.lowReg,
526 0);
527 // Resolved, store and hop over following code
528 storeValue(cUnit, rlDest, rlResult);
529 ArmLIR* branch2 = genUnconditionalBranch(cUnit,0);
530 // TUNING: move slow path to end & remove unconditional branch
531 ArmLIR* target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
532 target1->defMask = ENCODE_ALL;
533 // Call out to helper, which will return resolved type in r0
534 loadWordDisp(cUnit, rSELF,
535 OFFSETOF_MEMBER(Thread, pInitializeTypeFromCode), rLR);
536 genRegCopy(cUnit, r1, mReg);
537 loadConstant(cUnit, r0, mir->dalvikInsn.vB);
538 opReg(cUnit, kOpBlx, rLR); // resolveTypeFromCode(idx, method)
539 oatClobberCallRegs(cUnit);
540 RegLocation rlResult = oatGetReturn(cUnit);
541 storeValue(cUnit, rlDest, rlResult);
542 // Rejoin code paths
543 ArmLIR* target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
544 target2->defMask = ENCODE_ALL;
545 branch1->generic.target = (LIR*)target1;
546 branch2->generic.target = (LIR*)target2;
547 }
buzbee67bf8852011-08-17 17:51:35 -0700548}
549
550static void genConstString(CompilationUnit* cUnit, MIR* mir,
551 RegLocation rlDest, RegLocation rlSrc)
552{
buzbee1b4c8592011-08-31 10:43:51 -0700553 /* All strings should be available at compile time */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700554 const art::String* str = cUnit->method->GetDexCacheStrings()->
buzbee1b4c8592011-08-31 10:43:51 -0700555 Get(mir->dalvikInsn.vB);
556 DCHECK(str != NULL);
buzbee67bf8852011-08-17 17:51:35 -0700557
buzbee1b4c8592011-08-31 10:43:51 -0700558 int mReg = loadCurrMethod(cUnit);
559 int resReg = oatAllocTemp(cUnit);
buzbee67bf8852011-08-17 17:51:35 -0700560 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700561 loadWordDisp(cUnit, mReg, Method::DexCacheStringsOffset().Int32Value(),
buzbee1b4c8592011-08-31 10:43:51 -0700562 resReg);
563 loadWordDisp(cUnit, resReg, Array::DataOffset().Int32Value() +
564 (sizeof(String*) * mir->dalvikInsn.vB), rlResult.lowReg);
buzbee67bf8852011-08-17 17:51:35 -0700565 storeValue(cUnit, rlDest, rlResult);
566}
567
buzbeedfd3d702011-08-28 12:56:51 -0700568/*
569 * Let helper function take care of everything. Will
570 * call Class::NewInstanceFromCode(type_idx, method);
571 */
buzbee67bf8852011-08-17 17:51:35 -0700572static void genNewInstance(CompilationUnit* cUnit, MIR* mir,
573 RegLocation rlDest)
574{
buzbeedfd3d702011-08-28 12:56:51 -0700575 oatFlushAllRegs(cUnit); /* Everything to home location */
buzbee67bf8852011-08-17 17:51:35 -0700576 loadWordDisp(cUnit, rSELF,
Brian Carlstrom1f870082011-08-23 16:02:11 -0700577 OFFSETOF_MEMBER(Thread, pAllocObjectFromCode), rLR);
buzbeedfd3d702011-08-28 12:56:51 -0700578 loadCurrMethodDirect(cUnit, r1); // arg1 <= Method*
579 loadConstant(cUnit, r0, mir->dalvikInsn.vB); // arg0 <- type_id
buzbee67bf8852011-08-17 17:51:35 -0700580 opReg(cUnit, kOpBlx, rLR);
581 oatClobberCallRegs(cUnit);
582 RegLocation rlResult = oatGetReturn(cUnit);
583 storeValue(cUnit, rlDest, rlResult);
584}
585
586void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
587{
588 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -0700589 OFFSETOF_MEMBER(Thread, pThrowException), rLR);
590 loadValueDirectFixed(cUnit, rlSrc, r1); // Get exception object
buzbee67bf8852011-08-17 17:51:35 -0700591 genRegCopy(cUnit, r0, rSELF);
buzbee1b4c8592011-08-31 10:43:51 -0700592 opReg(cUnit, kOpBlx, rLR); // artThrowException(thread, exception);
buzbee67bf8852011-08-17 17:51:35 -0700593}
594
595static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
596 RegLocation rlSrc)
597{
buzbee2a475e72011-09-07 17:19:17 -0700598 // May generate a call - use explicit registers
599 oatLockCallTemps(cUnit);
600 art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
601 Get(mir->dalvikInsn.vC);
602 int classReg = r2; // Fixed usage
603 loadCurrMethodDirect(cUnit, r1); // r1 <= current Method*
604 loadWordDisp(cUnit, r1, Method::DexCacheResolvedTypesOffset().Int32Value(),
605 classReg);
606 loadWordDisp(cUnit, classReg, Array::DataOffset().Int32Value() +
607 (sizeof(String*) * mir->dalvikInsn.vC), classReg);
buzbee67bf8852011-08-17 17:51:35 -0700608 if (classPtr == NULL) {
buzbee2a475e72011-09-07 17:19:17 -0700609 // Generate a runtime test
610 ArmLIR* hopBranch = genCmpImmBranch(cUnit, kArmCondNe, classReg, 0);
611 // Not resolved
612 // Call out to helper, which will return resolved type in r0
613 loadWordDisp(cUnit, rSELF,
614 OFFSETOF_MEMBER(Thread, pInitializeTypeFromCode), rLR);
615 loadConstant(cUnit, r0, mir->dalvikInsn.vC);
616 opReg(cUnit, kOpBlx, rLR); // resolveTypeFromCode(idx, method)
617 genRegCopy(cUnit, r2, r0); // Align usage with fast path
618 // Rejoin code paths
619 ArmLIR* hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
620 hopTarget->defMask = ENCODE_ALL;
621 hopBranch->generic.target = (LIR*)hopTarget;
buzbee67bf8852011-08-17 17:51:35 -0700622 }
buzbee2a475e72011-09-07 17:19:17 -0700623 // At this point, r2 has class
624 loadValueDirectFixed(cUnit, rlSrc, r3); /* Ref */
buzbee67bf8852011-08-17 17:51:35 -0700625 /* When taken r0 has NULL which can be used for store directly */
buzbee2a475e72011-09-07 17:19:17 -0700626 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, r3, 0);
627 /* load object->clazz */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700628 assert(Object::ClassOffset().Int32Value() == 0);
buzbee2a475e72011-09-07 17:19:17 -0700629 loadWordDisp(cUnit, r3, Object::ClassOffset().Int32Value(), r1);
buzbee67bf8852011-08-17 17:51:35 -0700630 /* r1 now contains object->clazz */
631 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -0700632 OFFSETOF_MEMBER(Thread, pInstanceofNonTrivialFromCode), rLR);
buzbee67bf8852011-08-17 17:51:35 -0700633 loadConstant(cUnit, r0, 1); /* Assume true */
634 opRegReg(cUnit, kOpCmp, r1, r2);
635 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
buzbee2a475e72011-09-07 17:19:17 -0700636 genRegCopy(cUnit, r0, r3);
buzbee67bf8852011-08-17 17:51:35 -0700637 genRegCopy(cUnit, r1, r2);
638 opReg(cUnit, kOpBlx, rLR);
639 oatClobberCallRegs(cUnit);
640 /* branch target here */
641 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
642 target->defMask = ENCODE_ALL;
buzbee2a475e72011-09-07 17:19:17 -0700643 RegLocation rlResult = oatGetReturn(cUnit);
buzbee67bf8852011-08-17 17:51:35 -0700644 storeValue(cUnit, rlDest, rlResult);
645 branch1->generic.target = (LIR*)target;
646 branch2->generic.target = (LIR*)target;
647}
648
649static void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
650{
buzbee2a475e72011-09-07 17:19:17 -0700651 // May generate a call - use explicit registers
652 oatLockCallTemps(cUnit);
653 art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
654 Get(mir->dalvikInsn.vB);
655 int classReg = r2; // Fixed usage
656 loadCurrMethodDirect(cUnit, r1); // r1 <= current Method*
657 loadWordDisp(cUnit, r1, Method::DexCacheResolvedTypesOffset().Int32Value(),
658 classReg);
659 loadWordDisp(cUnit, classReg, Array::DataOffset().Int32Value() +
660 (sizeof(String*) * mir->dalvikInsn.vB), classReg);
buzbee67bf8852011-08-17 17:51:35 -0700661 if (classPtr == NULL) {
buzbee2a475e72011-09-07 17:19:17 -0700662 // Generate a runtime test
663 ArmLIR* hopBranch = genCmpImmBranch(cUnit, kArmCondNe, classReg, 0);
664 // Not resolved
665 // Call out to helper, which will return resolved type in r0
666 loadWordDisp(cUnit, rSELF,
667 OFFSETOF_MEMBER(Thread, pInitializeTypeFromCode), rLR);
668 loadConstant(cUnit, r0, mir->dalvikInsn.vB);
669 opReg(cUnit, kOpBlx, rLR); // resolveTypeFromCode(idx, method)
670 genRegCopy(cUnit, r2, r0); // Align usage with fast path
671 // Rejoin code paths
672 ArmLIR* hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
673 hopTarget->defMask = ENCODE_ALL;
674 hopBranch->generic.target = (LIR*)hopTarget;
buzbee67bf8852011-08-17 17:51:35 -0700675 }
buzbee2a475e72011-09-07 17:19:17 -0700676 // At this point, r2 has class
677 loadValueDirectFixed(cUnit, rlSrc, r0); /* Ref */
678 /* Null is OK - continue */
679 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, r0, 0);
680 /* load object->clazz */
681 assert(Object::ClassOffset().Int32Value() == 0);
682 loadWordDisp(cUnit, r0, Object::ClassOffset().Int32Value(), r1);
683 /* r1 now contains object->clazz */
buzbee67bf8852011-08-17 17:51:35 -0700684 loadWordDisp(cUnit, rSELF,
buzbee2a475e72011-09-07 17:19:17 -0700685 OFFSETOF_MEMBER(Thread, pCheckCastFromCode), rLR);
686 opRegReg(cUnit, kOpCmp, r1, r2);
687 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq); /* If equal, trivial yes */
688 genRegCopy(cUnit, r0, r1);
689 genRegCopy(cUnit, r1, r2);
buzbee67bf8852011-08-17 17:51:35 -0700690 opReg(cUnit, kOpBlx, rLR);
691 oatClobberCallRegs(cUnit);
buzbee2a475e72011-09-07 17:19:17 -0700692 /* branch target here */
buzbee67bf8852011-08-17 17:51:35 -0700693 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
694 target->defMask = ENCODE_ALL;
695 branch1->generic.target = (LIR*)target;
696 branch2->generic.target = (LIR*)target;
697}
698
699static void genNegFloat(CompilationUnit* cUnit, RegLocation rlDest,
700 RegLocation rlSrc)
701{
702 RegLocation rlResult;
703 rlSrc = loadValue(cUnit, rlSrc, kFPReg);
704 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
705 newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
706 storeValue(cUnit, rlDest, rlResult);
707}
708
709static void genNegDouble(CompilationUnit* cUnit, RegLocation rlDest,
710 RegLocation rlSrc)
711{
712 RegLocation rlResult;
713 rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
714 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
715 newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
716 S2D(rlSrc.lowReg, rlSrc.highReg));
717 storeValueWide(cUnit, rlDest, rlResult);
718}
719
buzbee439c4fa2011-08-27 15:59:07 -0700720static void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
721 RegLocation rlFree)
buzbee67bf8852011-08-17 17:51:35 -0700722{
buzbee439c4fa2011-08-27 15:59:07 -0700723 if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg))
724 oatFreeTemp(cUnit, rlFree.lowReg);
725 if ((rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg))
726 oatFreeTemp(cUnit, rlFree.lowReg);
buzbee67bf8852011-08-17 17:51:35 -0700727}
728
729static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
730 OpKind secondOp, RegLocation rlDest,
731 RegLocation rlSrc1, RegLocation rlSrc2)
732{
buzbee9e0f9b02011-08-24 15:32:46 -0700733 /*
734 * NOTE: This is the one place in the code in which we might have
735 * as many as six live temporary registers. There are 5 in the normal
736 * set for Arm. Until we have spill capabilities, temporarily add
737 * lr to the temp set. It is safe to do this locally, but note that
738 * lr is used explicitly elsewhere in the code generator and cannot
739 * normally be used as a general temp register.
740 */
buzbee67bf8852011-08-17 17:51:35 -0700741 RegLocation rlResult;
buzbee9e0f9b02011-08-24 15:32:46 -0700742 oatMarkTemp(cUnit, rLR); // Add lr to the temp pool
743 oatFreeTemp(cUnit, rLR); // and make it available
buzbee67bf8852011-08-17 17:51:35 -0700744 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
745 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
746 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
747 opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
748 opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
749 rlSrc2.highReg);
buzbee439c4fa2011-08-27 15:59:07 -0700750 /*
751 * NOTE: If rlDest refers to a frame variable in a large frame, the
752 * following storeValueWide might need to allocate a temp register.
753 * To further work around the lack of a spill capability, explicitly
754 * free any temps from rlSrc1 & rlSrc2 that aren't still live in rlResult.
755 * Remove when spill is functional.
756 */
757 freeRegLocTemps(cUnit, rlResult, rlSrc1);
758 freeRegLocTemps(cUnit, rlResult, rlSrc2);
buzbee67bf8852011-08-17 17:51:35 -0700759 storeValueWide(cUnit, rlDest, rlResult);
buzbee9e0f9b02011-08-24 15:32:46 -0700760 oatClobber(cUnit, rLR);
761 oatUnmarkTemp(cUnit, rLR); // Remove lr from the temp pool
buzbee67bf8852011-08-17 17:51:35 -0700762}
763
764void oatInitializeRegAlloc(CompilationUnit* cUnit)
765{
766 int numRegs = sizeof(coreRegs)/sizeof(*coreRegs);
767 int numReserved = sizeof(reservedRegs)/sizeof(*reservedRegs);
768 int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
769 int numFPRegs = sizeof(fpRegs)/sizeof(*fpRegs);
770 int numFPTemps = sizeof(fpTemps)/sizeof(*fpTemps);
771 RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true);
772 cUnit->regPool = pool;
773 pool->numCoreRegs = numRegs;
774 pool->coreRegs = (RegisterInfo *)
775 oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true);
776 pool->numFPRegs = numFPRegs;
777 pool->FPRegs = (RegisterInfo *)
778 oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true);
779 oatInitPool(pool->coreRegs, coreRegs, pool->numCoreRegs);
780 oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
781 // Keep special registers from being allocated
782 for (int i = 0; i < numReserved; i++) {
783 oatMarkInUse(cUnit, reservedRegs[i]);
784 }
785 // Mark temp regs - all others not in use can be used for promotion
786 for (int i = 0; i < numTemps; i++) {
787 oatMarkTemp(cUnit, coreTemps[i]);
788 }
789 for (int i = 0; i < numFPTemps; i++) {
790 oatMarkTemp(cUnit, fpTemps[i]);
791 }
792 pool->nullCheckedRegs =
793 oatAllocBitVector(cUnit->numSSARegs, false);
794}
795
/*
 * Handle the simple case (thin lock) inline.  If it's complicated, bail
 * out to the heavyweight lock/unlock routines.  Dedicated registers are
 * used here so that everything is already in the right position in case
 * we need to bail out to the runtime helper, which takes (self, object).
 * The helpers used in this file are Thread::pLockObjectFromCode and
 * Thread::pUnlockObjectFromCode.
 *
 * r0  -> self pointer [arg0 for the lock/unlock helper]
 * r1  -> object [arg1 for the lock/unlock helper]
 * r2  -> initial contents of object->lock, later result of strex
 * r3  -> self->threadId
 * r12 -> allowed to be used by utilities as a general temp
 *
 * The result of the strex is 0 if we acquire the lock.
 *
 * See comments in Sync.c for the layout of the lock word.
 * Of particular interest to this code is the test for the
 * simple case - which we handle inline.  For monitor enter, the
 * simple case is thin lock, held by no-one.  For monitor exit,
 * the simple case is thin lock, held by the unlocking thread with
 * a recurse count of 0.
 *
 * A minor complication is that there is a field in the lock word
 * unrelated to locking: the hash state.  This field must be ignored, but
 * preserved.
 *
 * Emitted control flow for monitor enter:
 *   - cbnz r2 -> slow path when the masked lock word is non-zero
 *   - strex, barrier, cbz r2 -> resume label when the store succeeded
 *   - otherwise fall through into the slow path (helper call)
 */
static void genMonitorEnter(CompilationUnit* cUnit, MIR* mir,
                            RegLocation rlSrc)
{
    ArmLIR* target;     // "resume here" label after the slow path
    ArmLIR* hopTarget;  // start of the slow path
    ArmLIR* branch;     // cbz taken when strex reports success
    ArmLIR* hopBranch;  // cbnz taken when the lock is not in the simple state

    oatFlushAllRegs(cUnit);
    // The inline test below depends on the thin shape being encoded as 0.
    assert(art::Monitor::kLwShapeThin == 0);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    oatLockCallTemps(cUnit);  // Prepare for explicit register usage
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
    // NOTE(review): the offset is passed >> 2, presumably because the
    // ldrex/strex immediate is expressed in words - confirm against encoder.
    newLIR3(cUnit, kThumb2Ldrex, r2, r1,
            Object::MonitorOffset().Int32Value() >> 2); // Get object->lock
    // Align owner
    opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    // NOTE(review): the bfi presumably copies the low (hash state) bits of
    // the old lock word into the new value in r3, and the bfc presumably
    // clears the hash state bits of r2 so they are ignored by the test
    // below - confirm operand meaning against the kThumb2Bfi/Bfc encodings.
    newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, art::Monitor::kLwLockOwnerShift
            - 1);
    newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
            art::Monitor::kLwLockOwnerShift - 1);
    // Anything left in r2 means this is not the simple case: go slow path.
    hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
    // Try to publish the new lock word; r2 receives the strex status.
    newLIR4(cUnit, kThumb2Strex, r2, r3, r1,
            Object::MonitorOffset().Int32Value() >> 2);
    oatGenMemBarrier(cUnit, kSY);
    // Status 0 == lock acquired: skip the slow path.
    branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR*)hopTarget;

    // Go expensive route - artLockObjectFromCode(self, obj);
    loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pLockObjectFromCode),
                 rLR);
    genRegCopy(cUnit, r0, rSELF);
    newLIR1(cUnit, kThumbBlxR, rLR);

    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask = ENCODE_ALL;
    branch->generic.target = (LIR*)target;
}
866
/*
 * For monitor unlock, we don't have to use ldrex/strex.  Once
 * we've determined that the lock is thin and that we own it with
 * a zero recursion count, it's safe to punch it back to the
 * initial, unlocked thin state with a store word.
 *
 * Register usage: r1 holds the object, r2 the lock word, r3 the thread id
 * shifted into owner position, and r12 the preserved hash-state bits that
 * are written back as the new lock word on the fast path.
 */
static void genMonitorExit(CompilationUnit* cUnit, MIR* mir,
                           RegLocation rlSrc)
{
    ArmLIR* target;     // "resume here" label after the slow path
    ArmLIR* branch;     // unconditional branch over the slow path
    ArmLIR* hopTarget;  // start of the slow path
    ArmLIR* hopBranch;  // taken when the lock is not in the simple state

    // The inline test below depends on the thin shape being encoded as 0.
    assert(art::Monitor::kLwShapeThin == 0);
    oatFlushAllRegs(cUnit);
    loadValueDirectFixed(cUnit, rlSrc, r1);  // Get obj
    oatLockCallTemps(cUnit);  // Prepare for explicit register usage
    genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
    loadWordDisp(cUnit, r1, Object::MonitorOffset().Int32Value(), r2); // Get lock
    loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
    // Is lock unheld on lock or held by us (==threadId) on unlock?
    // Save just the hash state bits in r12; they become the new lock word.
    opRegRegImm(cUnit, kOpAnd, r12, r2, (art::Monitor::kLwHashStateMask <<
                art::Monitor::kLwHashStateShift));
    // Align owner
    opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
    // NOTE(review): presumably clears the hash state bits of r2 so that they
    // do not take part in the comparison - confirm against kThumb2Bfc.
    newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
            art::Monitor::kLwLockOwnerShift - 1);
    // NOTE(review): the following conditional branch assumes this subtract
    // sets the condition flags - confirm for the kOpSub encoding chosen.
    opRegReg(cUnit, kOpSub, r2, r3);
    hopBranch = opCondBranch(cUnit, kArmCondNe);
    oatGenMemBarrier(cUnit, kSY);
    // Fast path: write back the hash-state-only word (unlocked thin state).
    storeWordDisp(cUnit, r1, Object::MonitorOffset().Int32Value(), r12);
    branch = opNone(cUnit, kOpUncondBr);

    hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
    hopTarget->defMask = ENCODE_ALL;
    hopBranch->generic.target = (LIR*)hopTarget;

    // Go expensive route - UnlockObjectFromCode(self, obj);
    loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pUnlockObjectFromCode),
                 rLR);
    genRegCopy(cUnit, r0, rSELF);
    newLIR1(cUnit, kThumbBlxR, rLR);

    // Resume here
    target = newLIR0(cUnit, kArmPseudoTargetLabel);
    target->defMask = ENCODE_ALL;
    branch->generic.target = (LIR*)target;
}
916
/*
 * 64-bit 3way compare function.
 *     mov   rX, #-1
 *     cmp   op1hi, op2hi
 *     blt   done
 *     bgt   flip
 *     sub   rX, op1lo, op2lo (treat as unsigned)
 *     beq   done
 *     ite   hi
 *     mov(hi)   rX, #-1
 *     mov(!hi)  rX, #1
 * flip:
 *     neg   rX
 * done:
 *
 * Result left in rX and stored to rlDest: -1 if src1 < src2, 0 if equal,
 * 1 if src1 > src2.  The high words are compared signed; when they are
 * equal the low words decide, compared unsigned.  Note that the values
 * selected in the IT block are the negation of the final answer because
 * control then falls through "flip".
 */
static void genCmpLong(CompilationUnit* cUnit, MIR* mir,
                       RegLocation rlDest, RegLocation rlSrc1,
                       RegLocation rlSrc2)
{
    RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
    ArmLIR* target1;  // "done" label
    ArmLIR* target2;  // "flip" label
    rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
    rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
    rlTemp.lowReg = oatAllocTemp(cUnit);
    // Start with the "less than" answer.
    loadConstant(cUnit, rlTemp.lowReg, -1);
    opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
    ArmLIR* branch1 = opCondBranch(cUnit, kArmCondLt);  // -> done, rX == -1
    ArmLIR* branch2 = opCondBranch(cUnit, kArmCondGt);  // -> flip, rX becomes 1
    // High words equal: the low words decide.
    // NOTE(review): the beq/ite below assume this subtract sets the
    // condition flags - confirm for the kOpSub encoding chosen.
    opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
    ArmLIR* branch3 = opCondBranch(cUnit, kArmCondEq);  // -> done, rX == 0

    // if-then-else on unsigned "higher": then-slot gets -1, else-slot gets 1.
    genIT(cUnit, kArmCondHi, "E");
    newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
    loadConstant(cUnit, rlTemp.lowReg, 1);
    genBarrier(cUnit);

    // flip:
    target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
    target2->defMask = -1;
    opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);

    // done:
    target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
    target1->defMask = -1;

    storeValue(cUnit, rlDest, rlTemp);

    branch1->generic.target = (LIR*)target1;
    branch2->generic.target = (LIR*)target2;
    branch3->generic.target = branch1->generic.target;
}
967
968static void genMultiplyByTwoBitMultiplier(CompilationUnit* cUnit,
969 RegLocation rlSrc, RegLocation rlResult, int lit,
970 int firstBit, int secondBit)
971{
972 opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
973 encodeShift(kArmLsl, secondBit - firstBit));
974 if (firstBit != 0) {
975 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
976 }
977}
978
979static bool genConversionCall(CompilationUnit* cUnit, MIR* mir, int funcOffset,
980 int srcSize, int tgtSize)
981{
982 /*
983 * Don't optimize the register usage since it calls out to support
984 * functions
985 */
986 RegLocation rlSrc;
987 RegLocation rlDest;
988 oatFlushAllRegs(cUnit); /* Send everything to home location */
989 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
990 if (srcSize == 1) {
991 rlSrc = oatGetSrc(cUnit, mir, 0);
992 loadValueDirectFixed(cUnit, rlSrc, r0);
993 } else {
994 rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
995 loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
996 }
997 opReg(cUnit, kOpBlx, rLR);
998 oatClobberCallRegs(cUnit);
999 if (tgtSize == 1) {
1000 RegLocation rlResult;
1001 rlDest = oatGetDest(cUnit, mir, 0);
1002 rlResult = oatGetReturn(cUnit);
1003 storeValue(cUnit, rlDest, rlResult);
1004 } else {
1005 RegLocation rlResult;
1006 rlDest = oatGetDestWide(cUnit, mir, 0, 1);
1007 rlResult = oatGetReturnWide(cUnit);
1008 storeValueWide(cUnit, rlDest, rlResult);
1009 }
1010 return false;
1011}
1012
1013static bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
1014 RegLocation rlDest, RegLocation rlSrc1,
1015 RegLocation rlSrc2)
1016{
1017 RegLocation rlResult;
1018 int funcOffset;
1019
1020 switch (mir->dalvikInsn.opcode) {
1021 case OP_ADD_FLOAT_2ADDR:
1022 case OP_ADD_FLOAT:
1023 funcOffset = OFFSETOF_MEMBER(Thread, pFadd);
1024 break;
1025 case OP_SUB_FLOAT_2ADDR:
1026 case OP_SUB_FLOAT:
1027 funcOffset = OFFSETOF_MEMBER(Thread, pFsub);
1028 break;
1029 case OP_DIV_FLOAT_2ADDR:
1030 case OP_DIV_FLOAT:
1031 funcOffset = OFFSETOF_MEMBER(Thread, pFdiv);
1032 break;
1033 case OP_MUL_FLOAT_2ADDR:
1034 case OP_MUL_FLOAT:
1035 funcOffset = OFFSETOF_MEMBER(Thread, pFmul);
1036 break;
1037 case OP_REM_FLOAT_2ADDR:
1038 case OP_REM_FLOAT:
1039 funcOffset = OFFSETOF_MEMBER(Thread, pFmodf);
1040 break;
1041 case OP_NEG_FLOAT: {
1042 genNegFloat(cUnit, rlDest, rlSrc1);
1043 return false;
1044 }
1045 default:
1046 return true;
1047 }
1048 oatFlushAllRegs(cUnit); /* Send everything to home location */
1049 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1050 loadValueDirectFixed(cUnit, rlSrc1, r0);
1051 loadValueDirectFixed(cUnit, rlSrc2, r1);
1052 opReg(cUnit, kOpBlx, rLR);
1053 oatClobberCallRegs(cUnit);
1054 rlResult = oatGetReturn(cUnit);
1055 storeValue(cUnit, rlDest, rlResult);
1056 return false;
1057}
1058
1059static bool genArithOpDoublePortable(CompilationUnit* cUnit, MIR* mir,
1060 RegLocation rlDest, RegLocation rlSrc1,
1061 RegLocation rlSrc2)
1062{
1063 RegLocation rlResult;
1064 int funcOffset;
1065
1066 switch (mir->dalvikInsn.opcode) {
1067 case OP_ADD_DOUBLE_2ADDR:
1068 case OP_ADD_DOUBLE:
1069 funcOffset = OFFSETOF_MEMBER(Thread, pDadd);
1070 break;
1071 case OP_SUB_DOUBLE_2ADDR:
1072 case OP_SUB_DOUBLE:
1073 funcOffset = OFFSETOF_MEMBER(Thread, pDsub);
1074 break;
1075 case OP_DIV_DOUBLE_2ADDR:
1076 case OP_DIV_DOUBLE:
1077 funcOffset = OFFSETOF_MEMBER(Thread, pDdiv);
1078 break;
1079 case OP_MUL_DOUBLE_2ADDR:
1080 case OP_MUL_DOUBLE:
1081 funcOffset = OFFSETOF_MEMBER(Thread, pDmul);
1082 break;
1083 case OP_REM_DOUBLE_2ADDR:
1084 case OP_REM_DOUBLE:
1085 funcOffset = OFFSETOF_MEMBER(Thread, pFmod);
1086 break;
1087 case OP_NEG_DOUBLE: {
1088 genNegDouble(cUnit, rlDest, rlSrc1);
1089 return false;
1090 }
1091 default:
1092 return true;
1093 }
1094 oatFlushAllRegs(cUnit); /* Send everything to home location */
1095 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1096 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1097 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1098 opReg(cUnit, kOpBlx, rLR);
1099 oatClobberCallRegs(cUnit);
1100 rlResult = oatGetReturnWide(cUnit);
1101 storeValueWide(cUnit, rlDest, rlResult);
1102 return false;
1103}
1104
1105static bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
1106{
1107 Opcode opcode = mir->dalvikInsn.opcode;
1108
1109 switch (opcode) {
1110 case OP_INT_TO_FLOAT:
1111 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2f),
1112 1, 1);
1113 case OP_FLOAT_TO_INT:
1114 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2iz),
1115 1, 1);
1116 case OP_DOUBLE_TO_FLOAT:
1117 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2f),
1118 2, 1);
1119 case OP_FLOAT_TO_DOUBLE:
1120 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2d),
1121 1, 2);
1122 case OP_INT_TO_DOUBLE:
1123 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2d),
1124 1, 2);
1125 case OP_DOUBLE_TO_INT:
1126 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2iz),
1127 2, 1);
1128 case OP_FLOAT_TO_LONG:
1129 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
buzbee1b4c8592011-08-31 10:43:51 -07001130 pF2l), 1, 2);
buzbee67bf8852011-08-17 17:51:35 -07001131 case OP_LONG_TO_FLOAT:
1132 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2f),
1133 2, 1);
1134 case OP_DOUBLE_TO_LONG:
1135 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
buzbee1b4c8592011-08-31 10:43:51 -07001136 pD2l), 2, 2);
buzbee67bf8852011-08-17 17:51:35 -07001137 case OP_LONG_TO_DOUBLE:
1138 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2d),
1139 2, 2);
1140 default:
1141 return true;
1142 }
1143 return false;
1144}
1145
1146/* Generate conditional branch instructions */
1147static ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
1148 ArmConditionCode cond,
1149 ArmLIR* target)
1150{
1151 ArmLIR* branch = opCondBranch(cUnit, cond);
1152 branch->generic.target = (LIR*) target;
1153 return branch;
1154}
1155
1156/* Generate a unconditional branch to go to the interpreter */
1157static inline ArmLIR* genTrap(CompilationUnit* cUnit, int dOffset,
1158 ArmLIR* pcrLabel)
1159{
1160 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1161 return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
1162}
1163
1164/*
1165 * Generate array store
1166 *
1167 */
buzbee1b4c8592011-08-31 10:43:51 -07001168static void genArrayObjPut(CompilationUnit* cUnit, MIR* mir,
1169 RegLocation rlArray, RegLocation rlIndex,
1170 RegLocation rlSrc, int scale)
buzbee67bf8852011-08-17 17:51:35 -07001171{
1172 RegisterClass regClass = oatRegClassBySize(kWord);
buzbeec143c552011-08-20 17:38:58 -07001173 int lenOffset = Array::LengthOffset().Int32Value();
1174 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001175
1176 /* Make sure it's a legal object Put. Use direct regs at first */
1177 loadValueDirectFixed(cUnit, rlArray, r1);
1178 loadValueDirectFixed(cUnit, rlSrc, r0);
1179
1180 /* null array object? */
1181 ArmLIR* pcrLabel = NULL;
1182
1183 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1184 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, r1,
1185 mir->offset, NULL);
1186 }
1187 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -07001188 OFFSETOF_MEMBER(Thread, pCanPutArrayElementFromCode), rLR);
buzbee67bf8852011-08-17 17:51:35 -07001189 /* Get the array's clazz */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -07001190 loadWordDisp(cUnit, r1, Object::ClassOffset().Int32Value(), r1);
buzbee67bf8852011-08-17 17:51:35 -07001191 /* Get the object's clazz */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -07001192 loadWordDisp(cUnit, r0, Object::ClassOffset().Int32Value(), r0);
buzbee67bf8852011-08-17 17:51:35 -07001193 opReg(cUnit, kOpBlx, rLR);
1194 oatClobberCallRegs(cUnit);
1195
1196 // Now, redo loadValues in case they didn't survive the call
1197
1198 int regPtr;
1199 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1200 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1201
1202 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1203 oatClobber(cUnit, rlArray.lowReg);
1204 regPtr = rlArray.lowReg;
1205 } else {
1206 regPtr = oatAllocTemp(cUnit);
1207 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1208 }
1209
1210 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1211 int regLen = oatAllocTemp(cUnit);
1212 //NOTE: max live temps(4) here.
1213 /* Get len */
1214 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1215 /* regPtr -> array data */
1216 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1217 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1218 pcrLabel);
1219 oatFreeTemp(cUnit, regLen);
1220 } else {
1221 /* regPtr -> array data */
1222 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1223 }
1224 /* at this point, regPtr points to array, 2 live temps */
1225 rlSrc = loadValue(cUnit, rlSrc, regClass);
1226 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1227 scale, kWord);
1228}
1229
1230/*
1231 * Generate array load
1232 */
1233static void genArrayGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
1234 RegLocation rlArray, RegLocation rlIndex,
1235 RegLocation rlDest, int scale)
1236{
1237 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001238 int lenOffset = Array::LengthOffset().Int32Value();
1239 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001240 RegLocation rlResult;
1241 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1242 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1243 int regPtr;
1244
1245 /* null object? */
1246 ArmLIR* pcrLabel = NULL;
1247
1248 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1249 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
1250 rlArray.lowReg, mir->offset, NULL);
1251 }
1252
1253 regPtr = oatAllocTemp(cUnit);
1254
1255 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1256 int regLen = oatAllocTemp(cUnit);
1257 /* Get len */
1258 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1259 /* regPtr -> array data */
1260 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1261 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1262 pcrLabel);
1263 oatFreeTemp(cUnit, regLen);
1264 } else {
1265 /* regPtr -> array data */
1266 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1267 }
buzbeee9a72f62011-09-04 17:59:07 -07001268 oatFreeTemp(cUnit, rlArray.lowReg);
buzbee67bf8852011-08-17 17:51:35 -07001269 if ((size == kLong) || (size == kDouble)) {
1270 if (scale) {
1271 int rNewIndex = oatAllocTemp(cUnit);
1272 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1273 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1274 oatFreeTemp(cUnit, rNewIndex);
1275 } else {
1276 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1277 }
buzbeee9a72f62011-09-04 17:59:07 -07001278 oatFreeTemp(cUnit, rlIndex.lowReg);
buzbee67bf8852011-08-17 17:51:35 -07001279 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1280
1281 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
1282
1283 oatFreeTemp(cUnit, regPtr);
1284 storeValueWide(cUnit, rlDest, rlResult);
1285 } else {
1286 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1287
1288 loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
1289 scale, size);
1290
1291 oatFreeTemp(cUnit, regPtr);
1292 storeValue(cUnit, rlDest, rlResult);
1293 }
1294}
1295
1296/*
1297 * Generate array store
1298 *
1299 */
1300static void genArrayPut(CompilationUnit* cUnit, MIR* mir, OpSize size,
1301 RegLocation rlArray, RegLocation rlIndex,
1302 RegLocation rlSrc, int scale)
1303{
1304 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001305 int lenOffset = Array::LengthOffset().Int32Value();
1306 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001307
1308 int regPtr;
1309 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1310 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1311
1312 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1313 oatClobber(cUnit, rlArray.lowReg);
1314 regPtr = rlArray.lowReg;
1315 } else {
1316 regPtr = oatAllocTemp(cUnit);
1317 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1318 }
1319
1320 /* null object? */
1321 ArmLIR* pcrLabel = NULL;
1322
1323 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1324 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
1325 mir->offset, NULL);
1326 }
1327
1328 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1329 int regLen = oatAllocTemp(cUnit);
1330 //NOTE: max live temps(4) here.
1331 /* Get len */
1332 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1333 /* regPtr -> array data */
1334 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1335 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1336 pcrLabel);
1337 oatFreeTemp(cUnit, regLen);
1338 } else {
1339 /* regPtr -> array data */
1340 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1341 }
1342 /* at this point, regPtr points to array, 2 live temps */
1343 if ((size == kLong) || (size == kDouble)) {
1344 //TODO: need specific wide routine that can handle fp regs
1345 if (scale) {
1346 int rNewIndex = oatAllocTemp(cUnit);
1347 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1348 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1349 oatFreeTemp(cUnit, rNewIndex);
1350 } else {
1351 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1352 }
1353 rlSrc = loadValueWide(cUnit, rlSrc, regClass);
1354
1355 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
1356
1357 oatFreeTemp(cUnit, regPtr);
1358 } else {
1359 rlSrc = loadValue(cUnit, rlSrc, regClass);
1360
1361 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1362 scale, size);
1363 }
1364}
1365
1366static bool genShiftOpLong(CompilationUnit* cUnit, MIR* mir,
1367 RegLocation rlDest, RegLocation rlSrc1,
1368 RegLocation rlShift)
1369{
buzbee54330722011-08-23 16:46:55 -07001370 int funcOffset;
buzbee67bf8852011-08-17 17:51:35 -07001371
buzbee67bf8852011-08-17 17:51:35 -07001372 switch( mir->dalvikInsn.opcode) {
1373 case OP_SHL_LONG:
1374 case OP_SHL_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001375 funcOffset = OFFSETOF_MEMBER(Thread, pShlLong);
buzbee67bf8852011-08-17 17:51:35 -07001376 break;
1377 case OP_SHR_LONG:
1378 case OP_SHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001379 funcOffset = OFFSETOF_MEMBER(Thread, pShrLong);
buzbee67bf8852011-08-17 17:51:35 -07001380 break;
1381 case OP_USHR_LONG:
1382 case OP_USHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001383 funcOffset = OFFSETOF_MEMBER(Thread, pUshrLong);
buzbee67bf8852011-08-17 17:51:35 -07001384 break;
1385 default:
buzbee54330722011-08-23 16:46:55 -07001386 LOG(FATAL) << "Unexpected case";
buzbee67bf8852011-08-17 17:51:35 -07001387 return true;
1388 }
buzbee54330722011-08-23 16:46:55 -07001389 oatFlushAllRegs(cUnit); /* Send everything to home location */
1390 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1391 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1392 loadValueDirect(cUnit, rlShift, r2);
1393 opReg(cUnit, kOpBlx, rLR);
1394 oatClobberCallRegs(cUnit);
1395 RegLocation rlResult = oatGetReturnWide(cUnit);
buzbee67bf8852011-08-17 17:51:35 -07001396 storeValueWide(cUnit, rlDest, rlResult);
1397 return false;
1398}
1399
1400static bool genArithOpLong(CompilationUnit* cUnit, MIR* mir,
1401 RegLocation rlDest, RegLocation rlSrc1,
1402 RegLocation rlSrc2)
1403{
1404 RegLocation rlResult;
1405 OpKind firstOp = kOpBkpt;
1406 OpKind secondOp = kOpBkpt;
1407 bool callOut = false;
1408 int funcOffset;
1409 int retReg = r0;
1410
1411 switch (mir->dalvikInsn.opcode) {
1412 case OP_NOT_LONG:
1413 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1414 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1415 opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg);
1416 opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg);
1417 storeValueWide(cUnit, rlDest, rlResult);
1418 return false;
1419 break;
1420 case OP_ADD_LONG:
1421 case OP_ADD_LONG_2ADDR:
1422 firstOp = kOpAdd;
1423 secondOp = kOpAdc;
1424 break;
1425 case OP_SUB_LONG:
1426 case OP_SUB_LONG_2ADDR:
1427 firstOp = kOpSub;
1428 secondOp = kOpSbc;
1429 break;
1430 case OP_MUL_LONG:
1431 case OP_MUL_LONG_2ADDR:
buzbee439c4fa2011-08-27 15:59:07 -07001432 callOut = true;
1433 retReg = r0;
1434 funcOffset = OFFSETOF_MEMBER(Thread, pLmul);
1435 break;
buzbee67bf8852011-08-17 17:51:35 -07001436 case OP_DIV_LONG:
1437 case OP_DIV_LONG_2ADDR:
1438 callOut = true;
1439 retReg = r0;
1440 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1441 break;
1442 /* NOTE - result is in r2/r3 instead of r0/r1 */
1443 case OP_REM_LONG:
1444 case OP_REM_LONG_2ADDR:
1445 callOut = true;
1446 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1447 retReg = r2;
1448 break;
1449 case OP_AND_LONG_2ADDR:
1450 case OP_AND_LONG:
1451 firstOp = kOpAnd;
1452 secondOp = kOpAnd;
1453 break;
1454 case OP_OR_LONG:
1455 case OP_OR_LONG_2ADDR:
1456 firstOp = kOpOr;
1457 secondOp = kOpOr;
1458 break;
1459 case OP_XOR_LONG:
1460 case OP_XOR_LONG_2ADDR:
1461 firstOp = kOpXor;
1462 secondOp = kOpXor;
1463 break;
1464 case OP_NEG_LONG: {
1465 //TUNING: can improve this using Thumb2 code
1466 int tReg = oatAllocTemp(cUnit);
1467 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1468 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1469 loadConstantNoClobber(cUnit, tReg, 0);
1470 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1471 tReg, rlSrc2.lowReg);
1472 opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg);
1473 genRegCopy(cUnit, rlResult.highReg, tReg);
1474 storeValueWide(cUnit, rlDest, rlResult);
1475 return false;
1476 }
1477 default:
1478 LOG(FATAL) << "Invalid long arith op";
1479 }
1480 if (!callOut) {
1481 genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2);
1482 } else {
1483 // Adjust return regs in to handle case of rem returning r2/r3
1484 oatFlushAllRegs(cUnit); /* Send everything to home location */
1485 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1486 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1487 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1488 opReg(cUnit, kOpBlx, rLR);
1489 oatClobberCallRegs(cUnit);
1490 if (retReg == r0)
1491 rlResult = oatGetReturnWide(cUnit);
1492 else
1493 rlResult = oatGetReturnWideAlt(cUnit);
1494 storeValueWide(cUnit, rlDest, rlResult);
1495 }
1496 return false;
1497}
1498
1499static bool genArithOpInt(CompilationUnit* cUnit, MIR* mir,
1500 RegLocation rlDest, RegLocation rlSrc1,
1501 RegLocation rlSrc2)
1502{
1503 OpKind op = kOpBkpt;
1504 bool callOut = false;
1505 bool checkZero = false;
1506 bool unary = false;
1507 int retReg = r0;
1508 int funcOffset;
1509 RegLocation rlResult;
1510 bool shiftOp = false;
1511
1512 switch (mir->dalvikInsn.opcode) {
1513 case OP_NEG_INT:
1514 op = kOpNeg;
1515 unary = true;
1516 break;
1517 case OP_NOT_INT:
1518 op = kOpMvn;
1519 unary = true;
1520 break;
1521 case OP_ADD_INT:
1522 case OP_ADD_INT_2ADDR:
1523 op = kOpAdd;
1524 break;
1525 case OP_SUB_INT:
1526 case OP_SUB_INT_2ADDR:
1527 op = kOpSub;
1528 break;
1529 case OP_MUL_INT:
1530 case OP_MUL_INT_2ADDR:
1531 op = kOpMul;
1532 break;
1533 case OP_DIV_INT:
1534 case OP_DIV_INT_2ADDR:
1535 callOut = true;
1536 checkZero = true;
1537 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1538 retReg = r0;
1539 break;
1540 /* NOTE: returns in r1 */
1541 case OP_REM_INT:
1542 case OP_REM_INT_2ADDR:
1543 callOut = true;
1544 checkZero = true;
1545 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1546 retReg = r1;
1547 break;
1548 case OP_AND_INT:
1549 case OP_AND_INT_2ADDR:
1550 op = kOpAnd;
1551 break;
1552 case OP_OR_INT:
1553 case OP_OR_INT_2ADDR:
1554 op = kOpOr;
1555 break;
1556 case OP_XOR_INT:
1557 case OP_XOR_INT_2ADDR:
1558 op = kOpXor;
1559 break;
1560 case OP_SHL_INT:
1561 case OP_SHL_INT_2ADDR:
1562 shiftOp = true;
1563 op = kOpLsl;
1564 break;
1565 case OP_SHR_INT:
1566 case OP_SHR_INT_2ADDR:
1567 shiftOp = true;
1568 op = kOpAsr;
1569 break;
1570 case OP_USHR_INT:
1571 case OP_USHR_INT_2ADDR:
1572 shiftOp = true;
1573 op = kOpLsr;
1574 break;
1575 default:
1576 LOG(FATAL) << "Invalid word arith op: " <<
1577 (int)mir->dalvikInsn.opcode;
1578 }
1579 if (!callOut) {
1580 rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
1581 if (unary) {
1582 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1583 opRegReg(cUnit, op, rlResult.lowReg,
1584 rlSrc1.lowReg);
1585 } else {
1586 rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
1587 if (shiftOp) {
1588 int tReg = oatAllocTemp(cUnit);
1589 opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31);
1590 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1591 opRegRegReg(cUnit, op, rlResult.lowReg,
1592 rlSrc1.lowReg, tReg);
1593 oatFreeTemp(cUnit, tReg);
1594 } else {
1595 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1596 opRegRegReg(cUnit, op, rlResult.lowReg,
1597 rlSrc1.lowReg, rlSrc2.lowReg);
1598 }
1599 }
1600 storeValue(cUnit, rlDest, rlResult);
1601 } else {
1602 RegLocation rlResult;
1603 oatFlushAllRegs(cUnit); /* Send everything to home location */
1604 loadValueDirectFixed(cUnit, rlSrc2, r1);
1605 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1606 loadValueDirectFixed(cUnit, rlSrc1, r0);
1607 if (checkZero) {
1608 genNullCheck(cUnit, rlSrc2.sRegLow, r1, mir->offset, NULL);
1609 }
1610 opReg(cUnit, kOpBlx, rLR);
1611 oatClobberCallRegs(cUnit);
1612 if (retReg == r0)
1613 rlResult = oatGetReturn(cUnit);
1614 else
1615 rlResult = oatGetReturnAlt(cUnit);
1616 storeValue(cUnit, rlDest, rlResult);
1617 }
1618 return false;
1619}
1620
/*
 * Suspend poll (currently a stub).
 *
 * Intended behavior, per the disabled reference code below: fetch
 * *self->info.breakFlags and, if the breakFlags are non-zero, punt to the
 * interpreter.
 *
 * At present the only active statement is UNIMPLEMENTED(WARNING); the
 * reference implementation is compiled out with #if 0, so cUnit and mir
 * are not used.
 */
static void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
{
    UNIMPLEMENTED(WARNING);
#if 0
    int rTemp = oatAllocTemp(cUnit);
    ArmLIR* ld;
    ld = loadBaseDisp(cUnit, NULL, rSELF,
                      offsetof(Thread, interpBreak.ctl.breakFlags),
                      rTemp, kUnsignedByte, INVALID_SREG);
    setMemRefType(ld, true /* isLoad */, kMustNotAlias);
    genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL);
#endif
}
1639/*
1640 * The following are the first-level codegen routines that analyze the format
1641 * of each bytecode then either dispatch special purpose codegen routines
1642 * or produce corresponding Thumb instructions directly.
1643 */
1644
/*
 * Returns true if at most one bit is set in the two's-complement
 * representation of 'x'.
 *
 * For x >= 1 this is exactly "x is a power of two".  Note that 0 (no bits
 * set) and INT_MIN (only the sign bit set) also yield true, so callers that
 * care must range-check first (handleEasyDivide rejects lit < 2 before
 * calling this).
 *
 * The test is carried out in unsigned arithmetic: evaluating 'x - 1' as a
 * signed int would overflow, which is undefined behavior, when x is INT_MIN.
 */
static bool isPowerOfTwo(int x)
{
    unsigned int bits = (unsigned int) x;

    return (bits & (bits - 1u)) == 0u;
}
// Returns true if no more than two bits are set in 'x' (including x == 0).
// Works by clearing the lowest set bit twice and checking nothing remains.
static bool isPopCountLE2(unsigned int x)
{
    unsigned int withoutLowest = x & (x - 1);
    unsigned int withoutTwoLowest = withoutLowest & (withoutLowest - 1);

    return withoutTwoLowest == 0;
}
// Returns the index (0 = least significant) of the lowest set bit in 'x',
// or -1 if 'x' is 0 and therefore has no set bit.  Without the zero guard
// the scanning loops below would never terminate for x == 0.
static int lowestSetBit(unsigned int x) {
    if (x == 0) {
        return -1;
    }
    int bit_posn = 0;
    // Skip whole empty nibbles first, then finish bit by bit.
    while ((x & 0xf) == 0) {
        bit_posn += 4;
        x >>= 4;
    }
    while ((x & 1) == 0) {
        bit_posn++;
        x >>= 1;
    }
    return bit_posn;
}
1671// Returns true if it added instructions to 'cUnit' to divide 'rlSrc' by 'lit'
1672// and store the result in 'rlDest'.
1673static bool handleEasyDivide(CompilationUnit* cUnit, Opcode dalvikOpcode,
1674 RegLocation rlSrc, RegLocation rlDest, int lit)
1675{
1676 if (lit < 2 || !isPowerOfTwo(lit)) {
1677 return false;
1678 }
1679 int k = lowestSetBit(lit);
1680 if (k >= 30) {
1681 // Avoid special cases.
1682 return false;
1683 }
1684 bool div = (dalvikOpcode == OP_DIV_INT_LIT8 ||
1685 dalvikOpcode == OP_DIV_INT_LIT16);
1686 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1687 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1688 if (div) {
1689 int tReg = oatAllocTemp(cUnit);
1690 if (lit == 2) {
1691 // Division by 2 is by far the most common division by constant.
1692 opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
1693 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1694 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1695 } else {
1696 opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
1697 opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
1698 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1699 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1700 }
1701 } else {
1702 int cReg = oatAllocTemp(cUnit);
1703 loadConstant(cUnit, cReg, lit - 1);
1704 int tReg1 = oatAllocTemp(cUnit);
1705 int tReg2 = oatAllocTemp(cUnit);
1706 if (lit == 2) {
1707 opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
1708 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1709 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1710 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1711 } else {
1712 opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
1713 opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
1714 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1715 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1716 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1717 }
1718 }
1719 storeValue(cUnit, rlDest, rlResult);
1720 return true;
1721}
1722
1723// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
1724// and store the result in 'rlDest'.
1725static bool handleEasyMultiply(CompilationUnit* cUnit,
1726 RegLocation rlSrc, RegLocation rlDest, int lit)
1727{
1728 // Can we simplify this multiplication?
1729 bool powerOfTwo = false;
1730 bool popCountLE2 = false;
1731 bool powerOfTwoMinusOne = false;
1732 if (lit < 2) {
1733 // Avoid special cases.
1734 return false;
1735 } else if (isPowerOfTwo(lit)) {
1736 powerOfTwo = true;
1737 } else if (isPopCountLE2(lit)) {
1738 popCountLE2 = true;
1739 } else if (isPowerOfTwo(lit + 1)) {
1740 powerOfTwoMinusOne = true;
1741 } else {
1742 return false;
1743 }
1744 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1745 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1746 if (powerOfTwo) {
1747 // Shift.
1748 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg,
1749 lowestSetBit(lit));
1750 } else if (popCountLE2) {
1751 // Shift and add and shift.
1752 int firstBit = lowestSetBit(lit);
1753 int secondBit = lowestSetBit(lit ^ (1 << firstBit));
1754 genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit,
1755 firstBit, secondBit);
1756 } else {
1757 // Reverse subtract: (src << (shift + 1)) - src.
1758 assert(powerOfTwoMinusOne);
1759 // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
1760 int tReg = oatAllocTemp(cUnit);
1761 opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
1762 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
1763 }
1764 storeValue(cUnit, rlDest, rlResult);
1765 return true;
1766}
1767
1768static bool genArithOpIntLit(CompilationUnit* cUnit, MIR* mir,
1769 RegLocation rlDest, RegLocation rlSrc,
1770 int lit)
1771{
1772 Opcode dalvikOpcode = mir->dalvikInsn.opcode;
1773 RegLocation rlResult;
1774 OpKind op = (OpKind)0; /* Make gcc happy */
1775 int shiftOp = false;
1776 bool isDiv = false;
1777 int funcOffset;
1778
1779 switch (dalvikOpcode) {
1780 case OP_RSUB_INT_LIT8:
1781 case OP_RSUB_INT: {
1782 int tReg;
1783 //TUNING: add support for use of Arm rsub op
1784 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1785 tReg = oatAllocTemp(cUnit);
1786 loadConstant(cUnit, tReg, lit);
1787 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1788 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1789 tReg, rlSrc.lowReg);
1790 storeValue(cUnit, rlDest, rlResult);
1791 return false;
1792 break;
1793 }
1794
1795 case OP_ADD_INT_LIT8:
1796 case OP_ADD_INT_LIT16:
1797 op = kOpAdd;
1798 break;
1799 case OP_MUL_INT_LIT8:
1800 case OP_MUL_INT_LIT16: {
1801 if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) {
1802 return false;
1803 }
1804 op = kOpMul;
1805 break;
1806 }
1807 case OP_AND_INT_LIT8:
1808 case OP_AND_INT_LIT16:
1809 op = kOpAnd;
1810 break;
1811 case OP_OR_INT_LIT8:
1812 case OP_OR_INT_LIT16:
1813 op = kOpOr;
1814 break;
1815 case OP_XOR_INT_LIT8:
1816 case OP_XOR_INT_LIT16:
1817 op = kOpXor;
1818 break;
1819 case OP_SHL_INT_LIT8:
1820 lit &= 31;
1821 shiftOp = true;
1822 op = kOpLsl;
1823 break;
1824 case OP_SHR_INT_LIT8:
1825 lit &= 31;
1826 shiftOp = true;
1827 op = kOpAsr;
1828 break;
1829 case OP_USHR_INT_LIT8:
1830 lit &= 31;
1831 shiftOp = true;
1832 op = kOpLsr;
1833 break;
1834
1835 case OP_DIV_INT_LIT8:
1836 case OP_DIV_INT_LIT16:
1837 case OP_REM_INT_LIT8:
1838 case OP_REM_INT_LIT16:
1839 if (lit == 0) {
1840 UNIMPLEMENTED(FATAL);
1841 // FIXME: generate an explicit throw here
1842 return false;
1843 }
1844 if (handleEasyDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit)) {
1845 return false;
1846 }
1847 oatFlushAllRegs(cUnit); /* Everything to home location */
1848 loadValueDirectFixed(cUnit, rlSrc, r0);
1849 oatClobber(cUnit, r0);
1850 if ((dalvikOpcode == OP_DIV_INT_LIT8) ||
1851 (dalvikOpcode == OP_DIV_INT_LIT16)) {
1852 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1853 isDiv = true;
1854 } else {
1855 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1856 isDiv = false;
1857 }
1858 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1859 loadConstant(cUnit, r1, lit);
1860 opReg(cUnit, kOpBlx, rLR);
1861 oatClobberCallRegs(cUnit);
1862 if (isDiv)
1863 rlResult = oatGetReturn(cUnit);
1864 else
1865 rlResult = oatGetReturnAlt(cUnit);
1866 storeValue(cUnit, rlDest, rlResult);
1867 return false;
1868 break;
1869 default:
1870 return true;
1871 }
1872 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1873 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1874 // Avoid shifts by literal 0 - no support in Thumb. Change to copy
1875 if (shiftOp && (lit == 0)) {
1876 genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
1877 } else {
1878 opRegRegImm(cUnit, op, rlResult.lowReg, rlSrc.lowReg, lit);
1879 }
1880 storeValue(cUnit, rlDest, rlResult);
1881 return false;
1882}
1883
1884/* Architectural-specific debugging helpers go here */
1885void oatArchDump(void)
1886{
1887 /* Print compiled opcode in this VM instance */
1888 int i, start, streak;
1889 char buf[1024];
1890
1891 streak = i = 0;
1892 buf[0] = 0;
1893 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1894 i++;
1895 }
1896 if (i == kNumPackedOpcodes) {
1897 return;
1898 }
1899 for (start = i++, streak = 1; i < kNumPackedOpcodes; i++) {
1900 if (opcodeCoverage[i]) {
1901 streak++;
1902 } else {
1903 if (streak == 1) {
1904 sprintf(buf+strlen(buf), "%x,", start);
1905 } else {
1906 sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1);
1907 }
1908 streak = 0;
1909 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1910 i++;
1911 }
1912 if (i < kNumPackedOpcodes) {
1913 streak = 1;
1914 start = i;
1915 }
1916 }
1917 }
1918 if (streak) {
1919 if (streak == 1) {
1920 sprintf(buf+strlen(buf), "%x", start);
1921 } else {
1922 sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1);
1923 }
1924 }
1925 if (strlen(buf)) {
1926 LOG(INFO) << "dalvik.vm.oat.op = " << buf;
1927 }
1928}