/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
/*
 * This file contains codegen for the Thumb2 ISA and is intended to be
 * included by:
 *
 *        Codegen-$(TARGET_ARCH_VARIANT).c
 *
 */
24
25/*
26 * Construct an s4 from two consecutive half-words of switch data.
27 * This needs to check endianness because the DEX optimizer only swaps
28 * half-words in instruction stream.
29 *
30 * "switchData" must be 32-bit aligned.
31 */
32#if __BYTE_ORDER == __LITTLE_ENDIAN
33static inline s4 s4FromSwitchData(const void* switchData) {
34 return *(s4*) switchData;
35}
36#else
37static inline s4 s4FromSwitchData(const void* switchData) {
38 u2* data = switchData;
39 return data[0] | (((s4) data[1]) << 16);
40}
41#endif
42
43/*
44 * Generate a Thumb2 IT instruction, which can nullify up to
45 * four subsequent instructions based on a condition and its
46 * inverse. The condition applies to the first instruction, which
47 * is executed if the condition is met. The string "guide" consists
48 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
49 * A "T" means the instruction is executed if the condition is
50 * met, and an "E" means the instruction is executed if the condition
51 * is not met.
52 */
53static ArmLIR* genIT(CompilationUnit* cUnit, ArmConditionCode code,
54 const char* guide)
55{
56 int mask;
57 int condBit = code & 1;
58 int altBit = condBit ^ 1;
59 int mask3 = 0;
60 int mask2 = 0;
61 int mask1 = 0;
62
63 //Note: case fallthroughs intentional
64 switch(strlen(guide)) {
65 case 3:
66 mask1 = (guide[2] == 'T') ? condBit : altBit;
67 case 2:
68 mask2 = (guide[1] == 'T') ? condBit : altBit;
69 case 1:
70 mask3 = (guide[0] == 'T') ? condBit : altBit;
71 break;
72 case 0:
73 break;
74 default:
75 LOG(FATAL) << "OAT: bad case in genIT";
76 }
77 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
78 (1 << (3 - strlen(guide)));
79 return newLIR2(cUnit, kThumb2It, code, mask);
80}
81
82/*
83 * Insert a kArmPseudoCaseLabel at the beginning of the Dalvik
84 * offset vaddr. This label will be used to fix up the case
85 * branch table during the assembly phase. Be sure to set
86 * all resource flags on this to prevent code motion across
87 * target boundaries. KeyVal is just there for debugging.
88 */
89static ArmLIR* insertCaseLabel(CompilationUnit* cUnit, int vaddr, int keyVal)
90{
91 ArmLIR* lir;
92 for (lir = (ArmLIR*)cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
93 if ((lir->opcode == kArmPseudoDalvikByteCodeBoundary) &&
94 (lir->generic.dalvikOffset == vaddr)) {
95 ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
96 newLabel->generic.dalvikOffset = vaddr;
97 newLabel->opcode = kArmPseudoCaseLabel;
98 newLabel->operands[0] = keyVal;
99 oatInsertLIRAfter((LIR*)lir, (LIR*)newLabel);
100 return newLabel;
101 }
102 }
103 oatCodegenDump(cUnit);
104 LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
105 return NULL; // Quiet gcc
106}
107
108static void markPackedCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
109{
110 const u2* table = tabRec->table;
111 int baseVaddr = tabRec->vaddr;
112 int *targets = (int*)&table[4];
113 int entries = table[1];
114 int lowKey = s4FromSwitchData(&table[2]);
115 for (int i = 0; i < entries; i++) {
116 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
117 i + lowKey);
118 }
119}
120
121static void markSparseCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
122{
123 const u2* table = tabRec->table;
124 int baseVaddr = tabRec->vaddr;
125 int entries = table[1];
126 int* keys = (int*)&table[2];
127 int* targets = &keys[entries];
128 for (int i = 0; i < entries; i++) {
129 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
130 keys[i]);
131 }
132}
133
134void oatProcessSwitchTables(CompilationUnit* cUnit)
135{
136 GrowableListIterator iterator;
137 oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
138 while (true) {
139 SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext(
140 &iterator);
141 if (tabRec == NULL) break;
142 if (tabRec->table[0] == kPackedSwitchSignature)
143 markPackedCaseLabels(cUnit, tabRec);
144 else if (tabRec->table[0] == kSparseSwitchSignature)
145 markSparseCaseLabels(cUnit, tabRec);
146 else {
147 LOG(FATAL) << "Invalid switch table";
148 }
149 }
150}
151
152static void dumpSparseSwitchTable(const u2* table)
153 /*
154 * Sparse switch data format:
155 * ushort ident = 0x0200 magic value
156 * ushort size number of entries in the table; > 0
157 * int keys[size] keys, sorted low-to-high; 32-bit aligned
158 * int targets[size] branch targets, relative to switch opcode
159 *
160 * Total size is (2+size*4) 16-bit code units.
161 */
162{
163 u2 ident = table[0];
164 int entries = table[1];
165 int* keys = (int*)&table[2];
166 int* targets = &keys[entries];
167 LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident <<
168 ", entries: " << std::dec << entries;
169 for (int i = 0; i < entries; i++) {
170 LOG(INFO) << " Key[" << keys[i] << "] -> 0x" << std::hex <<
171 targets[i];
172 }
173}
174
175static void dumpPackedSwitchTable(const u2* table)
176 /*
177 * Packed switch data format:
178 * ushort ident = 0x0100 magic value
179 * ushort size number of entries in the table
180 * int first_key first (and lowest) switch case value
181 * int targets[size] branch targets, relative to switch opcode
182 *
183 * Total size is (4+size*2) 16-bit code units.
184 */
185{
186 u2 ident = table[0];
187 int* targets = (int*)&table[4];
188 int entries = table[1];
189 int lowKey = s4FromSwitchData(&table[2]);
190 LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident <<
191 ", entries: " << std::dec << entries << ", lowKey: " << lowKey;
192 for (int i = 0; i < entries; i++) {
193 LOG(INFO) << " Key[" << (i + lowKey) << "] -> 0x" << std::hex <<
194 targets[i];
195 }
196}
197
198/*
199 * The sparse table in the literal pool is an array of <key,displacement>
200 * pairs. For each set, we'll load them as a pair using ldmia.
201 * This means that the register number of the temp we use for the key
202 * must be lower than the reg for the displacement.
203 *
204 * The test loop will look something like:
205 *
206 * adr rBase, <table>
207 * ldr rVal, [rSP, vRegOff]
208 * mov rIdx, #tableSize
209 * lp:
210 * ldmia rBase!, {rKey, rDisp}
211 * sub rIdx, #1
212 * cmp rVal, rKey
213 * ifeq
214 * add rPC, rDisp ; This is the branch from which we compute displacement
215 * cbnz rIdx, lp
216 */
217static void genSparseSwitch(CompilationUnit* cUnit, MIR* mir,
218 RegLocation rlSrc)
219{
220 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
221 if (cUnit->printMe) {
222 dumpSparseSwitchTable(table);
223 }
224 // Add the table to the list - we'll process it later
225 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
226 true);
227 tabRec->table = table;
228 tabRec->vaddr = mir->offset;
229 int size = table[1];
230 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
231 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
232
233 // Get the switch value
234 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
235 int rBase = oatAllocTemp(cUnit);
236 /* Allocate key and disp temps */
237 int rKey = oatAllocTemp(cUnit);
238 int rDisp = oatAllocTemp(cUnit);
239 // Make sure rKey's register number is less than rDisp's number for ldmia
240 if (rKey > rDisp) {
241 int tmp = rDisp;
242 rDisp = rKey;
243 rKey = tmp;
244 }
245 // Materialize a pointer to the switch table
246 newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec);
247 // Set up rIdx
248 int rIdx = oatAllocTemp(cUnit);
249 loadConstant(cUnit, rIdx, size);
250 // Establish loop branch target
251 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
252 target->defMask = ENCODE_ALL;
253 // Load next key/disp
254 newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
255 opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
256 // Go if match. NOTE: No instruction set switch here - must stay Thumb2
257 genIT(cUnit, kArmCondEq, "");
258 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
259 tabRec->bxInst = switchBranch;
260 // Needs to use setflags encoding here
261 newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
262 ArmLIR* branch = opCondBranch(cUnit, kArmCondNe);
263 branch->generic.target = (LIR*)target;
264}
265
266
267static void genPackedSwitch(CompilationUnit* cUnit, MIR* mir,
268 RegLocation rlSrc)
269{
270 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
271 if (cUnit->printMe) {
272 dumpPackedSwitchTable(table);
273 }
274 // Add the table to the list - we'll process it later
275 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
276 true);
277 tabRec->table = table;
278 tabRec->vaddr = mir->offset;
279 int size = table[1];
280 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
281 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
282
283 // Get the switch value
284 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
285 int tableBase = oatAllocTemp(cUnit);
286 // Materialize a pointer to the switch table
287 newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec);
288 int lowKey = s4FromSwitchData(&table[2]);
289 int keyReg;
290 // Remove the bias, if necessary
291 if (lowKey == 0) {
292 keyReg = rlSrc.lowReg;
293 } else {
294 keyReg = oatAllocTemp(cUnit);
295 opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
296 }
297 // Bounds check - if < 0 or >= size continue following switch
298 opRegImm(cUnit, kOpCmp, keyReg, size-1);
299 ArmLIR* branchOver = opCondBranch(cUnit, kArmCondHi);
300
301 // Load the displacement from the switch table
302 int dispReg = oatAllocTemp(cUnit);
303 loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
304
305 // ..and go! NOTE: No instruction set switch here - must stay Thumb2
306 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
307 tabRec->bxInst = switchBranch;
308
309 /* branchOver target here */
310 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
311 target->defMask = ENCODE_ALL;
312 branchOver->generic.target = (LIR*)target;
313}
314
315/*
316 * Array data table format:
317 * ushort ident = 0x0300 magic value
318 * ushort width width of each element in the table
319 * uint size number of elements in the table
320 * ubyte data[size*width] table of data values (may contain a single-byte
321 * padding at the end)
322 *
323 * Total size is 4+(width * size + 1)/2 16-bit code units.
324 */
325static void genFillArrayData(CompilationUnit* cUnit, MIR* mir,
326 RegLocation rlSrc)
327{
328 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
329 // Add the table to the list - we'll process it later
330 FillArrayData *tabRec = (FillArrayData *)
331 oatNew(sizeof(FillArrayData), true);
332 tabRec->table = table;
333 tabRec->vaddr = mir->offset;
334 u2 width = tabRec->table[1];
335 u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16);
336 tabRec->size = (size * width) + 8;
337
338 oatInsertGrowableList(&cUnit->fillArrayData, (intptr_t)tabRec);
339
340 // Making a call - use explicit registers
341 oatFlushAllRegs(cUnit); /* Everything to home location */
342 loadValueDirectFixed(cUnit, rlSrc, r0);
343 loadWordDisp(cUnit, rSELF,
344 OFFSETOF_MEMBER(Thread, pArtHandleFillArrayDataNoThrow), rLR);
345 // Materialize a pointer to the switch table
346 newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec);
347 opReg(cUnit, kOpBlx, rLR);
348 oatClobberCallRegs(cUnit);
349}
350
351/*
352 * Mark garbage collection card. Skip if the value we're storing is null.
353 */
354static void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
355{
356 int regCardBase = oatAllocTemp(cUnit);
357 int regCardNo = oatAllocTemp(cUnit);
358 ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
359 loadWordDisp(cUnit, rSELF, offsetof(Thread, cardTable),
360 regCardBase);
361 opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
362 storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
363 kUnsignedByte);
364 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
365 target->defMask = ENCODE_ALL;
366 branchOver->generic.target = (LIR*)target;
367 oatFreeTemp(cUnit, regCardBase);
368 oatFreeTemp(cUnit, regCardNo);
369}
370
371static void genIGetX(CompilationUnit* cUnit, MIR* mir, OpSize size,
372 RegLocation rlDest, RegLocation rlObj)
373{
374 Field* fieldPtr =
375 cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
376 if (fieldPtr == NULL) {
377 /*
378 * With current scheme, we should never be in a situation
379 * in which the fieldPtr is null here. If something changes
380 * and we need to handle it, generate code to load the field
381 * pointer at run-time.
382 */
383 LOG(FATAL) << "Unexpected null field pointer";
384 }
385#if ANDROID_SMP != 0
386 bool isVolatile = dvmIsVolatileField(fieldPtr);
387#else
388 bool isVolatile = false;
389#endif
390 int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
391 RegLocation rlResult;
392 RegisterClass regClass = oatRegClassBySize(size);
393 rlObj = loadValue(cUnit, rlObj, kCoreReg);
394 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
395 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
396 NULL);/* null object? */
397 loadBaseDisp(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg,
398 size, rlObj.sRegLow);
399 if (isVolatile) {
400 oatGenMemBarrier(cUnit, kSY);
401 }
402
403 storeValue(cUnit, rlDest, rlResult);
404}
405
406static void genIPutX(CompilationUnit* cUnit, MIR* mir, OpSize size,
407 RegLocation rlSrc, RegLocation rlObj, bool isObject)
408{
409 Field* fieldPtr =
410 cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
411 if (fieldPtr == NULL) {
412 /*
413 * With current scheme, we should never be in a situation
414 * in which the fieldPtr is null here. If something changes
415 * and we need to handle it, generate code to load the field
416 * pointer at run-time.
417 */
418 LOG(FATAL) << "Unexpected null field pointer";
419 }
420#if ANDROID_SMP != 0
421 bool isVolatile = dvmIsVolatileField(fieldPtr);
422#else
423 bool isVolatile = false;
424#endif
425 int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
426 RegisterClass regClass = oatRegClassBySize(size);
427 rlObj = loadValue(cUnit, rlObj, kCoreReg);
428 rlSrc = loadValue(cUnit, rlSrc, regClass);
429 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
430 NULL);/* null object? */
431
432 if (isVolatile) {
433 oatGenMemBarrier(cUnit, kSY);
434 }
435 storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
436 if (isObject) {
437 /* NOTE: marking card based on object head */
438 markGCCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
439 }
440}
441
442static void genIGetWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
443 RegLocation rlObj)
444{
445 Field* fieldPtr =
446 cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
447 if (fieldPtr == NULL) {
448 /*
449 * With current scheme, we should never be in a situation
450 * in which the fieldPtr is null here. If something changes
451 * and we need to handle it, generate code to load the field
452 * pointer at run-time.
453 */
454 LOG(FATAL) << "Unexpected null field pointer";
455 }
456#if ANDROID_SMP != 0
457 bool isVolatile = dvmIsVolatileField(fieldPtr);
458#else
459 bool isVolatile = false;
460#endif
461 int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
462 RegLocation rlResult;
463 rlObj = loadValue(cUnit, rlObj, kCoreReg);
464 int regPtr = oatAllocTemp(cUnit);
465
466 assert(rlDest.wide);
467
468 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
469 NULL);/* null object? */
470 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
471 rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
472
473 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
474
475 if (isVolatile) {
476 oatGenMemBarrier(cUnit, kSY);
477 }
478
479 oatFreeTemp(cUnit, regPtr);
480 storeValueWide(cUnit, rlDest, rlResult);
481}
482
483static void genIPutWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
484 RegLocation rlObj)
485{
486 Field* fieldPtr =
487 cUnit->method->clazz->pDvmDex->pResFields[mir->dalvikInsn.vC];
488 if (fieldPtr == NULL) {
489 /*
490 * With current scheme, we should never be in a situation
491 * in which the fieldPtr is null here. If something changes
492 * and we need to handle it, generate code to load the field
493 * pointer at run-time.
494 */
495 LOG(FATAL) << "Unexpected null field pointer";
496 }
497#if ANDROID_SMP != 0
498 bool isVolatile = dvmIsVolatileField(fieldPtr);
499#else
500 bool isVolatile = false;
501#endif
502 int fieldOffset = ((InstField *)fieldPtr)->byteOffset;
503
504 rlObj = loadValue(cUnit, rlObj, kCoreReg);
505 int regPtr;
506 rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
507 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
508 NULL);/* null object? */
509 regPtr = oatAllocTemp(cUnit);
510 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
511
512 if (isVolatile) {
513 oatGenMemBarrier(cUnit, kSY);
514 }
515 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
516
517 oatFreeTemp(cUnit, regPtr);
518}
519
520static void genConstClass(CompilationUnit* cUnit, MIR* mir,
521 RegLocation rlDest, RegLocation rlSrc)
522{
523 void* classPtr = (void*)
524 (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
525
526 if (classPtr == NULL) {
527 LOG(FATAL) << "Unexpected null class pointer";
528 }
529
530 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
531 loadConstantNoClobber(cUnit, rlResult.lowReg, (int) classPtr );
532 storeValue(cUnit, rlDest, rlResult);
533}
534
535static void genConstString(CompilationUnit* cUnit, MIR* mir,
536 RegLocation rlDest, RegLocation rlSrc)
537{
538 void* strPtr = (void*)
539 (cUnit->method->clazz->pDvmDex->pResStrings[mir->dalvikInsn.vB]);
540
541 if (strPtr == NULL) {
542 /* Shouldn't happen */
543 LOG(FATAL) << "Unexpected null const string pointer";
544 }
545
546 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
547 loadConstantNoClobber(cUnit, rlResult.lowReg, (int) strPtr );
548 storeValue(cUnit, rlDest, rlResult);
549}
550
551static void genNewInstance(CompilationUnit* cUnit, MIR* mir,
552 RegLocation rlDest)
553{
554 ClassObject* classPtr = (ClassObject *)
555 (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
556
557 if (classPtr == NULL) {
558 /* Shouldn't happen */
559 LOG(FATAL) << "Unexpected null class pointer";
560 }
561
562 // Verifier should have already rejected abstract/interface
563 assert((classPtr->accessFlags & (ACC_INTERFACE|ACC_ABSTRACT)) == 0);
564 oatFlushAllRegs(cUnit); /* Everything to home location */
565 loadWordDisp(cUnit, rSELF,
566 OFFSETOF_MEMBER(Thread, pArtAllocObjectNoThrow), rLR);
567 loadConstant(cUnit, r0, (int) classPtr);
568 loadConstant(cUnit, r1, ALLOC_DONT_TRACK);
569 opReg(cUnit, kOpBlx, rLR);
570 oatClobberCallRegs(cUnit);
571 RegLocation rlResult = oatGetReturn(cUnit);
572 storeValue(cUnit, rlDest, rlResult);
573}
574
/*
 * Generate code for a throw: load a helper entry point from the Thread
 * structure into rLR, place the exception object in r1 and the self
 * pointer in r0, then call through rLR.
 *
 * NOTE(review): the entry point loaded here is pArtAllocObjectNoThrow -
 * the same member genNewInstance uses for allocation.  This looks like a
 * copy/paste slip; confirm which Thread member is the throw helper.
 * NOTE(review): unlike the other helper calls in this file, no
 * oatFlushAllRegs() precedes the call - confirm that is intentional.
 */
void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
{
    loadWordDisp(cUnit, rSELF,
                 OFFSETOF_MEMBER(Thread, pArtAllocObjectNoThrow), rLR);
    loadValueDirectFixed(cUnit, rlSrc, r1);  /* Exception object */
    genRegCopy(cUnit, r0, rSELF);            /* arg0: self pointer */
    opReg(cUnit, kOpBlx, rLR);
}
583
584static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
585 RegLocation rlSrc)
586{
587 // May generate a call - use explicit registers
588 RegLocation rlResult;
589 ClassObject* classPtr =
590 (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vC]);
591 if (classPtr == NULL) {
592 /* Shouldn't happen */
593 LOG(FATAL) << "Unexpected null class pointer";
594 }
595 oatFlushAllRegs(cUnit); /* Everything to home location */
596 loadValueDirectFixed(cUnit, rlSrc, r0); /* Ref */
597 loadConstant(cUnit, r2, (int) classPtr );
598 /* When taken r0 has NULL which can be used for store directly */
599 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, r0, 0);
600 /* r1 now contains object->clazz */
601 loadWordDisp(cUnit, r0, offsetof(Object, clazz), r1);
602 /* r1 now contains object->clazz */
603 loadWordDisp(cUnit, rSELF,
604 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivial), rLR);
605 loadConstant(cUnit, r0, 1); /* Assume true */
606 opRegReg(cUnit, kOpCmp, r1, r2);
607 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
608 genRegCopy(cUnit, r0, r1);
609 genRegCopy(cUnit, r1, r2);
610 opReg(cUnit, kOpBlx, rLR);
611 oatClobberCallRegs(cUnit);
612 /* branch target here */
613 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
614 target->defMask = ENCODE_ALL;
615 rlResult = oatGetReturn(cUnit);
616 storeValue(cUnit, rlDest, rlResult);
617 branch1->generic.target = (LIR*)target;
618 branch2->generic.target = (LIR*)target;
619}
620
621static void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
622{
623 ClassObject* classPtr =
624 (cUnit->method->clazz->pDvmDex->pResClasses[mir->dalvikInsn.vB]);
625 if (classPtr == NULL) {
626 /* Shouldn't happen with our current model */
627 LOG(FATAL) << "Unexpected null class pointer";
628 }
629 oatFlushAllRegs(cUnit); /* Everything to home location */
630 loadConstant(cUnit, r1, (int) classPtr );
631 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
632 /* Null? */
633 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq,
634 rlSrc.lowReg, 0);
635 /*
636 * rlSrc.lowReg now contains object->clazz. Note that
637 * it could have been allocated r0, but we're okay so long
638 * as we don't do anything desctructive until r0 is loaded
639 * with clazz.
640 */
641 /* r0 now contains object->clazz */
642 loadWordDisp(cUnit, rlSrc.lowReg, offsetof(Object, clazz), r0);
643 loadWordDisp(cUnit, rSELF,
644 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivialNoThrow), rLR);
645 opRegReg(cUnit, kOpCmp, r0, r1);
646 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
647 // Assume success - if not, artInstanceOfNonTrivial will handle throw
648 opReg(cUnit, kOpBlx, rLR);
649 oatClobberCallRegs(cUnit);
650 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
651 target->defMask = ENCODE_ALL;
652 branch1->generic.target = (LIR*)target;
653 branch2->generic.target = (LIR*)target;
654}
655
656static void genNegFloat(CompilationUnit* cUnit, RegLocation rlDest,
657 RegLocation rlSrc)
658{
659 RegLocation rlResult;
660 rlSrc = loadValue(cUnit, rlSrc, kFPReg);
661 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
662 newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
663 storeValue(cUnit, rlDest, rlResult);
664}
665
666static void genNegDouble(CompilationUnit* cUnit, RegLocation rlDest,
667 RegLocation rlSrc)
668{
669 RegLocation rlResult;
670 rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
671 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
672 newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
673 S2D(rlSrc.lowReg, rlSrc.highReg));
674 storeValueWide(cUnit, rlDest, rlResult);
675}
676
677/*
678 * To avoid possible conflicts, we use a lot of temps here. Note that
679 * our usage of Thumb2 instruction forms avoids the problems with register
680 * reuse for multiply instructions prior to arm6.
681 */
682static void genMulLong(CompilationUnit* cUnit, RegLocation rlDest,
683 RegLocation rlSrc1, RegLocation rlSrc2)
684{
685 RegLocation rlResult;
686 int resLo = oatAllocTemp(cUnit);
687 int resHi = oatAllocTemp(cUnit);
688 int tmp1 = oatAllocTemp(cUnit);
689
690 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
691 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
692
693 newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
694 newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
695 newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
696 newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
697 oatFreeTemp(cUnit, tmp1);
698
699 rlResult = oatGetReturnWide(cUnit);
700 rlResult.lowReg = resLo;
701 rlResult.highReg = resHi;
702 storeValueWide(cUnit, rlDest, rlResult);
703}
704
705static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
706 OpKind secondOp, RegLocation rlDest,
707 RegLocation rlSrc1, RegLocation rlSrc2)
708{
709 RegLocation rlResult;
710 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
711 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
712 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
713 opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
714 opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
715 rlSrc2.highReg);
716 storeValueWide(cUnit, rlDest, rlResult);
717}
718
719void oatInitializeRegAlloc(CompilationUnit* cUnit)
720{
721 int numRegs = sizeof(coreRegs)/sizeof(*coreRegs);
722 int numReserved = sizeof(reservedRegs)/sizeof(*reservedRegs);
723 int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
724 int numFPRegs = sizeof(fpRegs)/sizeof(*fpRegs);
725 int numFPTemps = sizeof(fpTemps)/sizeof(*fpTemps);
726 RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true);
727 cUnit->regPool = pool;
728 pool->numCoreRegs = numRegs;
729 pool->coreRegs = (RegisterInfo *)
730 oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true);
731 pool->numFPRegs = numFPRegs;
732 pool->FPRegs = (RegisterInfo *)
733 oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true);
734 oatInitPool(pool->coreRegs, coreRegs, pool->numCoreRegs);
735 oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
736 // Keep special registers from being allocated
737 for (int i = 0; i < numReserved; i++) {
738 oatMarkInUse(cUnit, reservedRegs[i]);
739 }
740 // Mark temp regs - all others not in use can be used for promotion
741 for (int i = 0; i < numTemps; i++) {
742 oatMarkTemp(cUnit, coreTemps[i]);
743 }
744 for (int i = 0; i < numFPTemps; i++) {
745 oatMarkTemp(cUnit, fpTemps[i]);
746 }
747 pool->nullCheckedRegs =
748 oatAllocBitVector(cUnit->numSSARegs, false);
749}
750
751/*
752 * Handle simple case (thin lock) inline. If it's complicated, bail
753 * out to the heavyweight lock/unlock routines. We'll use dedicated
754 * registers here in order to be in the right position in case we
755 * to bail to dvm[Lock/Unlock]Object(self, object)
756 *
757 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object
758 * r1 -> object [arg1 for dvm[Lock/Unlock]Object
759 * r2 -> intial contents of object->lock, later result of strex
760 * r3 -> self->threadId
761 * r12 -> allow to be used by utilities as general temp
762 *
763 * The result of the strex is 0 if we acquire the lock.
764 *
765 * See comments in Sync.c for the layout of the lock word.
766 * Of particular interest to this code is the test for the
767 * simple case - which we handle inline. For monitor enter, the
768 * simple case is thin lock, held by no-one. For monitor exit,
769 * the simple case is thin lock, held by the unlocking thread with
770 * a recurse count of 0.
771 *
772 * A minor complication is that there is a field in the lock word
773 * unrelated to locking: the hash state. This field must be ignored, but
774 * preserved.
775 *
776 */
777static void genMonitorEnter(CompilationUnit* cUnit, MIR* mir,
778 RegLocation rlSrc)
779{
780 ArmLIR* target;
781 ArmLIR* hopTarget;
782 ArmLIR* branch;
783 ArmLIR* hopBranch;
784
785 oatFlushAllRegs(cUnit);
786 assert(LW_SHAPE_THIN == 0);
787 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
788 oatLockAllTemps(cUnit); // Prepare for explicit register usage
789 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
790 loadWordDisp(cUnit, rSELF, offsetof(Thread, threadId), r3); // Get threadId
791 newLIR3(cUnit, kThumb2Ldrex, r2, r1,
792 offsetof(Object, lock) >> 2); // Get object->lock
793 opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
794 // Is lock unheld on lock or held by us (==threadId) on unlock?
795 newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, LW_LOCK_OWNER_SHIFT - 1);
796 newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
797 LW_LOCK_OWNER_SHIFT - 1);
798 hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
799 newLIR4(cUnit, kThumb2Strex, r2, r3, r1, offsetof(Object, lock) >> 2);
800 oatGenMemBarrier(cUnit, kSY);
801 branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);
802
803 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
804 hopTarget->defMask = ENCODE_ALL;
805 hopBranch->generic.target = (LIR*)hopTarget;
806
807 // Go expensive route - artLockObjectNoThrow(self, obj);
808 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtLockObjectNoThrow),
809 rLR);
810 genRegCopy(cUnit, r0, rSELF);
811 newLIR1(cUnit, kThumbBlxR, rLR);
812
813 // Resume here
814 target = newLIR0(cUnit, kArmPseudoTargetLabel);
815 target->defMask = ENCODE_ALL;
816 branch->generic.target = (LIR*)target;
817}
818
819/*
820 * For monitor unlock, we don't have to use ldrex/strex. Once
821 * we've determined that the lock is thin and that we own it with
822 * a zero recursion count, it's safe to punch it back to the
823 * initial, unlock thin state with a store word.
824 */
825static void genMonitorExit(CompilationUnit* cUnit, MIR* mir,
826 RegLocation rlSrc)
827{
828 ArmLIR* target;
829 ArmLIR* branch;
830 ArmLIR* hopTarget;
831 ArmLIR* hopBranch;
832
833 assert(LW_SHAPE_THIN == 0);
834 oatFlushAllRegs(cUnit);
835 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
836 oatLockAllTemps(cUnit); // Prepare for explicit register usage
837 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
838 loadWordDisp(cUnit, r1, offsetof(Object, lock), r2); // Get object->lock
839 loadWordDisp(cUnit, rSELF, offsetof(Thread, threadId), r3); // Get threadId
840 // Is lock unheld on lock or held by us (==threadId) on unlock?
841 opRegRegImm(cUnit, kOpAnd, r12, r2,
842 (LW_HASH_STATE_MASK << LW_HASH_STATE_SHIFT));
843 opRegImm(cUnit, kOpLsl, r3, LW_LOCK_OWNER_SHIFT); // Align owner
844 newLIR3(cUnit, kThumb2Bfc, r2, LW_HASH_STATE_SHIFT,
845 LW_LOCK_OWNER_SHIFT - 1);
846 opRegReg(cUnit, kOpSub, r2, r3);
847 hopBranch = opCondBranch(cUnit, kArmCondNe);
848 oatGenMemBarrier(cUnit, kSY);
849 storeWordDisp(cUnit, r1, offsetof(Object, lock), r12);
850 branch = opNone(cUnit, kOpUncondBr);
851
852 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
853 hopTarget->defMask = ENCODE_ALL;
854 hopBranch->generic.target = (LIR*)hopTarget;
855
856 // Go expensive route - artUnlockObjectNoThrow(self, obj);
857 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtUnlockObjectNoThrow),
858 rLR);
859 genRegCopy(cUnit, r0, rSELF);
860 newLIR1(cUnit, kThumbBlxR, rLR);
861
862 // Resume here
863 target = newLIR0(cUnit, kArmPseudoTargetLabel);
864 target->defMask = ENCODE_ALL;
865 branch->generic.target = (LIR*)target;
866}
867
868/*
869 * 64-bit 3way compare function.
870 * mov rX, #-1
871 * cmp op1hi, op2hi
872 * blt done
873 * bgt flip
874 * sub rX, op1lo, op2lo (treat as unsigned)
875 * beq done
876 * ite hi
877 * mov(hi) rX, #-1
878 * mov(!hi) rX, #1
879 * flip:
880 * neg rX
881 * done:
882 */
883static void genCmpLong(CompilationUnit* cUnit, MIR* mir,
884 RegLocation rlDest, RegLocation rlSrc1,
885 RegLocation rlSrc2)
886{
887 RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
888 ArmLIR* target1;
889 ArmLIR* target2;
890 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
891 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
892 rlTemp.lowReg = oatAllocTemp(cUnit);
893 loadConstant(cUnit, rlTemp.lowReg, -1);
894 opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
895 ArmLIR* branch1 = opCondBranch(cUnit, kArmCondLt);
896 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondGt);
897 opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
898 ArmLIR* branch3 = opCondBranch(cUnit, kArmCondEq);
899
900 genIT(cUnit, kArmCondHi, "E");
901 newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
902 loadConstant(cUnit, rlTemp.lowReg, 1);
903 genBarrier(cUnit);
904
905 target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
906 target2->defMask = -1;
907 opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);
908
909 target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
910 target1->defMask = -1;
911
912 storeValue(cUnit, rlDest, rlTemp);
913
914 branch1->generic.target = (LIR*)target1;
915 branch2->generic.target = (LIR*)target2;
916 branch3->generic.target = branch1->generic.target;
917}
918
919static void genMultiplyByTwoBitMultiplier(CompilationUnit* cUnit,
920 RegLocation rlSrc, RegLocation rlResult, int lit,
921 int firstBit, int secondBit)
922{
923 opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
924 encodeShift(kArmLsl, secondBit - firstBit));
925 if (firstBit != 0) {
926 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
927 }
928}
929
930static bool genConversionCall(CompilationUnit* cUnit, MIR* mir, int funcOffset,
931 int srcSize, int tgtSize)
932{
933 /*
934 * Don't optimize the register usage since it calls out to support
935 * functions
936 */
937 RegLocation rlSrc;
938 RegLocation rlDest;
939 oatFlushAllRegs(cUnit); /* Send everything to home location */
940 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
941 if (srcSize == 1) {
942 rlSrc = oatGetSrc(cUnit, mir, 0);
943 loadValueDirectFixed(cUnit, rlSrc, r0);
944 } else {
945 rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
946 loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
947 }
948 opReg(cUnit, kOpBlx, rLR);
949 oatClobberCallRegs(cUnit);
950 if (tgtSize == 1) {
951 RegLocation rlResult;
952 rlDest = oatGetDest(cUnit, mir, 0);
953 rlResult = oatGetReturn(cUnit);
954 storeValue(cUnit, rlDest, rlResult);
955 } else {
956 RegLocation rlResult;
957 rlDest = oatGetDestWide(cUnit, mir, 0, 1);
958 rlResult = oatGetReturnWide(cUnit);
959 storeValueWide(cUnit, rlDest, rlResult);
960 }
961 return false;
962}
963
964static bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
965 RegLocation rlDest, RegLocation rlSrc1,
966 RegLocation rlSrc2)
967{
968 RegLocation rlResult;
969 int funcOffset;
970
971 switch (mir->dalvikInsn.opcode) {
972 case OP_ADD_FLOAT_2ADDR:
973 case OP_ADD_FLOAT:
974 funcOffset = OFFSETOF_MEMBER(Thread, pFadd);
975 break;
976 case OP_SUB_FLOAT_2ADDR:
977 case OP_SUB_FLOAT:
978 funcOffset = OFFSETOF_MEMBER(Thread, pFsub);
979 break;
980 case OP_DIV_FLOAT_2ADDR:
981 case OP_DIV_FLOAT:
982 funcOffset = OFFSETOF_MEMBER(Thread, pFdiv);
983 break;
984 case OP_MUL_FLOAT_2ADDR:
985 case OP_MUL_FLOAT:
986 funcOffset = OFFSETOF_MEMBER(Thread, pFmul);
987 break;
988 case OP_REM_FLOAT_2ADDR:
989 case OP_REM_FLOAT:
990 funcOffset = OFFSETOF_MEMBER(Thread, pFmodf);
991 break;
992 case OP_NEG_FLOAT: {
993 genNegFloat(cUnit, rlDest, rlSrc1);
994 return false;
995 }
996 default:
997 return true;
998 }
999 oatFlushAllRegs(cUnit); /* Send everything to home location */
1000 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1001 loadValueDirectFixed(cUnit, rlSrc1, r0);
1002 loadValueDirectFixed(cUnit, rlSrc2, r1);
1003 opReg(cUnit, kOpBlx, rLR);
1004 oatClobberCallRegs(cUnit);
1005 rlResult = oatGetReturn(cUnit);
1006 storeValue(cUnit, rlDest, rlResult);
1007 return false;
1008}
1009
1010static bool genArithOpDoublePortable(CompilationUnit* cUnit, MIR* mir,
1011 RegLocation rlDest, RegLocation rlSrc1,
1012 RegLocation rlSrc2)
1013{
1014 RegLocation rlResult;
1015 int funcOffset;
1016
1017 switch (mir->dalvikInsn.opcode) {
1018 case OP_ADD_DOUBLE_2ADDR:
1019 case OP_ADD_DOUBLE:
1020 funcOffset = OFFSETOF_MEMBER(Thread, pDadd);
1021 break;
1022 case OP_SUB_DOUBLE_2ADDR:
1023 case OP_SUB_DOUBLE:
1024 funcOffset = OFFSETOF_MEMBER(Thread, pDsub);
1025 break;
1026 case OP_DIV_DOUBLE_2ADDR:
1027 case OP_DIV_DOUBLE:
1028 funcOffset = OFFSETOF_MEMBER(Thread, pDdiv);
1029 break;
1030 case OP_MUL_DOUBLE_2ADDR:
1031 case OP_MUL_DOUBLE:
1032 funcOffset = OFFSETOF_MEMBER(Thread, pDmul);
1033 break;
1034 case OP_REM_DOUBLE_2ADDR:
1035 case OP_REM_DOUBLE:
1036 funcOffset = OFFSETOF_MEMBER(Thread, pFmod);
1037 break;
1038 case OP_NEG_DOUBLE: {
1039 genNegDouble(cUnit, rlDest, rlSrc1);
1040 return false;
1041 }
1042 default:
1043 return true;
1044 }
1045 oatFlushAllRegs(cUnit); /* Send everything to home location */
1046 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1047 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1048 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1049 opReg(cUnit, kOpBlx, rLR);
1050 oatClobberCallRegs(cUnit);
1051 rlResult = oatGetReturnWide(cUnit);
1052 storeValueWide(cUnit, rlDest, rlResult);
1053 return false;
1054}
1055
1056static bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
1057{
1058 Opcode opcode = mir->dalvikInsn.opcode;
1059
1060 switch (opcode) {
1061 case OP_INT_TO_FLOAT:
1062 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2f),
1063 1, 1);
1064 case OP_FLOAT_TO_INT:
1065 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2iz),
1066 1, 1);
1067 case OP_DOUBLE_TO_FLOAT:
1068 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2f),
1069 2, 1);
1070 case OP_FLOAT_TO_DOUBLE:
1071 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2d),
1072 1, 2);
1073 case OP_INT_TO_DOUBLE:
1074 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2d),
1075 1, 2);
1076 case OP_DOUBLE_TO_INT:
1077 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2iz),
1078 2, 1);
1079 case OP_FLOAT_TO_LONG:
1080 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
1081 pArtF2l), 1, 2);
1082 case OP_LONG_TO_FLOAT:
1083 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2f),
1084 2, 1);
1085 case OP_DOUBLE_TO_LONG:
1086 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
1087 pArtD2l), 2, 2);
1088 case OP_LONG_TO_DOUBLE:
1089 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2d),
1090 2, 2);
1091 default:
1092 return true;
1093 }
1094 return false;
1095}
1096
1097/* Generate conditional branch instructions */
1098static ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
1099 ArmConditionCode cond,
1100 ArmLIR* target)
1101{
1102 ArmLIR* branch = opCondBranch(cUnit, cond);
1103 branch->generic.target = (LIR*) target;
1104 return branch;
1105}
1106
1107/* Generate a unconditional branch to go to the interpreter */
1108static inline ArmLIR* genTrap(CompilationUnit* cUnit, int dOffset,
1109 ArmLIR* pcrLabel)
1110{
1111 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1112 return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
1113}
1114
1115/*
1116 * Generate array store
1117 *
1118 */
1119static void genArrayObjectPut(CompilationUnit* cUnit, MIR* mir,
1120 RegLocation rlArray, RegLocation rlIndex,
1121 RegLocation rlSrc, int scale)
1122{
1123 RegisterClass regClass = oatRegClassBySize(kWord);
1124 int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
1125 int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
1126
1127 /* Make sure it's a legal object Put. Use direct regs at first */
1128 loadValueDirectFixed(cUnit, rlArray, r1);
1129 loadValueDirectFixed(cUnit, rlSrc, r0);
1130
1131 /* null array object? */
1132 ArmLIR* pcrLabel = NULL;
1133
1134 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1135 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, r1,
1136 mir->offset, NULL);
1137 }
1138 loadWordDisp(cUnit, rSELF,
1139 OFFSETOF_MEMBER(Thread, pArtCanPutArrayElementNoThrow), rLR);
1140 /* Get the array's clazz */
1141 loadWordDisp(cUnit, r1, offsetof(Object, clazz), r1);
1142 /* Get the object's clazz */
1143 loadWordDisp(cUnit, r0, offsetof(Object, clazz), r0);
1144 opReg(cUnit, kOpBlx, rLR);
1145 oatClobberCallRegs(cUnit);
1146
1147 // Now, redo loadValues in case they didn't survive the call
1148
1149 int regPtr;
1150 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1151 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1152
1153 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1154 oatClobber(cUnit, rlArray.lowReg);
1155 regPtr = rlArray.lowReg;
1156 } else {
1157 regPtr = oatAllocTemp(cUnit);
1158 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1159 }
1160
1161 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1162 int regLen = oatAllocTemp(cUnit);
1163 //NOTE: max live temps(4) here.
1164 /* Get len */
1165 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1166 /* regPtr -> array data */
1167 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1168 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1169 pcrLabel);
1170 oatFreeTemp(cUnit, regLen);
1171 } else {
1172 /* regPtr -> array data */
1173 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1174 }
1175 /* at this point, regPtr points to array, 2 live temps */
1176 rlSrc = loadValue(cUnit, rlSrc, regClass);
1177 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1178 scale, kWord);
1179}
1180
1181/*
1182 * Generate array load
1183 */
1184static void genArrayGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
1185 RegLocation rlArray, RegLocation rlIndex,
1186 RegLocation rlDest, int scale)
1187{
1188 RegisterClass regClass = oatRegClassBySize(size);
1189 int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
1190 int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
1191 RegLocation rlResult;
1192 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1193 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1194 int regPtr;
1195
1196 /* null object? */
1197 ArmLIR* pcrLabel = NULL;
1198
1199 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1200 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
1201 rlArray.lowReg, mir->offset, NULL);
1202 }
1203
1204 regPtr = oatAllocTemp(cUnit);
1205
1206 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1207 int regLen = oatAllocTemp(cUnit);
1208 /* Get len */
1209 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1210 /* regPtr -> array data */
1211 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1212 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1213 pcrLabel);
1214 oatFreeTemp(cUnit, regLen);
1215 } else {
1216 /* regPtr -> array data */
1217 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1218 }
1219 if ((size == kLong) || (size == kDouble)) {
1220 if (scale) {
1221 int rNewIndex = oatAllocTemp(cUnit);
1222 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1223 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1224 oatFreeTemp(cUnit, rNewIndex);
1225 } else {
1226 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1227 }
1228 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1229
1230 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
1231
1232 oatFreeTemp(cUnit, regPtr);
1233 storeValueWide(cUnit, rlDest, rlResult);
1234 } else {
1235 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1236
1237 loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
1238 scale, size);
1239
1240 oatFreeTemp(cUnit, regPtr);
1241 storeValue(cUnit, rlDest, rlResult);
1242 }
1243}
1244
1245/*
1246 * Generate array store
1247 *
1248 */
1249static void genArrayPut(CompilationUnit* cUnit, MIR* mir, OpSize size,
1250 RegLocation rlArray, RegLocation rlIndex,
1251 RegLocation rlSrc, int scale)
1252{
1253 RegisterClass regClass = oatRegClassBySize(size);
1254 int lenOffset = OFFSETOF_MEMBER(ArrayObject, length);
1255 int dataOffset = OFFSETOF_MEMBER(ArrayObject, contents);
1256
1257 int regPtr;
1258 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1259 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1260
1261 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1262 oatClobber(cUnit, rlArray.lowReg);
1263 regPtr = rlArray.lowReg;
1264 } else {
1265 regPtr = oatAllocTemp(cUnit);
1266 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1267 }
1268
1269 /* null object? */
1270 ArmLIR* pcrLabel = NULL;
1271
1272 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1273 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
1274 mir->offset, NULL);
1275 }
1276
1277 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1278 int regLen = oatAllocTemp(cUnit);
1279 //NOTE: max live temps(4) here.
1280 /* Get len */
1281 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1282 /* regPtr -> array data */
1283 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1284 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1285 pcrLabel);
1286 oatFreeTemp(cUnit, regLen);
1287 } else {
1288 /* regPtr -> array data */
1289 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1290 }
1291 /* at this point, regPtr points to array, 2 live temps */
1292 if ((size == kLong) || (size == kDouble)) {
1293 //TODO: need specific wide routine that can handle fp regs
1294 if (scale) {
1295 int rNewIndex = oatAllocTemp(cUnit);
1296 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1297 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1298 oatFreeTemp(cUnit, rNewIndex);
1299 } else {
1300 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1301 }
1302 rlSrc = loadValueWide(cUnit, rlSrc, regClass);
1303
1304 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
1305
1306 oatFreeTemp(cUnit, regPtr);
1307 } else {
1308 rlSrc = loadValue(cUnit, rlSrc, regClass);
1309
1310 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1311 scale, size);
1312 }
1313}
1314
1315static bool genShiftOpLong(CompilationUnit* cUnit, MIR* mir,
1316 RegLocation rlDest, RegLocation rlSrc1,
1317 RegLocation rlShift)
1318{
1319 /*
1320 * Don't mess with the regsiters here as there is a particular calling
1321 * convention to the out-of-line handler.
1322 */
1323 RegLocation rlResult;
1324
1325 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1326 loadValueDirect(cUnit, rlShift, r2);
1327 switch( mir->dalvikInsn.opcode) {
1328 case OP_SHL_LONG:
1329 case OP_SHL_LONG_2ADDR:
1330 //genDispatchToHandler(cUnit, TEMPLATE_SHL_LONG);
1331 assert(0); // unimp
1332 break;
1333 case OP_SHR_LONG:
1334 case OP_SHR_LONG_2ADDR:
1335 //genDispatchToHandler(cUnit, TEMPLATE_SHR_LONG);
1336 assert(0); // unimp
1337 break;
1338 case OP_USHR_LONG:
1339 case OP_USHR_LONG_2ADDR:
1340 //genDispatchToHandler(cUnit, TEMPLATE_USHR_LONG);
1341 assert(0); // unimp
1342 break;
1343 default:
1344 return true;
1345 }
1346 rlResult = oatGetReturnWide(cUnit);
1347 storeValueWide(cUnit, rlDest, rlResult);
1348 return false;
1349}
1350
1351static bool genArithOpLong(CompilationUnit* cUnit, MIR* mir,
1352 RegLocation rlDest, RegLocation rlSrc1,
1353 RegLocation rlSrc2)
1354{
1355 RegLocation rlResult;
1356 OpKind firstOp = kOpBkpt;
1357 OpKind secondOp = kOpBkpt;
1358 bool callOut = false;
1359 int funcOffset;
1360 int retReg = r0;
1361
1362 switch (mir->dalvikInsn.opcode) {
1363 case OP_NOT_LONG:
1364 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1365 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1366 opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg);
1367 opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg);
1368 storeValueWide(cUnit, rlDest, rlResult);
1369 return false;
1370 break;
1371 case OP_ADD_LONG:
1372 case OP_ADD_LONG_2ADDR:
1373 firstOp = kOpAdd;
1374 secondOp = kOpAdc;
1375 break;
1376 case OP_SUB_LONG:
1377 case OP_SUB_LONG_2ADDR:
1378 firstOp = kOpSub;
1379 secondOp = kOpSbc;
1380 break;
1381 case OP_MUL_LONG:
1382 case OP_MUL_LONG_2ADDR:
1383 genMulLong(cUnit, rlDest, rlSrc1, rlSrc2);
1384 return false;
1385 case OP_DIV_LONG:
1386 case OP_DIV_LONG_2ADDR:
1387 callOut = true;
1388 retReg = r0;
1389 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1390 break;
1391 /* NOTE - result is in r2/r3 instead of r0/r1 */
1392 case OP_REM_LONG:
1393 case OP_REM_LONG_2ADDR:
1394 callOut = true;
1395 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1396 retReg = r2;
1397 break;
1398 case OP_AND_LONG_2ADDR:
1399 case OP_AND_LONG:
1400 firstOp = kOpAnd;
1401 secondOp = kOpAnd;
1402 break;
1403 case OP_OR_LONG:
1404 case OP_OR_LONG_2ADDR:
1405 firstOp = kOpOr;
1406 secondOp = kOpOr;
1407 break;
1408 case OP_XOR_LONG:
1409 case OP_XOR_LONG_2ADDR:
1410 firstOp = kOpXor;
1411 secondOp = kOpXor;
1412 break;
1413 case OP_NEG_LONG: {
1414 //TUNING: can improve this using Thumb2 code
1415 int tReg = oatAllocTemp(cUnit);
1416 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1417 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1418 loadConstantNoClobber(cUnit, tReg, 0);
1419 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1420 tReg, rlSrc2.lowReg);
1421 opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg);
1422 genRegCopy(cUnit, rlResult.highReg, tReg);
1423 storeValueWide(cUnit, rlDest, rlResult);
1424 return false;
1425 }
1426 default:
1427 LOG(FATAL) << "Invalid long arith op";
1428 }
1429 if (!callOut) {
1430 genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2);
1431 } else {
1432 // Adjust return regs in to handle case of rem returning r2/r3
1433 oatFlushAllRegs(cUnit); /* Send everything to home location */
1434 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1435 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1436 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1437 opReg(cUnit, kOpBlx, rLR);
1438 oatClobberCallRegs(cUnit);
1439 if (retReg == r0)
1440 rlResult = oatGetReturnWide(cUnit);
1441 else
1442 rlResult = oatGetReturnWideAlt(cUnit);
1443 storeValueWide(cUnit, rlDest, rlResult);
1444 }
1445 return false;
1446}
1447
1448static bool genArithOpInt(CompilationUnit* cUnit, MIR* mir,
1449 RegLocation rlDest, RegLocation rlSrc1,
1450 RegLocation rlSrc2)
1451{
1452 OpKind op = kOpBkpt;
1453 bool callOut = false;
1454 bool checkZero = false;
1455 bool unary = false;
1456 int retReg = r0;
1457 int funcOffset;
1458 RegLocation rlResult;
1459 bool shiftOp = false;
1460
1461 switch (mir->dalvikInsn.opcode) {
1462 case OP_NEG_INT:
1463 op = kOpNeg;
1464 unary = true;
1465 break;
1466 case OP_NOT_INT:
1467 op = kOpMvn;
1468 unary = true;
1469 break;
1470 case OP_ADD_INT:
1471 case OP_ADD_INT_2ADDR:
1472 op = kOpAdd;
1473 break;
1474 case OP_SUB_INT:
1475 case OP_SUB_INT_2ADDR:
1476 op = kOpSub;
1477 break;
1478 case OP_MUL_INT:
1479 case OP_MUL_INT_2ADDR:
1480 op = kOpMul;
1481 break;
1482 case OP_DIV_INT:
1483 case OP_DIV_INT_2ADDR:
1484 callOut = true;
1485 checkZero = true;
1486 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1487 retReg = r0;
1488 break;
1489 /* NOTE: returns in r1 */
1490 case OP_REM_INT:
1491 case OP_REM_INT_2ADDR:
1492 callOut = true;
1493 checkZero = true;
1494 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1495 retReg = r1;
1496 break;
1497 case OP_AND_INT:
1498 case OP_AND_INT_2ADDR:
1499 op = kOpAnd;
1500 break;
1501 case OP_OR_INT:
1502 case OP_OR_INT_2ADDR:
1503 op = kOpOr;
1504 break;
1505 case OP_XOR_INT:
1506 case OP_XOR_INT_2ADDR:
1507 op = kOpXor;
1508 break;
1509 case OP_SHL_INT:
1510 case OP_SHL_INT_2ADDR:
1511 shiftOp = true;
1512 op = kOpLsl;
1513 break;
1514 case OP_SHR_INT:
1515 case OP_SHR_INT_2ADDR:
1516 shiftOp = true;
1517 op = kOpAsr;
1518 break;
1519 case OP_USHR_INT:
1520 case OP_USHR_INT_2ADDR:
1521 shiftOp = true;
1522 op = kOpLsr;
1523 break;
1524 default:
1525 LOG(FATAL) << "Invalid word arith op: " <<
1526 (int)mir->dalvikInsn.opcode;
1527 }
1528 if (!callOut) {
1529 rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
1530 if (unary) {
1531 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1532 opRegReg(cUnit, op, rlResult.lowReg,
1533 rlSrc1.lowReg);
1534 } else {
1535 rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
1536 if (shiftOp) {
1537 int tReg = oatAllocTemp(cUnit);
1538 opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31);
1539 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1540 opRegRegReg(cUnit, op, rlResult.lowReg,
1541 rlSrc1.lowReg, tReg);
1542 oatFreeTemp(cUnit, tReg);
1543 } else {
1544 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1545 opRegRegReg(cUnit, op, rlResult.lowReg,
1546 rlSrc1.lowReg, rlSrc2.lowReg);
1547 }
1548 }
1549 storeValue(cUnit, rlDest, rlResult);
1550 } else {
1551 RegLocation rlResult;
1552 oatFlushAllRegs(cUnit); /* Send everything to home location */
1553 loadValueDirectFixed(cUnit, rlSrc2, r1);
1554 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1555 loadValueDirectFixed(cUnit, rlSrc1, r0);
1556 if (checkZero) {
1557 genNullCheck(cUnit, rlSrc2.sRegLow, r1, mir->offset, NULL);
1558 }
1559 opReg(cUnit, kOpBlx, rLR);
1560 oatClobberCallRegs(cUnit);
1561 if (retReg == r0)
1562 rlResult = oatGetReturn(cUnit);
1563 else
1564 rlResult = oatGetReturnAlt(cUnit);
1565 storeValue(cUnit, rlDest, rlResult);
1566 }
1567 return false;
1568}
1569
1570/* Generate unconditional branch instructions */
1571static ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
1572{
1573 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1574 branch->generic.target = (LIR*) target;
1575 return branch;
1576}
1577
1578/*
1579 * Fetch *self->info.breakFlags. If the breakFlags are non-zero,
1580 * punt to the interpreter.
1581 */
1582static void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
1583{
1584 UNIMPLEMENTED(WARNING);
1585#if 0
1586 int rTemp = oatAllocTemp(cUnit);
1587 ArmLIR* ld;
1588 ld = loadBaseDisp(cUnit, NULL, rSELF,
1589 offsetof(Thread, interpBreak.ctl.breakFlags),
1590 rTemp, kUnsignedByte, INVALID_SREG);
1591 setMemRefType(ld, true /* isLoad */, kMustNotAlias);
1592 genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL);
1593#endif
1594}
1595
1596/*
1597 * The following are the first-level codegen routines that analyze the format
1598 * of each bytecode then either dispatch special purpose codegen routines
1599 * or produce corresponding Thumb instructions directly.
1600 */
1601
// Returns true if at most one bit is set in 'x'.  Note that this also
// holds for x == 0, so callers wanting a strict power of two must rule
// out zero themselves.
static bool isPowerOfTwo(int x)
{
    // Clearing the lowest set bit leaves nothing when <= 1 bit was set.
    const int withoutLowestBit = x & (x - 1);
    return withoutLowestBit == 0;
}
1606
// Returns true if no more than two bits are set in 'x'.
static bool isPopCountLE2(unsigned int x)
{
    // Strip the lowest set bit twice; anything left means >= 3 bits.
    const unsigned int afterFirst = x & (x - 1);
    const unsigned int afterSecond = afterFirst & (afterFirst - 1);
    return afterSecond == 0;
}
1613
// Returns the index of the lowest set bit in 'x'.
// 'x' must be non-zero: with x == 0 neither loop below can terminate.
static int lowestSetBit(unsigned int x) {
    int index = 0;

    // Skip whole zero nibbles first, four bits at a time.
    for (; (x & 0xf) == 0; x >>= 4) {
        index += 4;
    }
    // Then walk the remaining bits one by one.
    for (; (x & 1) == 0; x >>= 1) {
        index++;
    }
    return index;
}
1627
1628// Returns true if it added instructions to 'cUnit' to divide 'rlSrc' by 'lit'
1629// and store the result in 'rlDest'.
1630static bool handleEasyDivide(CompilationUnit* cUnit, Opcode dalvikOpcode,
1631 RegLocation rlSrc, RegLocation rlDest, int lit)
1632{
1633 if (lit < 2 || !isPowerOfTwo(lit)) {
1634 return false;
1635 }
1636 int k = lowestSetBit(lit);
1637 if (k >= 30) {
1638 // Avoid special cases.
1639 return false;
1640 }
1641 bool div = (dalvikOpcode == OP_DIV_INT_LIT8 ||
1642 dalvikOpcode == OP_DIV_INT_LIT16);
1643 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1644 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1645 if (div) {
1646 int tReg = oatAllocTemp(cUnit);
1647 if (lit == 2) {
1648 // Division by 2 is by far the most common division by constant.
1649 opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
1650 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1651 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1652 } else {
1653 opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
1654 opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
1655 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1656 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1657 }
1658 } else {
1659 int cReg = oatAllocTemp(cUnit);
1660 loadConstant(cUnit, cReg, lit - 1);
1661 int tReg1 = oatAllocTemp(cUnit);
1662 int tReg2 = oatAllocTemp(cUnit);
1663 if (lit == 2) {
1664 opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
1665 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1666 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1667 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1668 } else {
1669 opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
1670 opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
1671 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1672 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1673 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1674 }
1675 }
1676 storeValue(cUnit, rlDest, rlResult);
1677 return true;
1678}
1679
1680// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
1681// and store the result in 'rlDest'.
1682static bool handleEasyMultiply(CompilationUnit* cUnit,
1683 RegLocation rlSrc, RegLocation rlDest, int lit)
1684{
1685 // Can we simplify this multiplication?
1686 bool powerOfTwo = false;
1687 bool popCountLE2 = false;
1688 bool powerOfTwoMinusOne = false;
1689 if (lit < 2) {
1690 // Avoid special cases.
1691 return false;
1692 } else if (isPowerOfTwo(lit)) {
1693 powerOfTwo = true;
1694 } else if (isPopCountLE2(lit)) {
1695 popCountLE2 = true;
1696 } else if (isPowerOfTwo(lit + 1)) {
1697 powerOfTwoMinusOne = true;
1698 } else {
1699 return false;
1700 }
1701 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1702 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1703 if (powerOfTwo) {
1704 // Shift.
1705 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg,
1706 lowestSetBit(lit));
1707 } else if (popCountLE2) {
1708 // Shift and add and shift.
1709 int firstBit = lowestSetBit(lit);
1710 int secondBit = lowestSetBit(lit ^ (1 << firstBit));
1711 genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit,
1712 firstBit, secondBit);
1713 } else {
1714 // Reverse subtract: (src << (shift + 1)) - src.
1715 assert(powerOfTwoMinusOne);
1716 // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
1717 int tReg = oatAllocTemp(cUnit);
1718 opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
1719 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
1720 }
1721 storeValue(cUnit, rlDest, rlResult);
1722 return true;
1723}
1724
1725static bool genArithOpIntLit(CompilationUnit* cUnit, MIR* mir,
1726 RegLocation rlDest, RegLocation rlSrc,
1727 int lit)
1728{
1729 Opcode dalvikOpcode = mir->dalvikInsn.opcode;
1730 RegLocation rlResult;
1731 OpKind op = (OpKind)0; /* Make gcc happy */
1732 int shiftOp = false;
1733 bool isDiv = false;
1734 int funcOffset;
1735
1736 switch (dalvikOpcode) {
1737 case OP_RSUB_INT_LIT8:
1738 case OP_RSUB_INT: {
1739 int tReg;
1740 //TUNING: add support for use of Arm rsub op
1741 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1742 tReg = oatAllocTemp(cUnit);
1743 loadConstant(cUnit, tReg, lit);
1744 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1745 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1746 tReg, rlSrc.lowReg);
1747 storeValue(cUnit, rlDest, rlResult);
1748 return false;
1749 break;
1750 }
1751
1752 case OP_ADD_INT_LIT8:
1753 case OP_ADD_INT_LIT16:
1754 op = kOpAdd;
1755 break;
1756 case OP_MUL_INT_LIT8:
1757 case OP_MUL_INT_LIT16: {
1758 if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) {
1759 return false;
1760 }
1761 op = kOpMul;
1762 break;
1763 }
1764 case OP_AND_INT_LIT8:
1765 case OP_AND_INT_LIT16:
1766 op = kOpAnd;
1767 break;
1768 case OP_OR_INT_LIT8:
1769 case OP_OR_INT_LIT16:
1770 op = kOpOr;
1771 break;
1772 case OP_XOR_INT_LIT8:
1773 case OP_XOR_INT_LIT16:
1774 op = kOpXor;
1775 break;
1776 case OP_SHL_INT_LIT8:
1777 lit &= 31;
1778 shiftOp = true;
1779 op = kOpLsl;
1780 break;
1781 case OP_SHR_INT_LIT8:
1782 lit &= 31;
1783 shiftOp = true;
1784 op = kOpAsr;
1785 break;
1786 case OP_USHR_INT_LIT8:
1787 lit &= 31;
1788 shiftOp = true;
1789 op = kOpLsr;
1790 break;
1791
1792 case OP_DIV_INT_LIT8:
1793 case OP_DIV_INT_LIT16:
1794 case OP_REM_INT_LIT8:
1795 case OP_REM_INT_LIT16:
1796 if (lit == 0) {
1797 UNIMPLEMENTED(FATAL);
1798 // FIXME: generate an explicit throw here
1799 return false;
1800 }
1801 if (handleEasyDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit)) {
1802 return false;
1803 }
1804 oatFlushAllRegs(cUnit); /* Everything to home location */
1805 loadValueDirectFixed(cUnit, rlSrc, r0);
1806 oatClobber(cUnit, r0);
1807 if ((dalvikOpcode == OP_DIV_INT_LIT8) ||
1808 (dalvikOpcode == OP_DIV_INT_LIT16)) {
1809 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1810 isDiv = true;
1811 } else {
1812 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1813 isDiv = false;
1814 }
1815 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1816 loadConstant(cUnit, r1, lit);
1817 opReg(cUnit, kOpBlx, rLR);
1818 oatClobberCallRegs(cUnit);
1819 if (isDiv)
1820 rlResult = oatGetReturn(cUnit);
1821 else
1822 rlResult = oatGetReturnAlt(cUnit);
1823 storeValue(cUnit, rlDest, rlResult);
1824 return false;
1825 break;
1826 default:
1827 return true;
1828 }
1829 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1830 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1831 // Avoid shifts by literal 0 - no support in Thumb. Change to copy
1832 if (shiftOp && (lit == 0)) {
1833 genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
1834 } else {
1835 opRegRegImm(cUnit, op, rlResult.lowReg, rlSrc.lowReg, lit);
1836 }
1837 storeValue(cUnit, rlDest, rlResult);
1838 return false;
1839}
1840
1841/* Architectural-specific debugging helpers go here */
1842void oatArchDump(void)
1843{
1844 /* Print compiled opcode in this VM instance */
1845 int i, start, streak;
1846 char buf[1024];
1847
1848 streak = i = 0;
1849 buf[0] = 0;
1850 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1851 i++;
1852 }
1853 if (i == kNumPackedOpcodes) {
1854 return;
1855 }
1856 for (start = i++, streak = 1; i < kNumPackedOpcodes; i++) {
1857 if (opcodeCoverage[i]) {
1858 streak++;
1859 } else {
1860 if (streak == 1) {
1861 sprintf(buf+strlen(buf), "%x,", start);
1862 } else {
1863 sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1);
1864 }
1865 streak = 0;
1866 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1867 i++;
1868 }
1869 if (i < kNumPackedOpcodes) {
1870 streak = 1;
1871 start = i;
1872 }
1873 }
1874 }
1875 if (streak) {
1876 if (streak == 1) {
1877 sprintf(buf+strlen(buf), "%x", start);
1878 } else {
1879 sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1);
1880 }
1881 }
1882 if (strlen(buf)) {
1883 LOG(INFO) << "dalvik.vm.oat.op = " << buf;
1884 }
1885}