blob: 934a3033368d4cb389e757effcfc02673d6ff9e4 [file] [log] [blame]
buzbee67bf8852011-08-17 17:51:35 -07001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/*
 * This file contains codegen for the Thumb2 ISA and is intended to be
 * included by:
 *
 *        Codegen-$(TARGET_ARCH_VARIANT).c
 *
 */
24
25/*
26 * Construct an s4 from two consecutive half-words of switch data.
27 * This needs to check endianness because the DEX optimizer only swaps
28 * half-words in instruction stream.
29 *
30 * "switchData" must be 32-bit aligned.
31 */
32#if __BYTE_ORDER == __LITTLE_ENDIAN
33static inline s4 s4FromSwitchData(const void* switchData) {
34 return *(s4*) switchData;
35}
36#else
37static inline s4 s4FromSwitchData(const void* switchData) {
38 u2* data = switchData;
39 return data[0] | (((s4) data[1]) << 16);
40}
41#endif
42
43/*
44 * Generate a Thumb2 IT instruction, which can nullify up to
45 * four subsequent instructions based on a condition and its
46 * inverse. The condition applies to the first instruction, which
47 * is executed if the condition is met. The string "guide" consists
48 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
49 * A "T" means the instruction is executed if the condition is
50 * met, and an "E" means the instruction is executed if the condition
51 * is not met.
52 */
53static ArmLIR* genIT(CompilationUnit* cUnit, ArmConditionCode code,
54 const char* guide)
55{
56 int mask;
57 int condBit = code & 1;
58 int altBit = condBit ^ 1;
59 int mask3 = 0;
60 int mask2 = 0;
61 int mask1 = 0;
62
63 //Note: case fallthroughs intentional
64 switch(strlen(guide)) {
65 case 3:
66 mask1 = (guide[2] == 'T') ? condBit : altBit;
67 case 2:
68 mask2 = (guide[1] == 'T') ? condBit : altBit;
69 case 1:
70 mask3 = (guide[0] == 'T') ? condBit : altBit;
71 break;
72 case 0:
73 break;
74 default:
75 LOG(FATAL) << "OAT: bad case in genIT";
76 }
77 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
78 (1 << (3 - strlen(guide)));
79 return newLIR2(cUnit, kThumb2It, code, mask);
80}
81
82/*
83 * Insert a kArmPseudoCaseLabel at the beginning of the Dalvik
84 * offset vaddr. This label will be used to fix up the case
85 * branch table during the assembly phase. Be sure to set
86 * all resource flags on this to prevent code motion across
87 * target boundaries. KeyVal is just there for debugging.
88 */
89static ArmLIR* insertCaseLabel(CompilationUnit* cUnit, int vaddr, int keyVal)
90{
91 ArmLIR* lir;
92 for (lir = (ArmLIR*)cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
93 if ((lir->opcode == kArmPseudoDalvikByteCodeBoundary) &&
94 (lir->generic.dalvikOffset == vaddr)) {
95 ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
96 newLabel->generic.dalvikOffset = vaddr;
97 newLabel->opcode = kArmPseudoCaseLabel;
98 newLabel->operands[0] = keyVal;
99 oatInsertLIRAfter((LIR*)lir, (LIR*)newLabel);
100 return newLabel;
101 }
102 }
103 oatCodegenDump(cUnit);
104 LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
105 return NULL; // Quiet gcc
106}
107
108static void markPackedCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
109{
110 const u2* table = tabRec->table;
111 int baseVaddr = tabRec->vaddr;
112 int *targets = (int*)&table[4];
113 int entries = table[1];
114 int lowKey = s4FromSwitchData(&table[2]);
115 for (int i = 0; i < entries; i++) {
116 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
117 i + lowKey);
118 }
119}
120
121static void markSparseCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
122{
123 const u2* table = tabRec->table;
124 int baseVaddr = tabRec->vaddr;
125 int entries = table[1];
126 int* keys = (int*)&table[2];
127 int* targets = &keys[entries];
128 for (int i = 0; i < entries; i++) {
129 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
130 keys[i]);
131 }
132}
133
134void oatProcessSwitchTables(CompilationUnit* cUnit)
135{
136 GrowableListIterator iterator;
137 oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
138 while (true) {
139 SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext(
140 &iterator);
141 if (tabRec == NULL) break;
142 if (tabRec->table[0] == kPackedSwitchSignature)
143 markPackedCaseLabels(cUnit, tabRec);
144 else if (tabRec->table[0] == kSparseSwitchSignature)
145 markSparseCaseLabels(cUnit, tabRec);
146 else {
147 LOG(FATAL) << "Invalid switch table";
148 }
149 }
150}
151
152static void dumpSparseSwitchTable(const u2* table)
153 /*
154 * Sparse switch data format:
155 * ushort ident = 0x0200 magic value
156 * ushort size number of entries in the table; > 0
157 * int keys[size] keys, sorted low-to-high; 32-bit aligned
158 * int targets[size] branch targets, relative to switch opcode
159 *
160 * Total size is (2+size*4) 16-bit code units.
161 */
162{
163 u2 ident = table[0];
164 int entries = table[1];
165 int* keys = (int*)&table[2];
166 int* targets = &keys[entries];
167 LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident <<
168 ", entries: " << std::dec << entries;
169 for (int i = 0; i < entries; i++) {
170 LOG(INFO) << " Key[" << keys[i] << "] -> 0x" << std::hex <<
171 targets[i];
172 }
173}
174
175static void dumpPackedSwitchTable(const u2* table)
176 /*
177 * Packed switch data format:
178 * ushort ident = 0x0100 magic value
179 * ushort size number of entries in the table
180 * int first_key first (and lowest) switch case value
181 * int targets[size] branch targets, relative to switch opcode
182 *
183 * Total size is (4+size*2) 16-bit code units.
184 */
185{
186 u2 ident = table[0];
187 int* targets = (int*)&table[4];
188 int entries = table[1];
189 int lowKey = s4FromSwitchData(&table[2]);
190 LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident <<
191 ", entries: " << std::dec << entries << ", lowKey: " << lowKey;
192 for (int i = 0; i < entries; i++) {
193 LOG(INFO) << " Key[" << (i + lowKey) << "] -> 0x" << std::hex <<
194 targets[i];
195 }
196}
197
198/*
199 * The sparse table in the literal pool is an array of <key,displacement>
200 * pairs. For each set, we'll load them as a pair using ldmia.
201 * This means that the register number of the temp we use for the key
202 * must be lower than the reg for the displacement.
203 *
204 * The test loop will look something like:
205 *
206 * adr rBase, <table>
207 * ldr rVal, [rSP, vRegOff]
208 * mov rIdx, #tableSize
209 * lp:
210 * ldmia rBase!, {rKey, rDisp}
211 * sub rIdx, #1
212 * cmp rVal, rKey
213 * ifeq
214 * add rPC, rDisp ; This is the branch from which we compute displacement
215 * cbnz rIdx, lp
216 */
217static void genSparseSwitch(CompilationUnit* cUnit, MIR* mir,
218 RegLocation rlSrc)
219{
220 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
221 if (cUnit->printMe) {
222 dumpSparseSwitchTable(table);
223 }
224 // Add the table to the list - we'll process it later
225 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
226 true);
227 tabRec->table = table;
228 tabRec->vaddr = mir->offset;
229 int size = table[1];
230 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
231 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
232
233 // Get the switch value
234 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
235 int rBase = oatAllocTemp(cUnit);
236 /* Allocate key and disp temps */
237 int rKey = oatAllocTemp(cUnit);
238 int rDisp = oatAllocTemp(cUnit);
239 // Make sure rKey's register number is less than rDisp's number for ldmia
240 if (rKey > rDisp) {
241 int tmp = rDisp;
242 rDisp = rKey;
243 rKey = tmp;
244 }
245 // Materialize a pointer to the switch table
246 newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec);
247 // Set up rIdx
248 int rIdx = oatAllocTemp(cUnit);
249 loadConstant(cUnit, rIdx, size);
250 // Establish loop branch target
251 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
252 target->defMask = ENCODE_ALL;
253 // Load next key/disp
254 newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
255 opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
256 // Go if match. NOTE: No instruction set switch here - must stay Thumb2
257 genIT(cUnit, kArmCondEq, "");
258 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
259 tabRec->bxInst = switchBranch;
260 // Needs to use setflags encoding here
261 newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
262 ArmLIR* branch = opCondBranch(cUnit, kArmCondNe);
263 branch->generic.target = (LIR*)target;
264}
265
266
267static void genPackedSwitch(CompilationUnit* cUnit, MIR* mir,
268 RegLocation rlSrc)
269{
270 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
271 if (cUnit->printMe) {
272 dumpPackedSwitchTable(table);
273 }
274 // Add the table to the list - we'll process it later
275 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
276 true);
277 tabRec->table = table;
278 tabRec->vaddr = mir->offset;
279 int size = table[1];
280 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
281 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
282
283 // Get the switch value
284 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
285 int tableBase = oatAllocTemp(cUnit);
286 // Materialize a pointer to the switch table
287 newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec);
288 int lowKey = s4FromSwitchData(&table[2]);
289 int keyReg;
290 // Remove the bias, if necessary
291 if (lowKey == 0) {
292 keyReg = rlSrc.lowReg;
293 } else {
294 keyReg = oatAllocTemp(cUnit);
295 opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
296 }
297 // Bounds check - if < 0 or >= size continue following switch
298 opRegImm(cUnit, kOpCmp, keyReg, size-1);
299 ArmLIR* branchOver = opCondBranch(cUnit, kArmCondHi);
300
301 // Load the displacement from the switch table
302 int dispReg = oatAllocTemp(cUnit);
303 loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
304
305 // ..and go! NOTE: No instruction set switch here - must stay Thumb2
306 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
307 tabRec->bxInst = switchBranch;
308
309 /* branchOver target here */
310 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
311 target->defMask = ENCODE_ALL;
312 branchOver->generic.target = (LIR*)target;
313}
314
315/*
316 * Array data table format:
317 * ushort ident = 0x0300 magic value
318 * ushort width width of each element in the table
319 * uint size number of elements in the table
320 * ubyte data[size*width] table of data values (may contain a single-byte
321 * padding at the end)
322 *
323 * Total size is 4+(width * size + 1)/2 16-bit code units.
324 */
325static void genFillArrayData(CompilationUnit* cUnit, MIR* mir,
326 RegLocation rlSrc)
327{
328 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
329 // Add the table to the list - we'll process it later
330 FillArrayData *tabRec = (FillArrayData *)
331 oatNew(sizeof(FillArrayData), true);
332 tabRec->table = table;
333 tabRec->vaddr = mir->offset;
334 u2 width = tabRec->table[1];
335 u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16);
336 tabRec->size = (size * width) + 8;
337
338 oatInsertGrowableList(&cUnit->fillArrayData, (intptr_t)tabRec);
339
340 // Making a call - use explicit registers
341 oatFlushAllRegs(cUnit); /* Everything to home location */
342 loadValueDirectFixed(cUnit, rlSrc, r0);
343 loadWordDisp(cUnit, rSELF,
344 OFFSETOF_MEMBER(Thread, pArtHandleFillArrayDataNoThrow), rLR);
345 // Materialize a pointer to the switch table
346 newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec);
347 opReg(cUnit, kOpBlx, rLR);
348 oatClobberCallRegs(cUnit);
349}
350
351/*
352 * Mark garbage collection card. Skip if the value we're storing is null.
353 */
354static void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
355{
356 int regCardBase = oatAllocTemp(cUnit);
357 int regCardNo = oatAllocTemp(cUnit);
358 ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
buzbeec143c552011-08-20 17:38:58 -0700359 loadWordDisp(cUnit, rSELF, Thread::CardTableOffset().Int32Value(),
buzbee67bf8852011-08-17 17:51:35 -0700360 regCardBase);
361 opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
362 storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
363 kUnsignedByte);
364 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
365 target->defMask = ENCODE_ALL;
366 branchOver->generic.target = (LIR*)target;
367 oatFreeTemp(cUnit, regCardBase);
368 oatFreeTemp(cUnit, regCardNo);
369}
370
371static void genIGetX(CompilationUnit* cUnit, MIR* mir, OpSize size,
372 RegLocation rlDest, RegLocation rlObj)
373{
buzbeec143c552011-08-20 17:38:58 -0700374 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
375 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700376 if (fieldPtr == NULL) {
377 /*
378 * With current scheme, we should never be in a situation
379 * in which the fieldPtr is null here. If something changes
380 * and we need to handle it, generate code to load the field
381 * pointer at run-time.
382 */
383 LOG(FATAL) << "Unexpected null field pointer";
384 }
385#if ANDROID_SMP != 0
386 bool isVolatile = dvmIsVolatileField(fieldPtr);
387#else
388 bool isVolatile = false;
389#endif
buzbeec143c552011-08-20 17:38:58 -0700390 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700391 RegLocation rlResult;
392 RegisterClass regClass = oatRegClassBySize(size);
393 rlObj = loadValue(cUnit, rlObj, kCoreReg);
394 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
395 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
396 NULL);/* null object? */
397 loadBaseDisp(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg,
398 size, rlObj.sRegLow);
399 if (isVolatile) {
400 oatGenMemBarrier(cUnit, kSY);
401 }
402
403 storeValue(cUnit, rlDest, rlResult);
404}
405
406static void genIPutX(CompilationUnit* cUnit, MIR* mir, OpSize size,
407 RegLocation rlSrc, RegLocation rlObj, bool isObject)
408{
buzbeec143c552011-08-20 17:38:58 -0700409 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
410 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700411 if (fieldPtr == NULL) {
412 /*
413 * With current scheme, we should never be in a situation
414 * in which the fieldPtr is null here. If something changes
415 * and we need to handle it, generate code to load the field
416 * pointer at run-time.
417 */
418 LOG(FATAL) << "Unexpected null field pointer";
419 }
420#if ANDROID_SMP != 0
421 bool isVolatile = dvmIsVolatileField(fieldPtr);
422#else
423 bool isVolatile = false;
424#endif
buzbeec143c552011-08-20 17:38:58 -0700425 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700426 RegisterClass regClass = oatRegClassBySize(size);
427 rlObj = loadValue(cUnit, rlObj, kCoreReg);
428 rlSrc = loadValue(cUnit, rlSrc, regClass);
429 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
430 NULL);/* null object? */
431
432 if (isVolatile) {
433 oatGenMemBarrier(cUnit, kSY);
434 }
435 storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
436 if (isObject) {
437 /* NOTE: marking card based on object head */
438 markGCCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
439 }
440}
441
442static void genIGetWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
443 RegLocation rlObj)
444{
buzbeec143c552011-08-20 17:38:58 -0700445 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
446 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700447 if (fieldPtr == NULL) {
448 /*
449 * With current scheme, we should never be in a situation
450 * in which the fieldPtr is null here. If something changes
451 * and we need to handle it, generate code to load the field
452 * pointer at run-time.
453 */
454 LOG(FATAL) << "Unexpected null field pointer";
455 }
456#if ANDROID_SMP != 0
457 bool isVolatile = dvmIsVolatileField(fieldPtr);
458#else
459 bool isVolatile = false;
460#endif
buzbeec143c552011-08-20 17:38:58 -0700461 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700462 RegLocation rlResult;
463 rlObj = loadValue(cUnit, rlObj, kCoreReg);
464 int regPtr = oatAllocTemp(cUnit);
465
466 assert(rlDest.wide);
467
468 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
469 NULL);/* null object? */
470 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
471 rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
472
473 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
474
475 if (isVolatile) {
476 oatGenMemBarrier(cUnit, kSY);
477 }
478
479 oatFreeTemp(cUnit, regPtr);
480 storeValueWide(cUnit, rlDest, rlResult);
481}
482
483static void genIPutWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
484 RegLocation rlObj)
485{
buzbeec143c552011-08-20 17:38:58 -0700486 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
487 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700488 if (fieldPtr == NULL) {
489 /*
490 * With current scheme, we should never be in a situation
491 * in which the fieldPtr is null here. If something changes
492 * and we need to handle it, generate code to load the field
493 * pointer at run-time.
494 */
495 LOG(FATAL) << "Unexpected null field pointer";
496 }
497#if ANDROID_SMP != 0
498 bool isVolatile = dvmIsVolatileField(fieldPtr);
499#else
500 bool isVolatile = false;
501#endif
buzbeec143c552011-08-20 17:38:58 -0700502 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700503
504 rlObj = loadValue(cUnit, rlObj, kCoreReg);
505 int regPtr;
506 rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
507 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
508 NULL);/* null object? */
509 regPtr = oatAllocTemp(cUnit);
510 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
511
512 if (isVolatile) {
513 oatGenMemBarrier(cUnit, kSY);
514 }
515 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
516
517 oatFreeTemp(cUnit, regPtr);
518}
519
520static void genConstClass(CompilationUnit* cUnit, MIR* mir,
521 RegLocation rlDest, RegLocation rlSrc)
522{
buzbeec143c552011-08-20 17:38:58 -0700523 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
524 GetResolvedClass(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700525
526 if (classPtr == NULL) {
527 LOG(FATAL) << "Unexpected null class pointer";
528 }
529
buzbeec143c552011-08-20 17:38:58 -0700530 UNIMPLEMENTED(WARNING) << "Not position independent. Fix";
buzbee67bf8852011-08-17 17:51:35 -0700531 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
532 loadConstantNoClobber(cUnit, rlResult.lowReg, (int) classPtr );
533 storeValue(cUnit, rlDest, rlResult);
534}
535
536static void genConstString(CompilationUnit* cUnit, MIR* mir,
537 RegLocation rlDest, RegLocation rlSrc)
538{
buzbeec143c552011-08-20 17:38:58 -0700539 String* strPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
540 GetResolvedString(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700541
542 if (strPtr == NULL) {
543 /* Shouldn't happen */
544 LOG(FATAL) << "Unexpected null const string pointer";
545 }
546
buzbeec143c552011-08-20 17:38:58 -0700547 UNIMPLEMENTED(WARNING) << "Not position indendent. Fix";
buzbee67bf8852011-08-17 17:51:35 -0700548 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
549 loadConstantNoClobber(cUnit, rlResult.lowReg, (int) strPtr );
550 storeValue(cUnit, rlDest, rlResult);
551}
552
553static void genNewInstance(CompilationUnit* cUnit, MIR* mir,
554 RegLocation rlDest)
555{
buzbeec143c552011-08-20 17:38:58 -0700556 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
557 GetResolvedClass(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700558
559 if (classPtr == NULL) {
560 /* Shouldn't happen */
561 LOG(FATAL) << "Unexpected null class pointer";
562 }
563
564 // Verifier should have already rejected abstract/interface
buzbeec143c552011-08-20 17:38:58 -0700565 assert((classPtr->access_flags_ &
566 (art::kAccInterface|art::kAccAbstract)) == 0);
buzbee67bf8852011-08-17 17:51:35 -0700567 oatFlushAllRegs(cUnit); /* Everything to home location */
568 loadWordDisp(cUnit, rSELF,
569 OFFSETOF_MEMBER(Thread, pArtAllocObjectNoThrow), rLR);
570 loadConstant(cUnit, r0, (int) classPtr);
buzbeec143c552011-08-20 17:38:58 -0700571 UNIMPLEMENTED(WARNING) << "Need NewWorld dvmAllocObject";
buzbee67bf8852011-08-17 17:51:35 -0700572 opReg(cUnit, kOpBlx, rLR);
573 oatClobberCallRegs(cUnit);
574 RegLocation rlResult = oatGetReturn(cUnit);
575 storeValue(cUnit, rlDest, rlResult);
576}
577
578void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
579{
580 loadWordDisp(cUnit, rSELF,
581 OFFSETOF_MEMBER(Thread, pArtAllocObjectNoThrow), rLR);
582 loadValueDirectFixed(cUnit, rlSrc, r1); /* Exception object */
583 genRegCopy(cUnit, r0, rSELF);
584 opReg(cUnit, kOpBlx, rLR);
585}
586
587static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
588 RegLocation rlSrc)
589{
590 // May generate a call - use explicit registers
591 RegLocation rlResult;
buzbeec143c552011-08-20 17:38:58 -0700592 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
593 GetResolvedClass(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700594 if (classPtr == NULL) {
595 /* Shouldn't happen */
596 LOG(FATAL) << "Unexpected null class pointer";
597 }
598 oatFlushAllRegs(cUnit); /* Everything to home location */
599 loadValueDirectFixed(cUnit, rlSrc, r0); /* Ref */
600 loadConstant(cUnit, r2, (int) classPtr );
601 /* When taken r0 has NULL which can be used for store directly */
602 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, r0, 0);
603 /* r1 now contains object->clazz */
buzbeec143c552011-08-20 17:38:58 -0700604 assert(OFFSETOF_MEMBER(Object, klass_) == 0);
605 loadWordDisp(cUnit, r0, OFFSETOF_MEMBER(Object, klass_), r1);
buzbee67bf8852011-08-17 17:51:35 -0700606 /* r1 now contains object->clazz */
607 loadWordDisp(cUnit, rSELF,
608 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivial), rLR);
609 loadConstant(cUnit, r0, 1); /* Assume true */
610 opRegReg(cUnit, kOpCmp, r1, r2);
611 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
612 genRegCopy(cUnit, r0, r1);
613 genRegCopy(cUnit, r1, r2);
614 opReg(cUnit, kOpBlx, rLR);
615 oatClobberCallRegs(cUnit);
616 /* branch target here */
617 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
618 target->defMask = ENCODE_ALL;
619 rlResult = oatGetReturn(cUnit);
620 storeValue(cUnit, rlDest, rlResult);
621 branch1->generic.target = (LIR*)target;
622 branch2->generic.target = (LIR*)target;
623}
624
625static void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
626{
buzbeec143c552011-08-20 17:38:58 -0700627 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
628 GetResolvedClass(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700629 if (classPtr == NULL) {
630 /* Shouldn't happen with our current model */
631 LOG(FATAL) << "Unexpected null class pointer";
632 }
633 oatFlushAllRegs(cUnit); /* Everything to home location */
634 loadConstant(cUnit, r1, (int) classPtr );
635 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
636 /* Null? */
637 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq,
638 rlSrc.lowReg, 0);
639 /*
640 * rlSrc.lowReg now contains object->clazz. Note that
641 * it could have been allocated r0, but we're okay so long
642 * as we don't do anything desctructive until r0 is loaded
643 * with clazz.
644 */
645 /* r0 now contains object->clazz */
buzbeec143c552011-08-20 17:38:58 -0700646 loadWordDisp(cUnit, rlSrc.lowReg, OFFSETOF_MEMBER(Object, klass_), r0);
buzbee67bf8852011-08-17 17:51:35 -0700647 loadWordDisp(cUnit, rSELF,
648 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivialNoThrow), rLR);
649 opRegReg(cUnit, kOpCmp, r0, r1);
650 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
651 // Assume success - if not, artInstanceOfNonTrivial will handle throw
652 opReg(cUnit, kOpBlx, rLR);
653 oatClobberCallRegs(cUnit);
654 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
655 target->defMask = ENCODE_ALL;
656 branch1->generic.target = (LIR*)target;
657 branch2->generic.target = (LIR*)target;
658}
659
660static void genNegFloat(CompilationUnit* cUnit, RegLocation rlDest,
661 RegLocation rlSrc)
662{
663 RegLocation rlResult;
664 rlSrc = loadValue(cUnit, rlSrc, kFPReg);
665 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
666 newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
667 storeValue(cUnit, rlDest, rlResult);
668}
669
670static void genNegDouble(CompilationUnit* cUnit, RegLocation rlDest,
671 RegLocation rlSrc)
672{
673 RegLocation rlResult;
674 rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
675 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
676 newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
677 S2D(rlSrc.lowReg, rlSrc.highReg));
678 storeValueWide(cUnit, rlDest, rlResult);
679}
680
681/*
682 * To avoid possible conflicts, we use a lot of temps here. Note that
683 * our usage of Thumb2 instruction forms avoids the problems with register
684 * reuse for multiply instructions prior to arm6.
685 */
686static void genMulLong(CompilationUnit* cUnit, RegLocation rlDest,
687 RegLocation rlSrc1, RegLocation rlSrc2)
688{
689 RegLocation rlResult;
690 int resLo = oatAllocTemp(cUnit);
691 int resHi = oatAllocTemp(cUnit);
692 int tmp1 = oatAllocTemp(cUnit);
693
694 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
695 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
696
697 newLIR3(cUnit, kThumb2MulRRR, tmp1, rlSrc2.lowReg, rlSrc1.highReg);
698 newLIR4(cUnit, kThumb2Umull, resLo, resHi, rlSrc2.lowReg, rlSrc1.lowReg);
699 newLIR4(cUnit, kThumb2Mla, tmp1, rlSrc1.lowReg, rlSrc2.highReg, tmp1);
700 newLIR4(cUnit, kThumb2AddRRR, resHi, tmp1, resHi, 0);
701 oatFreeTemp(cUnit, tmp1);
702
703 rlResult = oatGetReturnWide(cUnit);
704 rlResult.lowReg = resLo;
705 rlResult.highReg = resHi;
706 storeValueWide(cUnit, rlDest, rlResult);
707}
708
709static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
710 OpKind secondOp, RegLocation rlDest,
711 RegLocation rlSrc1, RegLocation rlSrc2)
712{
buzbee9e0f9b02011-08-24 15:32:46 -0700713 /*
714 * NOTE: This is the one place in the code in which we might have
715 * as many as six live temporary registers. There are 5 in the normal
716 * set for Arm. Until we have spill capabilities, temporarily add
717 * lr to the temp set. It is safe to do this locally, but note that
718 * lr is used explicitly elsewhere in the code generator and cannot
719 * normally be used as a general temp register.
720 */
buzbee67bf8852011-08-17 17:51:35 -0700721 RegLocation rlResult;
buzbee9e0f9b02011-08-24 15:32:46 -0700722 oatMarkTemp(cUnit, rLR); // Add lr to the temp pool
723 oatFreeTemp(cUnit, rLR); // and make it available
buzbee67bf8852011-08-17 17:51:35 -0700724 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
725 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
726 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
727 opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
728 opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
729 rlSrc2.highReg);
730 storeValueWide(cUnit, rlDest, rlResult);
buzbee9e0f9b02011-08-24 15:32:46 -0700731 oatClobber(cUnit, rLR);
732 oatUnmarkTemp(cUnit, rLR); // Remove lr from the temp pool
buzbee67bf8852011-08-17 17:51:35 -0700733}
734
735void oatInitializeRegAlloc(CompilationUnit* cUnit)
736{
737 int numRegs = sizeof(coreRegs)/sizeof(*coreRegs);
738 int numReserved = sizeof(reservedRegs)/sizeof(*reservedRegs);
739 int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
740 int numFPRegs = sizeof(fpRegs)/sizeof(*fpRegs);
741 int numFPTemps = sizeof(fpTemps)/sizeof(*fpTemps);
742 RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true);
743 cUnit->regPool = pool;
744 pool->numCoreRegs = numRegs;
745 pool->coreRegs = (RegisterInfo *)
746 oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true);
747 pool->numFPRegs = numFPRegs;
748 pool->FPRegs = (RegisterInfo *)
749 oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true);
750 oatInitPool(pool->coreRegs, coreRegs, pool->numCoreRegs);
751 oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
752 // Keep special registers from being allocated
753 for (int i = 0; i < numReserved; i++) {
754 oatMarkInUse(cUnit, reservedRegs[i]);
755 }
756 // Mark temp regs - all others not in use can be used for promotion
757 for (int i = 0; i < numTemps; i++) {
758 oatMarkTemp(cUnit, coreTemps[i]);
759 }
760 for (int i = 0; i < numFPTemps; i++) {
761 oatMarkTemp(cUnit, fpTemps[i]);
762 }
763 pool->nullCheckedRegs =
764 oatAllocBitVector(cUnit->numSSARegs, false);
765}
766
/*
 * Handle simple case (thin lock) inline.  If it's complicated, bail
 * out to the heavyweight lock/unlock routines.  We'll use dedicated
 * registers here in order to be in the right position in case we
 * need to bail to dvm[Lock/Unlock]Object(self, object)
 *
 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object]
 * r1 -> object [arg1 for dvm[Lock/Unlock]Object]
 * r2 -> initial contents of object->lock, later result of strex
 * r3 -> self->threadId
 * r12 -> allow to be used by utilities as general temp
 *
 * The result of the strex is 0 if we acquire the lock.
 *
 * See comments in Sync.c for the layout of the lock word.
 * Of particular interest to this code is the test for the
 * simple case - which we handle inline.  For monitor enter, the
 * simple case is thin lock, held by no-one.  For monitor exit,
 * the simple case is thin lock, held by the unlocking thread with
 * a recurse count of 0.
 *
 * A minor complication is that there is a field in the lock word
 * unrelated to locking: the hash state.  This field must be ignored, but
 * preserved.
 *
 */
793static void genMonitorEnter(CompilationUnit* cUnit, MIR* mir,
794 RegLocation rlSrc)
795{
796 ArmLIR* target;
797 ArmLIR* hopTarget;
798 ArmLIR* branch;
799 ArmLIR* hopBranch;
800
801 oatFlushAllRegs(cUnit);
buzbeec143c552011-08-20 17:38:58 -0700802 assert(art::Monitor::kLwShapeThin == 0);
buzbee67bf8852011-08-17 17:51:35 -0700803 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
804 oatLockAllTemps(cUnit); // Prepare for explicit register usage
805 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
buzbeec143c552011-08-20 17:38:58 -0700806 loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
buzbee67bf8852011-08-17 17:51:35 -0700807 newLIR3(cUnit, kThumb2Ldrex, r2, r1,
buzbeec143c552011-08-20 17:38:58 -0700808 OFFSETOF_MEMBER(Object, monitor_) >> 2); // Get object->lock
809 // Align owner
810 opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
buzbee67bf8852011-08-17 17:51:35 -0700811 // Is lock unheld on lock or held by us (==threadId) on unlock?
buzbeec143c552011-08-20 17:38:58 -0700812 newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, art::Monitor::kLwLockOwnerShift
813 - 1);
814 newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
815 art::Monitor::kLwLockOwnerShift - 1);
buzbee67bf8852011-08-17 17:51:35 -0700816 hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
buzbeec143c552011-08-20 17:38:58 -0700817 newLIR4(cUnit, kThumb2Strex, r2, r3, r1,
818 OFFSETOF_MEMBER(Object, monitor_) >> 2);
buzbee67bf8852011-08-17 17:51:35 -0700819 oatGenMemBarrier(cUnit, kSY);
820 branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);
821
822 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
823 hopTarget->defMask = ENCODE_ALL;
824 hopBranch->generic.target = (LIR*)hopTarget;
825
826 // Go expensive route - artLockObjectNoThrow(self, obj);
827 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtLockObjectNoThrow),
828 rLR);
829 genRegCopy(cUnit, r0, rSELF);
830 newLIR1(cUnit, kThumbBlxR, rLR);
831
832 // Resume here
833 target = newLIR0(cUnit, kArmPseudoTargetLabel);
834 target->defMask = ENCODE_ALL;
835 branch->generic.target = (LIR*)target;
836}
837
838/*
839 * For monitor unlock, we don't have to use ldrex/strex. Once
840 * we've determined that the lock is thin and that we own it with
841 * a zero recursion count, it's safe to punch it back to the
842 * initial, unlock thin state with a store word.
843 */
844static void genMonitorExit(CompilationUnit* cUnit, MIR* mir,
845 RegLocation rlSrc)
846{
847 ArmLIR* target;
848 ArmLIR* branch;
849 ArmLIR* hopTarget;
850 ArmLIR* hopBranch;
851
buzbeec143c552011-08-20 17:38:58 -0700852 assert(art::Monitor::kLwShapeThin == 0);
buzbee67bf8852011-08-17 17:51:35 -0700853 oatFlushAllRegs(cUnit);
854 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
855 oatLockAllTemps(cUnit); // Prepare for explicit register usage
856 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
buzbeec143c552011-08-20 17:38:58 -0700857 loadWordDisp(cUnit, r1, OFFSETOF_MEMBER(Object, monitor_), r2); // Get lock
858 loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
buzbee67bf8852011-08-17 17:51:35 -0700859 // Is lock unheld on lock or held by us (==threadId) on unlock?
buzbeec143c552011-08-20 17:38:58 -0700860 opRegRegImm(cUnit, kOpAnd, r12, r2, (art::Monitor::kLwHashStateMask <<
861 art::Monitor::kLwHashStateShift));
862 // Align owner
863 opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
864 newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
865 art::Monitor::kLwLockOwnerShift - 1);
buzbee67bf8852011-08-17 17:51:35 -0700866 opRegReg(cUnit, kOpSub, r2, r3);
867 hopBranch = opCondBranch(cUnit, kArmCondNe);
868 oatGenMemBarrier(cUnit, kSY);
buzbeec143c552011-08-20 17:38:58 -0700869 storeWordDisp(cUnit, r1, OFFSETOF_MEMBER(Object, monitor_), r12);
buzbee67bf8852011-08-17 17:51:35 -0700870 branch = opNone(cUnit, kOpUncondBr);
871
872 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
873 hopTarget->defMask = ENCODE_ALL;
874 hopBranch->generic.target = (LIR*)hopTarget;
875
876 // Go expensive route - artUnlockObjectNoThrow(self, obj);
877 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtUnlockObjectNoThrow),
878 rLR);
879 genRegCopy(cUnit, r0, rSELF);
880 newLIR1(cUnit, kThumbBlxR, rLR);
881
882 // Resume here
883 target = newLIR0(cUnit, kArmPseudoTargetLabel);
884 target->defMask = ENCODE_ALL;
885 branch->generic.target = (LIR*)target;
886}
887
888/*
889 * 64-bit 3way compare function.
890 * mov rX, #-1
891 * cmp op1hi, op2hi
892 * blt done
893 * bgt flip
894 * sub rX, op1lo, op2lo (treat as unsigned)
895 * beq done
896 * ite hi
897 * mov(hi) rX, #-1
898 * mov(!hi) rX, #1
899 * flip:
900 * neg rX
901 * done:
902 */
903static void genCmpLong(CompilationUnit* cUnit, MIR* mir,
904 RegLocation rlDest, RegLocation rlSrc1,
905 RegLocation rlSrc2)
906{
907 RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
908 ArmLIR* target1;
909 ArmLIR* target2;
910 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
911 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
912 rlTemp.lowReg = oatAllocTemp(cUnit);
913 loadConstant(cUnit, rlTemp.lowReg, -1);
914 opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
915 ArmLIR* branch1 = opCondBranch(cUnit, kArmCondLt);
916 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondGt);
917 opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
918 ArmLIR* branch3 = opCondBranch(cUnit, kArmCondEq);
919
920 genIT(cUnit, kArmCondHi, "E");
921 newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
922 loadConstant(cUnit, rlTemp.lowReg, 1);
923 genBarrier(cUnit);
924
925 target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
926 target2->defMask = -1;
927 opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);
928
929 target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
930 target1->defMask = -1;
931
932 storeValue(cUnit, rlDest, rlTemp);
933
934 branch1->generic.target = (LIR*)target1;
935 branch2->generic.target = (LIR*)target2;
936 branch3->generic.target = branch1->generic.target;
937}
938
939static void genMultiplyByTwoBitMultiplier(CompilationUnit* cUnit,
940 RegLocation rlSrc, RegLocation rlResult, int lit,
941 int firstBit, int secondBit)
942{
943 opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
944 encodeShift(kArmLsl, secondBit - firstBit));
945 if (firstBit != 0) {
946 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
947 }
948}
949
950static bool genConversionCall(CompilationUnit* cUnit, MIR* mir, int funcOffset,
951 int srcSize, int tgtSize)
952{
953 /*
954 * Don't optimize the register usage since it calls out to support
955 * functions
956 */
957 RegLocation rlSrc;
958 RegLocation rlDest;
959 oatFlushAllRegs(cUnit); /* Send everything to home location */
960 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
961 if (srcSize == 1) {
962 rlSrc = oatGetSrc(cUnit, mir, 0);
963 loadValueDirectFixed(cUnit, rlSrc, r0);
964 } else {
965 rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
966 loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
967 }
968 opReg(cUnit, kOpBlx, rLR);
969 oatClobberCallRegs(cUnit);
970 if (tgtSize == 1) {
971 RegLocation rlResult;
972 rlDest = oatGetDest(cUnit, mir, 0);
973 rlResult = oatGetReturn(cUnit);
974 storeValue(cUnit, rlDest, rlResult);
975 } else {
976 RegLocation rlResult;
977 rlDest = oatGetDestWide(cUnit, mir, 0, 1);
978 rlResult = oatGetReturnWide(cUnit);
979 storeValueWide(cUnit, rlDest, rlResult);
980 }
981 return false;
982}
983
984static bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
985 RegLocation rlDest, RegLocation rlSrc1,
986 RegLocation rlSrc2)
987{
988 RegLocation rlResult;
989 int funcOffset;
990
991 switch (mir->dalvikInsn.opcode) {
992 case OP_ADD_FLOAT_2ADDR:
993 case OP_ADD_FLOAT:
994 funcOffset = OFFSETOF_MEMBER(Thread, pFadd);
995 break;
996 case OP_SUB_FLOAT_2ADDR:
997 case OP_SUB_FLOAT:
998 funcOffset = OFFSETOF_MEMBER(Thread, pFsub);
999 break;
1000 case OP_DIV_FLOAT_2ADDR:
1001 case OP_DIV_FLOAT:
1002 funcOffset = OFFSETOF_MEMBER(Thread, pFdiv);
1003 break;
1004 case OP_MUL_FLOAT_2ADDR:
1005 case OP_MUL_FLOAT:
1006 funcOffset = OFFSETOF_MEMBER(Thread, pFmul);
1007 break;
1008 case OP_REM_FLOAT_2ADDR:
1009 case OP_REM_FLOAT:
1010 funcOffset = OFFSETOF_MEMBER(Thread, pFmodf);
1011 break;
1012 case OP_NEG_FLOAT: {
1013 genNegFloat(cUnit, rlDest, rlSrc1);
1014 return false;
1015 }
1016 default:
1017 return true;
1018 }
1019 oatFlushAllRegs(cUnit); /* Send everything to home location */
1020 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1021 loadValueDirectFixed(cUnit, rlSrc1, r0);
1022 loadValueDirectFixed(cUnit, rlSrc2, r1);
1023 opReg(cUnit, kOpBlx, rLR);
1024 oatClobberCallRegs(cUnit);
1025 rlResult = oatGetReturn(cUnit);
1026 storeValue(cUnit, rlDest, rlResult);
1027 return false;
1028}
1029
1030static bool genArithOpDoublePortable(CompilationUnit* cUnit, MIR* mir,
1031 RegLocation rlDest, RegLocation rlSrc1,
1032 RegLocation rlSrc2)
1033{
1034 RegLocation rlResult;
1035 int funcOffset;
1036
1037 switch (mir->dalvikInsn.opcode) {
1038 case OP_ADD_DOUBLE_2ADDR:
1039 case OP_ADD_DOUBLE:
1040 funcOffset = OFFSETOF_MEMBER(Thread, pDadd);
1041 break;
1042 case OP_SUB_DOUBLE_2ADDR:
1043 case OP_SUB_DOUBLE:
1044 funcOffset = OFFSETOF_MEMBER(Thread, pDsub);
1045 break;
1046 case OP_DIV_DOUBLE_2ADDR:
1047 case OP_DIV_DOUBLE:
1048 funcOffset = OFFSETOF_MEMBER(Thread, pDdiv);
1049 break;
1050 case OP_MUL_DOUBLE_2ADDR:
1051 case OP_MUL_DOUBLE:
1052 funcOffset = OFFSETOF_MEMBER(Thread, pDmul);
1053 break;
1054 case OP_REM_DOUBLE_2ADDR:
1055 case OP_REM_DOUBLE:
1056 funcOffset = OFFSETOF_MEMBER(Thread, pFmod);
1057 break;
1058 case OP_NEG_DOUBLE: {
1059 genNegDouble(cUnit, rlDest, rlSrc1);
1060 return false;
1061 }
1062 default:
1063 return true;
1064 }
1065 oatFlushAllRegs(cUnit); /* Send everything to home location */
1066 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1067 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1068 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1069 opReg(cUnit, kOpBlx, rLR);
1070 oatClobberCallRegs(cUnit);
1071 rlResult = oatGetReturnWide(cUnit);
1072 storeValueWide(cUnit, rlDest, rlResult);
1073 return false;
1074}
1075
1076static bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
1077{
1078 Opcode opcode = mir->dalvikInsn.opcode;
1079
1080 switch (opcode) {
1081 case OP_INT_TO_FLOAT:
1082 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2f),
1083 1, 1);
1084 case OP_FLOAT_TO_INT:
1085 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2iz),
1086 1, 1);
1087 case OP_DOUBLE_TO_FLOAT:
1088 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2f),
1089 2, 1);
1090 case OP_FLOAT_TO_DOUBLE:
1091 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2d),
1092 1, 2);
1093 case OP_INT_TO_DOUBLE:
1094 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2d),
1095 1, 2);
1096 case OP_DOUBLE_TO_INT:
1097 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2iz),
1098 2, 1);
1099 case OP_FLOAT_TO_LONG:
1100 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
1101 pArtF2l), 1, 2);
1102 case OP_LONG_TO_FLOAT:
1103 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2f),
1104 2, 1);
1105 case OP_DOUBLE_TO_LONG:
1106 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
1107 pArtD2l), 2, 2);
1108 case OP_LONG_TO_DOUBLE:
1109 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2d),
1110 2, 2);
1111 default:
1112 return true;
1113 }
1114 return false;
1115}
1116
1117/* Generate conditional branch instructions */
1118static ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
1119 ArmConditionCode cond,
1120 ArmLIR* target)
1121{
1122 ArmLIR* branch = opCondBranch(cUnit, cond);
1123 branch->generic.target = (LIR*) target;
1124 return branch;
1125}
1126
1127/* Generate a unconditional branch to go to the interpreter */
1128static inline ArmLIR* genTrap(CompilationUnit* cUnit, int dOffset,
1129 ArmLIR* pcrLabel)
1130{
1131 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1132 return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
1133}
1134
1135/*
1136 * Generate array store
1137 *
1138 */
buzbeec143c552011-08-20 17:38:58 -07001139static void genArrayPut(CompilationUnit* cUnit, MIR* mir,
buzbee67bf8852011-08-17 17:51:35 -07001140 RegLocation rlArray, RegLocation rlIndex,
1141 RegLocation rlSrc, int scale)
1142{
1143 RegisterClass regClass = oatRegClassBySize(kWord);
buzbeec143c552011-08-20 17:38:58 -07001144 int lenOffset = Array::LengthOffset().Int32Value();
1145 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001146
1147 /* Make sure it's a legal object Put. Use direct regs at first */
1148 loadValueDirectFixed(cUnit, rlArray, r1);
1149 loadValueDirectFixed(cUnit, rlSrc, r0);
1150
1151 /* null array object? */
1152 ArmLIR* pcrLabel = NULL;
1153
1154 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1155 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, r1,
1156 mir->offset, NULL);
1157 }
1158 loadWordDisp(cUnit, rSELF,
1159 OFFSETOF_MEMBER(Thread, pArtCanPutArrayElementNoThrow), rLR);
1160 /* Get the array's clazz */
buzbeec143c552011-08-20 17:38:58 -07001161 loadWordDisp(cUnit, r1, OFFSETOF_MEMBER(Object, klass_), r1);
buzbee67bf8852011-08-17 17:51:35 -07001162 /* Get the object's clazz */
buzbeec143c552011-08-20 17:38:58 -07001163 loadWordDisp(cUnit, r0, OFFSETOF_MEMBER(Object, klass_), r0);
buzbee67bf8852011-08-17 17:51:35 -07001164 opReg(cUnit, kOpBlx, rLR);
1165 oatClobberCallRegs(cUnit);
1166
1167 // Now, redo loadValues in case they didn't survive the call
1168
1169 int regPtr;
1170 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1171 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1172
1173 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1174 oatClobber(cUnit, rlArray.lowReg);
1175 regPtr = rlArray.lowReg;
1176 } else {
1177 regPtr = oatAllocTemp(cUnit);
1178 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1179 }
1180
1181 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1182 int regLen = oatAllocTemp(cUnit);
1183 //NOTE: max live temps(4) here.
1184 /* Get len */
1185 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1186 /* regPtr -> array data */
1187 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1188 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1189 pcrLabel);
1190 oatFreeTemp(cUnit, regLen);
1191 } else {
1192 /* regPtr -> array data */
1193 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1194 }
1195 /* at this point, regPtr points to array, 2 live temps */
1196 rlSrc = loadValue(cUnit, rlSrc, regClass);
1197 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1198 scale, kWord);
1199}
1200
1201/*
1202 * Generate array load
1203 */
1204static void genArrayGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
1205 RegLocation rlArray, RegLocation rlIndex,
1206 RegLocation rlDest, int scale)
1207{
1208 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001209 int lenOffset = Array::LengthOffset().Int32Value();
1210 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001211 RegLocation rlResult;
1212 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1213 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1214 int regPtr;
1215
1216 /* null object? */
1217 ArmLIR* pcrLabel = NULL;
1218
1219 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1220 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
1221 rlArray.lowReg, mir->offset, NULL);
1222 }
1223
1224 regPtr = oatAllocTemp(cUnit);
1225
1226 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1227 int regLen = oatAllocTemp(cUnit);
1228 /* Get len */
1229 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1230 /* regPtr -> array data */
1231 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1232 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1233 pcrLabel);
1234 oatFreeTemp(cUnit, regLen);
1235 } else {
1236 /* regPtr -> array data */
1237 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1238 }
1239 if ((size == kLong) || (size == kDouble)) {
1240 if (scale) {
1241 int rNewIndex = oatAllocTemp(cUnit);
1242 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1243 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1244 oatFreeTemp(cUnit, rNewIndex);
1245 } else {
1246 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1247 }
1248 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1249
1250 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
1251
1252 oatFreeTemp(cUnit, regPtr);
1253 storeValueWide(cUnit, rlDest, rlResult);
1254 } else {
1255 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1256
1257 loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
1258 scale, size);
1259
1260 oatFreeTemp(cUnit, regPtr);
1261 storeValue(cUnit, rlDest, rlResult);
1262 }
1263}
1264
1265/*
1266 * Generate array store
1267 *
1268 */
1269static void genArrayPut(CompilationUnit* cUnit, MIR* mir, OpSize size,
1270 RegLocation rlArray, RegLocation rlIndex,
1271 RegLocation rlSrc, int scale)
1272{
1273 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001274 int lenOffset = Array::LengthOffset().Int32Value();
1275 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001276
1277 int regPtr;
1278 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1279 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1280
1281 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1282 oatClobber(cUnit, rlArray.lowReg);
1283 regPtr = rlArray.lowReg;
1284 } else {
1285 regPtr = oatAllocTemp(cUnit);
1286 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1287 }
1288
1289 /* null object? */
1290 ArmLIR* pcrLabel = NULL;
1291
1292 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1293 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
1294 mir->offset, NULL);
1295 }
1296
1297 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1298 int regLen = oatAllocTemp(cUnit);
1299 //NOTE: max live temps(4) here.
1300 /* Get len */
1301 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1302 /* regPtr -> array data */
1303 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1304 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1305 pcrLabel);
1306 oatFreeTemp(cUnit, regLen);
1307 } else {
1308 /* regPtr -> array data */
1309 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1310 }
1311 /* at this point, regPtr points to array, 2 live temps */
1312 if ((size == kLong) || (size == kDouble)) {
1313 //TODO: need specific wide routine that can handle fp regs
1314 if (scale) {
1315 int rNewIndex = oatAllocTemp(cUnit);
1316 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1317 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1318 oatFreeTemp(cUnit, rNewIndex);
1319 } else {
1320 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1321 }
1322 rlSrc = loadValueWide(cUnit, rlSrc, regClass);
1323
1324 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
1325
1326 oatFreeTemp(cUnit, regPtr);
1327 } else {
1328 rlSrc = loadValue(cUnit, rlSrc, regClass);
1329
1330 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1331 scale, size);
1332 }
1333}
1334
1335static bool genShiftOpLong(CompilationUnit* cUnit, MIR* mir,
1336 RegLocation rlDest, RegLocation rlSrc1,
1337 RegLocation rlShift)
1338{
buzbee54330722011-08-23 16:46:55 -07001339 int funcOffset;
buzbee67bf8852011-08-17 17:51:35 -07001340
buzbee67bf8852011-08-17 17:51:35 -07001341 switch( mir->dalvikInsn.opcode) {
1342 case OP_SHL_LONG:
1343 case OP_SHL_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001344 funcOffset = OFFSETOF_MEMBER(Thread, pShlLong);
buzbee67bf8852011-08-17 17:51:35 -07001345 break;
1346 case OP_SHR_LONG:
1347 case OP_SHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001348 funcOffset = OFFSETOF_MEMBER(Thread, pShrLong);
buzbee67bf8852011-08-17 17:51:35 -07001349 break;
1350 case OP_USHR_LONG:
1351 case OP_USHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001352 funcOffset = OFFSETOF_MEMBER(Thread, pUshrLong);
buzbee67bf8852011-08-17 17:51:35 -07001353 break;
1354 default:
buzbee54330722011-08-23 16:46:55 -07001355 LOG(FATAL) << "Unexpected case";
buzbee67bf8852011-08-17 17:51:35 -07001356 return true;
1357 }
buzbee54330722011-08-23 16:46:55 -07001358 oatFlushAllRegs(cUnit); /* Send everything to home location */
1359 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1360 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1361 loadValueDirect(cUnit, rlShift, r2);
1362 opReg(cUnit, kOpBlx, rLR);
1363 oatClobberCallRegs(cUnit);
1364 RegLocation rlResult = oatGetReturnWide(cUnit);
buzbee67bf8852011-08-17 17:51:35 -07001365 storeValueWide(cUnit, rlDest, rlResult);
1366 return false;
1367}
1368
1369static bool genArithOpLong(CompilationUnit* cUnit, MIR* mir,
1370 RegLocation rlDest, RegLocation rlSrc1,
1371 RegLocation rlSrc2)
1372{
1373 RegLocation rlResult;
1374 OpKind firstOp = kOpBkpt;
1375 OpKind secondOp = kOpBkpt;
1376 bool callOut = false;
1377 int funcOffset;
1378 int retReg = r0;
1379
1380 switch (mir->dalvikInsn.opcode) {
1381 case OP_NOT_LONG:
1382 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1383 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1384 opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg);
1385 opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg);
1386 storeValueWide(cUnit, rlDest, rlResult);
1387 return false;
1388 break;
1389 case OP_ADD_LONG:
1390 case OP_ADD_LONG_2ADDR:
1391 firstOp = kOpAdd;
1392 secondOp = kOpAdc;
1393 break;
1394 case OP_SUB_LONG:
1395 case OP_SUB_LONG_2ADDR:
1396 firstOp = kOpSub;
1397 secondOp = kOpSbc;
1398 break;
1399 case OP_MUL_LONG:
1400 case OP_MUL_LONG_2ADDR:
1401 genMulLong(cUnit, rlDest, rlSrc1, rlSrc2);
1402 return false;
1403 case OP_DIV_LONG:
1404 case OP_DIV_LONG_2ADDR:
1405 callOut = true;
1406 retReg = r0;
1407 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1408 break;
1409 /* NOTE - result is in r2/r3 instead of r0/r1 */
1410 case OP_REM_LONG:
1411 case OP_REM_LONG_2ADDR:
1412 callOut = true;
1413 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1414 retReg = r2;
1415 break;
1416 case OP_AND_LONG_2ADDR:
1417 case OP_AND_LONG:
1418 firstOp = kOpAnd;
1419 secondOp = kOpAnd;
1420 break;
1421 case OP_OR_LONG:
1422 case OP_OR_LONG_2ADDR:
1423 firstOp = kOpOr;
1424 secondOp = kOpOr;
1425 break;
1426 case OP_XOR_LONG:
1427 case OP_XOR_LONG_2ADDR:
1428 firstOp = kOpXor;
1429 secondOp = kOpXor;
1430 break;
1431 case OP_NEG_LONG: {
1432 //TUNING: can improve this using Thumb2 code
1433 int tReg = oatAllocTemp(cUnit);
1434 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1435 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1436 loadConstantNoClobber(cUnit, tReg, 0);
1437 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1438 tReg, rlSrc2.lowReg);
1439 opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg);
1440 genRegCopy(cUnit, rlResult.highReg, tReg);
1441 storeValueWide(cUnit, rlDest, rlResult);
1442 return false;
1443 }
1444 default:
1445 LOG(FATAL) << "Invalid long arith op";
1446 }
1447 if (!callOut) {
1448 genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2);
1449 } else {
1450 // Adjust return regs in to handle case of rem returning r2/r3
1451 oatFlushAllRegs(cUnit); /* Send everything to home location */
1452 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1453 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1454 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1455 opReg(cUnit, kOpBlx, rLR);
1456 oatClobberCallRegs(cUnit);
1457 if (retReg == r0)
1458 rlResult = oatGetReturnWide(cUnit);
1459 else
1460 rlResult = oatGetReturnWideAlt(cUnit);
1461 storeValueWide(cUnit, rlDest, rlResult);
1462 }
1463 return false;
1464}
1465
1466static bool genArithOpInt(CompilationUnit* cUnit, MIR* mir,
1467 RegLocation rlDest, RegLocation rlSrc1,
1468 RegLocation rlSrc2)
1469{
1470 OpKind op = kOpBkpt;
1471 bool callOut = false;
1472 bool checkZero = false;
1473 bool unary = false;
1474 int retReg = r0;
1475 int funcOffset;
1476 RegLocation rlResult;
1477 bool shiftOp = false;
1478
1479 switch (mir->dalvikInsn.opcode) {
1480 case OP_NEG_INT:
1481 op = kOpNeg;
1482 unary = true;
1483 break;
1484 case OP_NOT_INT:
1485 op = kOpMvn;
1486 unary = true;
1487 break;
1488 case OP_ADD_INT:
1489 case OP_ADD_INT_2ADDR:
1490 op = kOpAdd;
1491 break;
1492 case OP_SUB_INT:
1493 case OP_SUB_INT_2ADDR:
1494 op = kOpSub;
1495 break;
1496 case OP_MUL_INT:
1497 case OP_MUL_INT_2ADDR:
1498 op = kOpMul;
1499 break;
1500 case OP_DIV_INT:
1501 case OP_DIV_INT_2ADDR:
1502 callOut = true;
1503 checkZero = true;
1504 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1505 retReg = r0;
1506 break;
1507 /* NOTE: returns in r1 */
1508 case OP_REM_INT:
1509 case OP_REM_INT_2ADDR:
1510 callOut = true;
1511 checkZero = true;
1512 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1513 retReg = r1;
1514 break;
1515 case OP_AND_INT:
1516 case OP_AND_INT_2ADDR:
1517 op = kOpAnd;
1518 break;
1519 case OP_OR_INT:
1520 case OP_OR_INT_2ADDR:
1521 op = kOpOr;
1522 break;
1523 case OP_XOR_INT:
1524 case OP_XOR_INT_2ADDR:
1525 op = kOpXor;
1526 break;
1527 case OP_SHL_INT:
1528 case OP_SHL_INT_2ADDR:
1529 shiftOp = true;
1530 op = kOpLsl;
1531 break;
1532 case OP_SHR_INT:
1533 case OP_SHR_INT_2ADDR:
1534 shiftOp = true;
1535 op = kOpAsr;
1536 break;
1537 case OP_USHR_INT:
1538 case OP_USHR_INT_2ADDR:
1539 shiftOp = true;
1540 op = kOpLsr;
1541 break;
1542 default:
1543 LOG(FATAL) << "Invalid word arith op: " <<
1544 (int)mir->dalvikInsn.opcode;
1545 }
1546 if (!callOut) {
1547 rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
1548 if (unary) {
1549 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1550 opRegReg(cUnit, op, rlResult.lowReg,
1551 rlSrc1.lowReg);
1552 } else {
1553 rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
1554 if (shiftOp) {
1555 int tReg = oatAllocTemp(cUnit);
1556 opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31);
1557 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1558 opRegRegReg(cUnit, op, rlResult.lowReg,
1559 rlSrc1.lowReg, tReg);
1560 oatFreeTemp(cUnit, tReg);
1561 } else {
1562 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1563 opRegRegReg(cUnit, op, rlResult.lowReg,
1564 rlSrc1.lowReg, rlSrc2.lowReg);
1565 }
1566 }
1567 storeValue(cUnit, rlDest, rlResult);
1568 } else {
1569 RegLocation rlResult;
1570 oatFlushAllRegs(cUnit); /* Send everything to home location */
1571 loadValueDirectFixed(cUnit, rlSrc2, r1);
1572 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1573 loadValueDirectFixed(cUnit, rlSrc1, r0);
1574 if (checkZero) {
1575 genNullCheck(cUnit, rlSrc2.sRegLow, r1, mir->offset, NULL);
1576 }
1577 opReg(cUnit, kOpBlx, rLR);
1578 oatClobberCallRegs(cUnit);
1579 if (retReg == r0)
1580 rlResult = oatGetReturn(cUnit);
1581 else
1582 rlResult = oatGetReturnAlt(cUnit);
1583 storeValue(cUnit, rlDest, rlResult);
1584 }
1585 return false;
1586}
1587
1588/* Generate unconditional branch instructions */
1589static ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
1590{
1591 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1592 branch->generic.target = (LIR*) target;
1593 return branch;
1594}
1595
1596/*
1597 * Fetch *self->info.breakFlags. If the breakFlags are non-zero,
1598 * punt to the interpreter.
1599 */
1600static void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
1601{
1602 UNIMPLEMENTED(WARNING);
1603#if 0
1604 int rTemp = oatAllocTemp(cUnit);
1605 ArmLIR* ld;
1606 ld = loadBaseDisp(cUnit, NULL, rSELF,
1607 offsetof(Thread, interpBreak.ctl.breakFlags),
1608 rTemp, kUnsignedByte, INVALID_SREG);
1609 setMemRefType(ld, true /* isLoad */, kMustNotAlias);
1610 genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL);
1611#endif
1612}
1613
1614/*
1615 * The following are the first-level codegen routines that analyze the format
1616 * of each bytecode then either dispatch special purpose codegen routines
1617 * or produce corresponding Thumb instructions directly.
1618 */
1619
/*
 * Returns true if 'x' is a positive power of two (exactly one bit set).
 *
 * Zero and negative values are rejected up front.  This also keeps
 * 'x - 1' from overflowing when x is INT_MIN, which would be undefined
 * behavior for a signed int.
 */
static bool isPowerOfTwo(int x)
{
    return (x > 0) && ((x & (x - 1)) == 0);
}
1624
// Returns true if at most two bits are set in 'x' (zero, one or two).
static bool isPopCountLE2(unsigned int x)
{
    // Clearing the lowest set bit must leave at most one bit behind.
    const unsigned int withoutLowestBit = x & (x - 1);
    return (withoutLowestBit & (withoutLowestBit - 1)) == 0;
}
1631
// Returns the index (0 = least significant) of the lowest set bit in 'x',
// or -1 if 'x' is zero.  Without the zero guard the scan loops below would
// never terminate, because shifting zero never produces a set bit.
static int lowestSetBit(unsigned int x) {
    if (x == 0) {
        return -1;
    }
    int bitPosn = 0;
    // Skip whole empty nibbles first ...
    while ((x & 0xf) == 0) {
        bitPosn += 4;
        x >>= 4;
    }
    // ... then single bits within the first non-empty nibble.
    while ((x & 1) == 0) {
        bitPosn++;
        x >>= 1;
    }
    return bitPosn;
}
1645
1646// Returns true if it added instructions to 'cUnit' to divide 'rlSrc' by 'lit'
1647// and store the result in 'rlDest'.
1648static bool handleEasyDivide(CompilationUnit* cUnit, Opcode dalvikOpcode,
1649 RegLocation rlSrc, RegLocation rlDest, int lit)
1650{
1651 if (lit < 2 || !isPowerOfTwo(lit)) {
1652 return false;
1653 }
1654 int k = lowestSetBit(lit);
1655 if (k >= 30) {
1656 // Avoid special cases.
1657 return false;
1658 }
1659 bool div = (dalvikOpcode == OP_DIV_INT_LIT8 ||
1660 dalvikOpcode == OP_DIV_INT_LIT16);
1661 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1662 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1663 if (div) {
1664 int tReg = oatAllocTemp(cUnit);
1665 if (lit == 2) {
1666 // Division by 2 is by far the most common division by constant.
1667 opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
1668 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1669 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1670 } else {
1671 opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
1672 opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
1673 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1674 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1675 }
1676 } else {
1677 int cReg = oatAllocTemp(cUnit);
1678 loadConstant(cUnit, cReg, lit - 1);
1679 int tReg1 = oatAllocTemp(cUnit);
1680 int tReg2 = oatAllocTemp(cUnit);
1681 if (lit == 2) {
1682 opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
1683 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1684 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1685 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1686 } else {
1687 opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
1688 opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
1689 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1690 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1691 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1692 }
1693 }
1694 storeValue(cUnit, rlDest, rlResult);
1695 return true;
1696}
1697
1698// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
1699// and store the result in 'rlDest'.
1700static bool handleEasyMultiply(CompilationUnit* cUnit,
1701 RegLocation rlSrc, RegLocation rlDest, int lit)
1702{
1703 // Can we simplify this multiplication?
1704 bool powerOfTwo = false;
1705 bool popCountLE2 = false;
1706 bool powerOfTwoMinusOne = false;
1707 if (lit < 2) {
1708 // Avoid special cases.
1709 return false;
1710 } else if (isPowerOfTwo(lit)) {
1711 powerOfTwo = true;
1712 } else if (isPopCountLE2(lit)) {
1713 popCountLE2 = true;
1714 } else if (isPowerOfTwo(lit + 1)) {
1715 powerOfTwoMinusOne = true;
1716 } else {
1717 return false;
1718 }
1719 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1720 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1721 if (powerOfTwo) {
1722 // Shift.
1723 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg,
1724 lowestSetBit(lit));
1725 } else if (popCountLE2) {
1726 // Shift and add and shift.
1727 int firstBit = lowestSetBit(lit);
1728 int secondBit = lowestSetBit(lit ^ (1 << firstBit));
1729 genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit,
1730 firstBit, secondBit);
1731 } else {
1732 // Reverse subtract: (src << (shift + 1)) - src.
1733 assert(powerOfTwoMinusOne);
1734 // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
1735 int tReg = oatAllocTemp(cUnit);
1736 opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
1737 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
1738 }
1739 storeValue(cUnit, rlDest, rlResult);
1740 return true;
1741}
1742
1743static bool genArithOpIntLit(CompilationUnit* cUnit, MIR* mir,
1744 RegLocation rlDest, RegLocation rlSrc,
1745 int lit)
1746{
1747 Opcode dalvikOpcode = mir->dalvikInsn.opcode;
1748 RegLocation rlResult;
1749 OpKind op = (OpKind)0; /* Make gcc happy */
1750 int shiftOp = false;
1751 bool isDiv = false;
1752 int funcOffset;
1753
1754 switch (dalvikOpcode) {
1755 case OP_RSUB_INT_LIT8:
1756 case OP_RSUB_INT: {
1757 int tReg;
1758 //TUNING: add support for use of Arm rsub op
1759 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1760 tReg = oatAllocTemp(cUnit);
1761 loadConstant(cUnit, tReg, lit);
1762 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1763 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1764 tReg, rlSrc.lowReg);
1765 storeValue(cUnit, rlDest, rlResult);
1766 return false;
1767 break;
1768 }
1769
1770 case OP_ADD_INT_LIT8:
1771 case OP_ADD_INT_LIT16:
1772 op = kOpAdd;
1773 break;
1774 case OP_MUL_INT_LIT8:
1775 case OP_MUL_INT_LIT16: {
1776 if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) {
1777 return false;
1778 }
1779 op = kOpMul;
1780 break;
1781 }
1782 case OP_AND_INT_LIT8:
1783 case OP_AND_INT_LIT16:
1784 op = kOpAnd;
1785 break;
1786 case OP_OR_INT_LIT8:
1787 case OP_OR_INT_LIT16:
1788 op = kOpOr;
1789 break;
1790 case OP_XOR_INT_LIT8:
1791 case OP_XOR_INT_LIT16:
1792 op = kOpXor;
1793 break;
1794 case OP_SHL_INT_LIT8:
1795 lit &= 31;
1796 shiftOp = true;
1797 op = kOpLsl;
1798 break;
1799 case OP_SHR_INT_LIT8:
1800 lit &= 31;
1801 shiftOp = true;
1802 op = kOpAsr;
1803 break;
1804 case OP_USHR_INT_LIT8:
1805 lit &= 31;
1806 shiftOp = true;
1807 op = kOpLsr;
1808 break;
1809
1810 case OP_DIV_INT_LIT8:
1811 case OP_DIV_INT_LIT16:
1812 case OP_REM_INT_LIT8:
1813 case OP_REM_INT_LIT16:
1814 if (lit == 0) {
1815 UNIMPLEMENTED(FATAL);
1816 // FIXME: generate an explicit throw here
1817 return false;
1818 }
1819 if (handleEasyDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit)) {
1820 return false;
1821 }
1822 oatFlushAllRegs(cUnit); /* Everything to home location */
1823 loadValueDirectFixed(cUnit, rlSrc, r0);
1824 oatClobber(cUnit, r0);
1825 if ((dalvikOpcode == OP_DIV_INT_LIT8) ||
1826 (dalvikOpcode == OP_DIV_INT_LIT16)) {
1827 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1828 isDiv = true;
1829 } else {
1830 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1831 isDiv = false;
1832 }
1833 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1834 loadConstant(cUnit, r1, lit);
1835 opReg(cUnit, kOpBlx, rLR);
1836 oatClobberCallRegs(cUnit);
1837 if (isDiv)
1838 rlResult = oatGetReturn(cUnit);
1839 else
1840 rlResult = oatGetReturnAlt(cUnit);
1841 storeValue(cUnit, rlDest, rlResult);
1842 return false;
1843 break;
1844 default:
1845 return true;
1846 }
1847 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1848 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1849 // Avoid shifts by literal 0 - no support in Thumb. Change to copy
1850 if (shiftOp && (lit == 0)) {
1851 genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
1852 } else {
1853 opRegRegImm(cUnit, op, rlResult.lowReg, rlSrc.lowReg, lit);
1854 }
1855 storeValue(cUnit, rlDest, rlResult);
1856 return false;
1857}
1858
1859/* Architectural-specific debugging helpers go here */
1860void oatArchDump(void)
1861{
1862 /* Print compiled opcode in this VM instance */
1863 int i, start, streak;
1864 char buf[1024];
1865
1866 streak = i = 0;
1867 buf[0] = 0;
1868 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1869 i++;
1870 }
1871 if (i == kNumPackedOpcodes) {
1872 return;
1873 }
1874 for (start = i++, streak = 1; i < kNumPackedOpcodes; i++) {
1875 if (opcodeCoverage[i]) {
1876 streak++;
1877 } else {
1878 if (streak == 1) {
1879 sprintf(buf+strlen(buf), "%x,", start);
1880 } else {
1881 sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1);
1882 }
1883 streak = 0;
1884 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1885 i++;
1886 }
1887 if (i < kNumPackedOpcodes) {
1888 streak = 1;
1889 start = i;
1890 }
1891 }
1892 }
1893 if (streak) {
1894 if (streak == 1) {
1895 sprintf(buf+strlen(buf), "%x", start);
1896 } else {
1897 sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1);
1898 }
1899 }
1900 if (strlen(buf)) {
1901 LOG(INFO) << "dalvik.vm.oat.op = " << buf;
1902 }
1903}