blob: 3da5aaa5fb9b8c44e500b475d453189ea0389476 [file] [log] [blame]
buzbee67bf8852011-08-17 17:51:35 -07001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
 * This file contains codegen for the Thumb2 ISA and is intended to be
 * included by:
20 *
21 * Codegen-$(TARGET_ARCH_VARIANT).c
22 *
23 */
24
25/*
26 * Construct an s4 from two consecutive half-words of switch data.
27 * This needs to check endianness because the DEX optimizer only swaps
28 * half-words in instruction stream.
29 *
30 * "switchData" must be 32-bit aligned.
31 */
32#if __BYTE_ORDER == __LITTLE_ENDIAN
33static inline s4 s4FromSwitchData(const void* switchData) {
34 return *(s4*) switchData;
35}
36#else
37static inline s4 s4FromSwitchData(const void* switchData) {
38 u2* data = switchData;
39 return data[0] | (((s4) data[1]) << 16);
40}
41#endif
42
43/*
44 * Generate a Thumb2 IT instruction, which can nullify up to
45 * four subsequent instructions based on a condition and its
46 * inverse. The condition applies to the first instruction, which
47 * is executed if the condition is met. The string "guide" consists
48 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
49 * A "T" means the instruction is executed if the condition is
50 * met, and an "E" means the instruction is executed if the condition
51 * is not met.
52 */
53static ArmLIR* genIT(CompilationUnit* cUnit, ArmConditionCode code,
54 const char* guide)
55{
56 int mask;
57 int condBit = code & 1;
58 int altBit = condBit ^ 1;
59 int mask3 = 0;
60 int mask2 = 0;
61 int mask1 = 0;
62
63 //Note: case fallthroughs intentional
64 switch(strlen(guide)) {
65 case 3:
66 mask1 = (guide[2] == 'T') ? condBit : altBit;
67 case 2:
68 mask2 = (guide[1] == 'T') ? condBit : altBit;
69 case 1:
70 mask3 = (guide[0] == 'T') ? condBit : altBit;
71 break;
72 case 0:
73 break;
74 default:
75 LOG(FATAL) << "OAT: bad case in genIT";
76 }
77 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
78 (1 << (3 - strlen(guide)));
79 return newLIR2(cUnit, kThumb2It, code, mask);
80}
81
82/*
83 * Insert a kArmPseudoCaseLabel at the beginning of the Dalvik
84 * offset vaddr. This label will be used to fix up the case
85 * branch table during the assembly phase. Be sure to set
86 * all resource flags on this to prevent code motion across
87 * target boundaries. KeyVal is just there for debugging.
88 */
89static ArmLIR* insertCaseLabel(CompilationUnit* cUnit, int vaddr, int keyVal)
90{
91 ArmLIR* lir;
92 for (lir = (ArmLIR*)cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
93 if ((lir->opcode == kArmPseudoDalvikByteCodeBoundary) &&
94 (lir->generic.dalvikOffset == vaddr)) {
95 ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
96 newLabel->generic.dalvikOffset = vaddr;
97 newLabel->opcode = kArmPseudoCaseLabel;
98 newLabel->operands[0] = keyVal;
99 oatInsertLIRAfter((LIR*)lir, (LIR*)newLabel);
100 return newLabel;
101 }
102 }
103 oatCodegenDump(cUnit);
104 LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
105 return NULL; // Quiet gcc
106}
107
108static void markPackedCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
109{
110 const u2* table = tabRec->table;
111 int baseVaddr = tabRec->vaddr;
112 int *targets = (int*)&table[4];
113 int entries = table[1];
114 int lowKey = s4FromSwitchData(&table[2]);
115 for (int i = 0; i < entries; i++) {
116 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
117 i + lowKey);
118 }
119}
120
121static void markSparseCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
122{
123 const u2* table = tabRec->table;
124 int baseVaddr = tabRec->vaddr;
125 int entries = table[1];
126 int* keys = (int*)&table[2];
127 int* targets = &keys[entries];
128 for (int i = 0; i < entries; i++) {
129 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
130 keys[i]);
131 }
132}
133
134void oatProcessSwitchTables(CompilationUnit* cUnit)
135{
136 GrowableListIterator iterator;
137 oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
138 while (true) {
139 SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext(
140 &iterator);
141 if (tabRec == NULL) break;
142 if (tabRec->table[0] == kPackedSwitchSignature)
143 markPackedCaseLabels(cUnit, tabRec);
144 else if (tabRec->table[0] == kSparseSwitchSignature)
145 markSparseCaseLabels(cUnit, tabRec);
146 else {
147 LOG(FATAL) << "Invalid switch table";
148 }
149 }
150}
151
152static void dumpSparseSwitchTable(const u2* table)
153 /*
154 * Sparse switch data format:
155 * ushort ident = 0x0200 magic value
156 * ushort size number of entries in the table; > 0
157 * int keys[size] keys, sorted low-to-high; 32-bit aligned
158 * int targets[size] branch targets, relative to switch opcode
159 *
160 * Total size is (2+size*4) 16-bit code units.
161 */
162{
163 u2 ident = table[0];
164 int entries = table[1];
165 int* keys = (int*)&table[2];
166 int* targets = &keys[entries];
167 LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident <<
168 ", entries: " << std::dec << entries;
169 for (int i = 0; i < entries; i++) {
170 LOG(INFO) << " Key[" << keys[i] << "] -> 0x" << std::hex <<
171 targets[i];
172 }
173}
174
175static void dumpPackedSwitchTable(const u2* table)
176 /*
177 * Packed switch data format:
178 * ushort ident = 0x0100 magic value
179 * ushort size number of entries in the table
180 * int first_key first (and lowest) switch case value
181 * int targets[size] branch targets, relative to switch opcode
182 *
183 * Total size is (4+size*2) 16-bit code units.
184 */
185{
186 u2 ident = table[0];
187 int* targets = (int*)&table[4];
188 int entries = table[1];
189 int lowKey = s4FromSwitchData(&table[2]);
190 LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident <<
191 ", entries: " << std::dec << entries << ", lowKey: " << lowKey;
192 for (int i = 0; i < entries; i++) {
193 LOG(INFO) << " Key[" << (i + lowKey) << "] -> 0x" << std::hex <<
194 targets[i];
195 }
196}
197
198/*
199 * The sparse table in the literal pool is an array of <key,displacement>
200 * pairs. For each set, we'll load them as a pair using ldmia.
201 * This means that the register number of the temp we use for the key
202 * must be lower than the reg for the displacement.
203 *
204 * The test loop will look something like:
205 *
206 * adr rBase, <table>
207 * ldr rVal, [rSP, vRegOff]
208 * mov rIdx, #tableSize
209 * lp:
210 * ldmia rBase!, {rKey, rDisp}
211 * sub rIdx, #1
212 * cmp rVal, rKey
213 * ifeq
214 * add rPC, rDisp ; This is the branch from which we compute displacement
215 * cbnz rIdx, lp
216 */
217static void genSparseSwitch(CompilationUnit* cUnit, MIR* mir,
218 RegLocation rlSrc)
219{
220 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
221 if (cUnit->printMe) {
222 dumpSparseSwitchTable(table);
223 }
224 // Add the table to the list - we'll process it later
225 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
226 true);
227 tabRec->table = table;
228 tabRec->vaddr = mir->offset;
229 int size = table[1];
230 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
231 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
232
233 // Get the switch value
234 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
235 int rBase = oatAllocTemp(cUnit);
236 /* Allocate key and disp temps */
237 int rKey = oatAllocTemp(cUnit);
238 int rDisp = oatAllocTemp(cUnit);
239 // Make sure rKey's register number is less than rDisp's number for ldmia
240 if (rKey > rDisp) {
241 int tmp = rDisp;
242 rDisp = rKey;
243 rKey = tmp;
244 }
245 // Materialize a pointer to the switch table
246 newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec);
247 // Set up rIdx
248 int rIdx = oatAllocTemp(cUnit);
249 loadConstant(cUnit, rIdx, size);
250 // Establish loop branch target
251 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
252 target->defMask = ENCODE_ALL;
253 // Load next key/disp
254 newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
255 opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
256 // Go if match. NOTE: No instruction set switch here - must stay Thumb2
257 genIT(cUnit, kArmCondEq, "");
258 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
259 tabRec->bxInst = switchBranch;
260 // Needs to use setflags encoding here
261 newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
262 ArmLIR* branch = opCondBranch(cUnit, kArmCondNe);
263 branch->generic.target = (LIR*)target;
264}
265
266
267static void genPackedSwitch(CompilationUnit* cUnit, MIR* mir,
268 RegLocation rlSrc)
269{
270 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
271 if (cUnit->printMe) {
272 dumpPackedSwitchTable(table);
273 }
274 // Add the table to the list - we'll process it later
275 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
276 true);
277 tabRec->table = table;
278 tabRec->vaddr = mir->offset;
279 int size = table[1];
280 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
281 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
282
283 // Get the switch value
284 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
285 int tableBase = oatAllocTemp(cUnit);
286 // Materialize a pointer to the switch table
287 newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec);
288 int lowKey = s4FromSwitchData(&table[2]);
289 int keyReg;
290 // Remove the bias, if necessary
291 if (lowKey == 0) {
292 keyReg = rlSrc.lowReg;
293 } else {
294 keyReg = oatAllocTemp(cUnit);
295 opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
296 }
297 // Bounds check - if < 0 or >= size continue following switch
298 opRegImm(cUnit, kOpCmp, keyReg, size-1);
299 ArmLIR* branchOver = opCondBranch(cUnit, kArmCondHi);
300
301 // Load the displacement from the switch table
302 int dispReg = oatAllocTemp(cUnit);
303 loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
304
305 // ..and go! NOTE: No instruction set switch here - must stay Thumb2
306 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
307 tabRec->bxInst = switchBranch;
308
309 /* branchOver target here */
310 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
311 target->defMask = ENCODE_ALL;
312 branchOver->generic.target = (LIR*)target;
313}
314
315/*
316 * Array data table format:
317 * ushort ident = 0x0300 magic value
318 * ushort width width of each element in the table
319 * uint size number of elements in the table
320 * ubyte data[size*width] table of data values (may contain a single-byte
321 * padding at the end)
322 *
323 * Total size is 4+(width * size + 1)/2 16-bit code units.
324 */
325static void genFillArrayData(CompilationUnit* cUnit, MIR* mir,
326 RegLocation rlSrc)
327{
328 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
329 // Add the table to the list - we'll process it later
330 FillArrayData *tabRec = (FillArrayData *)
331 oatNew(sizeof(FillArrayData), true);
332 tabRec->table = table;
333 tabRec->vaddr = mir->offset;
334 u2 width = tabRec->table[1];
335 u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16);
336 tabRec->size = (size * width) + 8;
337
338 oatInsertGrowableList(&cUnit->fillArrayData, (intptr_t)tabRec);
339
340 // Making a call - use explicit registers
341 oatFlushAllRegs(cUnit); /* Everything to home location */
342 loadValueDirectFixed(cUnit, rlSrc, r0);
343 loadWordDisp(cUnit, rSELF,
344 OFFSETOF_MEMBER(Thread, pArtHandleFillArrayDataNoThrow), rLR);
buzbeee6d61962011-08-27 11:58:19 -0700345 // Materialize a pointer to the fill data image
buzbee67bf8852011-08-17 17:51:35 -0700346 newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec);
347 opReg(cUnit, kOpBlx, rLR);
348 oatClobberCallRegs(cUnit);
349}
350
351/*
352 * Mark garbage collection card. Skip if the value we're storing is null.
353 */
354static void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
355{
356 int regCardBase = oatAllocTemp(cUnit);
357 int regCardNo = oatAllocTemp(cUnit);
358 ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
buzbeec143c552011-08-20 17:38:58 -0700359 loadWordDisp(cUnit, rSELF, Thread::CardTableOffset().Int32Value(),
buzbee67bf8852011-08-17 17:51:35 -0700360 regCardBase);
361 opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
362 storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
363 kUnsignedByte);
364 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
365 target->defMask = ENCODE_ALL;
366 branchOver->generic.target = (LIR*)target;
367 oatFreeTemp(cUnit, regCardBase);
368 oatFreeTemp(cUnit, regCardNo);
369}
370
371static void genIGetX(CompilationUnit* cUnit, MIR* mir, OpSize size,
372 RegLocation rlDest, RegLocation rlObj)
373{
buzbeec143c552011-08-20 17:38:58 -0700374 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
375 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700376 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700377 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700378 }
379#if ANDROID_SMP != 0
380 bool isVolatile = dvmIsVolatileField(fieldPtr);
381#else
382 bool isVolatile = false;
383#endif
buzbeec143c552011-08-20 17:38:58 -0700384 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700385 RegLocation rlResult;
386 RegisterClass regClass = oatRegClassBySize(size);
387 rlObj = loadValue(cUnit, rlObj, kCoreReg);
388 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
389 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
390 NULL);/* null object? */
391 loadBaseDisp(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg,
392 size, rlObj.sRegLow);
393 if (isVolatile) {
394 oatGenMemBarrier(cUnit, kSY);
395 }
396
397 storeValue(cUnit, rlDest, rlResult);
398}
399
400static void genIPutX(CompilationUnit* cUnit, MIR* mir, OpSize size,
401 RegLocation rlSrc, RegLocation rlObj, bool isObject)
402{
buzbeec143c552011-08-20 17:38:58 -0700403 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
404 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700405 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700406 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700407 }
408#if ANDROID_SMP != 0
409 bool isVolatile = dvmIsVolatileField(fieldPtr);
410#else
411 bool isVolatile = false;
412#endif
buzbeec143c552011-08-20 17:38:58 -0700413 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700414 RegisterClass regClass = oatRegClassBySize(size);
415 rlObj = loadValue(cUnit, rlObj, kCoreReg);
416 rlSrc = loadValue(cUnit, rlSrc, regClass);
417 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
418 NULL);/* null object? */
419
420 if (isVolatile) {
421 oatGenMemBarrier(cUnit, kSY);
422 }
423 storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
424 if (isObject) {
425 /* NOTE: marking card based on object head */
426 markGCCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
427 }
428}
429
430static void genIGetWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
431 RegLocation rlObj)
432{
buzbeec143c552011-08-20 17:38:58 -0700433 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
434 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700435 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700436 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700437 }
438#if ANDROID_SMP != 0
439 bool isVolatile = dvmIsVolatileField(fieldPtr);
440#else
441 bool isVolatile = false;
442#endif
buzbeec143c552011-08-20 17:38:58 -0700443 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700444 RegLocation rlResult;
445 rlObj = loadValue(cUnit, rlObj, kCoreReg);
446 int regPtr = oatAllocTemp(cUnit);
447
448 assert(rlDest.wide);
449
450 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
451 NULL);/* null object? */
452 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
453 rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
454
455 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
456
457 if (isVolatile) {
458 oatGenMemBarrier(cUnit, kSY);
459 }
460
461 oatFreeTemp(cUnit, regPtr);
462 storeValueWide(cUnit, rlDest, rlResult);
463}
464
465static void genIPutWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
466 RegLocation rlObj)
467{
buzbeec143c552011-08-20 17:38:58 -0700468 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
469 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700470 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700471 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700472 }
473#if ANDROID_SMP != 0
474 bool isVolatile = dvmIsVolatileField(fieldPtr);
475#else
476 bool isVolatile = false;
477#endif
buzbeec143c552011-08-20 17:38:58 -0700478 int fieldOffset = fieldPtr->GetOffset();
buzbee67bf8852011-08-17 17:51:35 -0700479
480 rlObj = loadValue(cUnit, rlObj, kCoreReg);
481 int regPtr;
482 rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
483 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
484 NULL);/* null object? */
485 regPtr = oatAllocTemp(cUnit);
486 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
487
488 if (isVolatile) {
489 oatGenMemBarrier(cUnit, kSY);
490 }
491 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
492
493 oatFreeTemp(cUnit, regPtr);
494}
495
496static void genConstClass(CompilationUnit* cUnit, MIR* mir,
497 RegLocation rlDest, RegLocation rlSrc)
498{
buzbeec143c552011-08-20 17:38:58 -0700499 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
Brian Carlstrom9ea1cb12011-08-24 23:18:18 -0700500 GetResolvedType(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700501
502 if (classPtr == NULL) {
503 LOG(FATAL) << "Unexpected null class pointer";
504 }
505
buzbeec143c552011-08-20 17:38:58 -0700506 UNIMPLEMENTED(WARNING) << "Not position independent. Fix";
buzbee67bf8852011-08-17 17:51:35 -0700507 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
508 loadConstantNoClobber(cUnit, rlResult.lowReg, (int) classPtr );
509 storeValue(cUnit, rlDest, rlResult);
510}
511
512static void genConstString(CompilationUnit* cUnit, MIR* mir,
513 RegLocation rlDest, RegLocation rlSrc)
514{
buzbeec143c552011-08-20 17:38:58 -0700515 String* strPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
516 GetResolvedString(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700517
518 if (strPtr == NULL) {
519 /* Shouldn't happen */
520 LOG(FATAL) << "Unexpected null const string pointer";
521 }
522
buzbeec143c552011-08-20 17:38:58 -0700523 UNIMPLEMENTED(WARNING) << "Not position indendent. Fix";
buzbee67bf8852011-08-17 17:51:35 -0700524 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
525 loadConstantNoClobber(cUnit, rlResult.lowReg, (int) strPtr );
526 storeValue(cUnit, rlDest, rlResult);
527}
528
buzbeedfd3d702011-08-28 12:56:51 -0700529/*
530 * Let helper function take care of everything. Will
531 * call Class::NewInstanceFromCode(type_idx, method);
532 */
buzbee67bf8852011-08-17 17:51:35 -0700533static void genNewInstance(CompilationUnit* cUnit, MIR* mir,
534 RegLocation rlDest)
535{
buzbeedfd3d702011-08-28 12:56:51 -0700536 oatFlushAllRegs(cUnit); /* Everything to home location */
buzbee67bf8852011-08-17 17:51:35 -0700537 loadWordDisp(cUnit, rSELF,
Brian Carlstrom1f870082011-08-23 16:02:11 -0700538 OFFSETOF_MEMBER(Thread, pAllocObjectFromCode), rLR);
buzbeedfd3d702011-08-28 12:56:51 -0700539 loadCurrMethodDirect(cUnit, r1); // arg1 <= Method*
540 loadConstant(cUnit, r0, mir->dalvikInsn.vB); // arg0 <- type_id
buzbee67bf8852011-08-17 17:51:35 -0700541 opReg(cUnit, kOpBlx, rLR);
542 oatClobberCallRegs(cUnit);
543 RegLocation rlResult = oatGetReturn(cUnit);
544 storeValue(cUnit, rlDest, rlResult);
545}
546
547void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
548{
549 loadWordDisp(cUnit, rSELF,
550 OFFSETOF_MEMBER(Thread, pArtAllocObjectNoThrow), rLR);
551 loadValueDirectFixed(cUnit, rlSrc, r1); /* Exception object */
552 genRegCopy(cUnit, r0, rSELF);
553 opReg(cUnit, kOpBlx, rLR);
554}
555
556static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
557 RegLocation rlSrc)
558{
559 // May generate a call - use explicit registers
560 RegLocation rlResult;
buzbeec143c552011-08-20 17:38:58 -0700561 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
Brian Carlstrom9ea1cb12011-08-24 23:18:18 -0700562 GetResolvedType(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700563 if (classPtr == NULL) {
564 /* Shouldn't happen */
565 LOG(FATAL) << "Unexpected null class pointer";
566 }
567 oatFlushAllRegs(cUnit); /* Everything to home location */
568 loadValueDirectFixed(cUnit, rlSrc, r0); /* Ref */
569 loadConstant(cUnit, r2, (int) classPtr );
570 /* When taken r0 has NULL which can be used for store directly */
571 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, r0, 0);
572 /* r1 now contains object->clazz */
buzbeec143c552011-08-20 17:38:58 -0700573 assert(OFFSETOF_MEMBER(Object, klass_) == 0);
574 loadWordDisp(cUnit, r0, OFFSETOF_MEMBER(Object, klass_), r1);
buzbee67bf8852011-08-17 17:51:35 -0700575 /* r1 now contains object->clazz */
576 loadWordDisp(cUnit, rSELF,
577 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivial), rLR);
578 loadConstant(cUnit, r0, 1); /* Assume true */
579 opRegReg(cUnit, kOpCmp, r1, r2);
580 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
581 genRegCopy(cUnit, r0, r1);
582 genRegCopy(cUnit, r1, r2);
583 opReg(cUnit, kOpBlx, rLR);
584 oatClobberCallRegs(cUnit);
585 /* branch target here */
586 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
587 target->defMask = ENCODE_ALL;
588 rlResult = oatGetReturn(cUnit);
589 storeValue(cUnit, rlDest, rlResult);
590 branch1->generic.target = (LIR*)target;
591 branch2->generic.target = (LIR*)target;
592}
593
594static void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
595{
buzbeec143c552011-08-20 17:38:58 -0700596 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
Brian Carlstrom9ea1cb12011-08-24 23:18:18 -0700597 GetResolvedType(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700598 if (classPtr == NULL) {
599 /* Shouldn't happen with our current model */
600 LOG(FATAL) << "Unexpected null class pointer";
601 }
602 oatFlushAllRegs(cUnit); /* Everything to home location */
603 loadConstant(cUnit, r1, (int) classPtr );
604 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
605 /* Null? */
606 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq,
607 rlSrc.lowReg, 0);
608 /*
609 * rlSrc.lowReg now contains object->clazz. Note that
610 * it could have been allocated r0, but we're okay so long
611 * as we don't do anything desctructive until r0 is loaded
612 * with clazz.
613 */
614 /* r0 now contains object->clazz */
buzbeec143c552011-08-20 17:38:58 -0700615 loadWordDisp(cUnit, rlSrc.lowReg, OFFSETOF_MEMBER(Object, klass_), r0);
buzbee67bf8852011-08-17 17:51:35 -0700616 loadWordDisp(cUnit, rSELF,
617 OFFSETOF_MEMBER(Thread, pArtInstanceofNonTrivialNoThrow), rLR);
618 opRegReg(cUnit, kOpCmp, r0, r1);
619 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
620 // Assume success - if not, artInstanceOfNonTrivial will handle throw
621 opReg(cUnit, kOpBlx, rLR);
622 oatClobberCallRegs(cUnit);
623 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
624 target->defMask = ENCODE_ALL;
625 branch1->generic.target = (LIR*)target;
626 branch2->generic.target = (LIR*)target;
627}
628
629static void genNegFloat(CompilationUnit* cUnit, RegLocation rlDest,
630 RegLocation rlSrc)
631{
632 RegLocation rlResult;
633 rlSrc = loadValue(cUnit, rlSrc, kFPReg);
634 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
635 newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
636 storeValue(cUnit, rlDest, rlResult);
637}
638
639static void genNegDouble(CompilationUnit* cUnit, RegLocation rlDest,
640 RegLocation rlSrc)
641{
642 RegLocation rlResult;
643 rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
644 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
645 newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
646 S2D(rlSrc.lowReg, rlSrc.highReg));
647 storeValueWide(cUnit, rlDest, rlResult);
648}
649
buzbee439c4fa2011-08-27 15:59:07 -0700650static void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
651 RegLocation rlFree)
buzbee67bf8852011-08-17 17:51:35 -0700652{
buzbee439c4fa2011-08-27 15:59:07 -0700653 if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg))
654 oatFreeTemp(cUnit, rlFree.lowReg);
655 if ((rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg))
656 oatFreeTemp(cUnit, rlFree.lowReg);
buzbee67bf8852011-08-17 17:51:35 -0700657}
658
659static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
660 OpKind secondOp, RegLocation rlDest,
661 RegLocation rlSrc1, RegLocation rlSrc2)
662{
buzbee9e0f9b02011-08-24 15:32:46 -0700663 /*
664 * NOTE: This is the one place in the code in which we might have
665 * as many as six live temporary registers. There are 5 in the normal
666 * set for Arm. Until we have spill capabilities, temporarily add
667 * lr to the temp set. It is safe to do this locally, but note that
668 * lr is used explicitly elsewhere in the code generator and cannot
669 * normally be used as a general temp register.
670 */
buzbee67bf8852011-08-17 17:51:35 -0700671 RegLocation rlResult;
buzbee9e0f9b02011-08-24 15:32:46 -0700672 oatMarkTemp(cUnit, rLR); // Add lr to the temp pool
673 oatFreeTemp(cUnit, rLR); // and make it available
buzbee67bf8852011-08-17 17:51:35 -0700674 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
675 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
676 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
677 opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
678 opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
679 rlSrc2.highReg);
buzbee439c4fa2011-08-27 15:59:07 -0700680 /*
681 * NOTE: If rlDest refers to a frame variable in a large frame, the
682 * following storeValueWide might need to allocate a temp register.
683 * To further work around the lack of a spill capability, explicitly
684 * free any temps from rlSrc1 & rlSrc2 that aren't still live in rlResult.
685 * Remove when spill is functional.
686 */
687 freeRegLocTemps(cUnit, rlResult, rlSrc1);
688 freeRegLocTemps(cUnit, rlResult, rlSrc2);
buzbee67bf8852011-08-17 17:51:35 -0700689 storeValueWide(cUnit, rlDest, rlResult);
buzbee9e0f9b02011-08-24 15:32:46 -0700690 oatClobber(cUnit, rLR);
691 oatUnmarkTemp(cUnit, rLR); // Remove lr from the temp pool
buzbee67bf8852011-08-17 17:51:35 -0700692}
693
694void oatInitializeRegAlloc(CompilationUnit* cUnit)
695{
696 int numRegs = sizeof(coreRegs)/sizeof(*coreRegs);
697 int numReserved = sizeof(reservedRegs)/sizeof(*reservedRegs);
698 int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
699 int numFPRegs = sizeof(fpRegs)/sizeof(*fpRegs);
700 int numFPTemps = sizeof(fpTemps)/sizeof(*fpTemps);
701 RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true);
702 cUnit->regPool = pool;
703 pool->numCoreRegs = numRegs;
704 pool->coreRegs = (RegisterInfo *)
705 oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true);
706 pool->numFPRegs = numFPRegs;
707 pool->FPRegs = (RegisterInfo *)
708 oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true);
709 oatInitPool(pool->coreRegs, coreRegs, pool->numCoreRegs);
710 oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
711 // Keep special registers from being allocated
712 for (int i = 0; i < numReserved; i++) {
713 oatMarkInUse(cUnit, reservedRegs[i]);
714 }
715 // Mark temp regs - all others not in use can be used for promotion
716 for (int i = 0; i < numTemps; i++) {
717 oatMarkTemp(cUnit, coreTemps[i]);
718 }
719 for (int i = 0; i < numFPTemps; i++) {
720 oatMarkTemp(cUnit, fpTemps[i]);
721 }
722 pool->nullCheckedRegs =
723 oatAllocBitVector(cUnit->numSSARegs, false);
724}
725
726/*
727 * Handle simple case (thin lock) inline. If it's complicated, bail
728 * out to the heavyweight lock/unlock routines. We'll use dedicated
729 * registers here in order to be in the right position in case we
730 * to bail to dvm[Lock/Unlock]Object(self, object)
731 *
732 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object
733 * r1 -> object [arg1 for dvm[Lock/Unlock]Object
734 * r2 -> intial contents of object->lock, later result of strex
735 * r3 -> self->threadId
736 * r12 -> allow to be used by utilities as general temp
737 *
738 * The result of the strex is 0 if we acquire the lock.
739 *
740 * See comments in Sync.c for the layout of the lock word.
741 * Of particular interest to this code is the test for the
742 * simple case - which we handle inline. For monitor enter, the
743 * simple case is thin lock, held by no-one. For monitor exit,
744 * the simple case is thin lock, held by the unlocking thread with
745 * a recurse count of 0.
746 *
747 * A minor complication is that there is a field in the lock word
748 * unrelated to locking: the hash state. This field must be ignored, but
749 * preserved.
750 *
751 */
752static void genMonitorEnter(CompilationUnit* cUnit, MIR* mir,
753 RegLocation rlSrc)
754{
755 ArmLIR* target;
756 ArmLIR* hopTarget;
757 ArmLIR* branch;
758 ArmLIR* hopBranch;
759
760 oatFlushAllRegs(cUnit);
buzbeec143c552011-08-20 17:38:58 -0700761 assert(art::Monitor::kLwShapeThin == 0);
buzbee67bf8852011-08-17 17:51:35 -0700762 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
buzbee2e748f32011-08-29 21:02:19 -0700763 oatLockCallTemps(cUnit); // Prepare for explicit register usage
buzbee67bf8852011-08-17 17:51:35 -0700764 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
buzbeec143c552011-08-20 17:38:58 -0700765 loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
buzbee67bf8852011-08-17 17:51:35 -0700766 newLIR3(cUnit, kThumb2Ldrex, r2, r1,
buzbeec143c552011-08-20 17:38:58 -0700767 OFFSETOF_MEMBER(Object, monitor_) >> 2); // Get object->lock
768 // Align owner
769 opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
buzbee67bf8852011-08-17 17:51:35 -0700770 // Is lock unheld on lock or held by us (==threadId) on unlock?
buzbeec143c552011-08-20 17:38:58 -0700771 newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, art::Monitor::kLwLockOwnerShift
772 - 1);
773 newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
774 art::Monitor::kLwLockOwnerShift - 1);
buzbee67bf8852011-08-17 17:51:35 -0700775 hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
buzbeec143c552011-08-20 17:38:58 -0700776 newLIR4(cUnit, kThumb2Strex, r2, r3, r1,
777 OFFSETOF_MEMBER(Object, monitor_) >> 2);
buzbee67bf8852011-08-17 17:51:35 -0700778 oatGenMemBarrier(cUnit, kSY);
779 branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);
780
781 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
782 hopTarget->defMask = ENCODE_ALL;
783 hopBranch->generic.target = (LIR*)hopTarget;
784
785 // Go expensive route - artLockObjectNoThrow(self, obj);
786 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtLockObjectNoThrow),
787 rLR);
788 genRegCopy(cUnit, r0, rSELF);
789 newLIR1(cUnit, kThumbBlxR, rLR);
790
791 // Resume here
792 target = newLIR0(cUnit, kArmPseudoTargetLabel);
793 target->defMask = ENCODE_ALL;
794 branch->generic.target = (LIR*)target;
795}
796
797/*
798 * For monitor unlock, we don't have to use ldrex/strex. Once
799 * we've determined that the lock is thin and that we own it with
800 * a zero recursion count, it's safe to punch it back to the
801 * initial, unlock thin state with a store word.
802 */
803static void genMonitorExit(CompilationUnit* cUnit, MIR* mir,
804 RegLocation rlSrc)
805{
806 ArmLIR* target;
807 ArmLIR* branch;
808 ArmLIR* hopTarget;
809 ArmLIR* hopBranch;
810
buzbeec143c552011-08-20 17:38:58 -0700811 assert(art::Monitor::kLwShapeThin == 0);
buzbee67bf8852011-08-17 17:51:35 -0700812 oatFlushAllRegs(cUnit);
813 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
buzbee2e748f32011-08-29 21:02:19 -0700814 oatLockCallTemps(cUnit); // Prepare for explicit register usage
buzbee67bf8852011-08-17 17:51:35 -0700815 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
buzbeec143c552011-08-20 17:38:58 -0700816 loadWordDisp(cUnit, r1, OFFSETOF_MEMBER(Object, monitor_), r2); // Get lock
817 loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
buzbee67bf8852011-08-17 17:51:35 -0700818 // Is lock unheld on lock or held by us (==threadId) on unlock?
buzbeec143c552011-08-20 17:38:58 -0700819 opRegRegImm(cUnit, kOpAnd, r12, r2, (art::Monitor::kLwHashStateMask <<
820 art::Monitor::kLwHashStateShift));
821 // Align owner
822 opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
823 newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
824 art::Monitor::kLwLockOwnerShift - 1);
buzbee67bf8852011-08-17 17:51:35 -0700825 opRegReg(cUnit, kOpSub, r2, r3);
826 hopBranch = opCondBranch(cUnit, kArmCondNe);
827 oatGenMemBarrier(cUnit, kSY);
buzbeec143c552011-08-20 17:38:58 -0700828 storeWordDisp(cUnit, r1, OFFSETOF_MEMBER(Object, monitor_), r12);
buzbee67bf8852011-08-17 17:51:35 -0700829 branch = opNone(cUnit, kOpUncondBr);
830
831 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
832 hopTarget->defMask = ENCODE_ALL;
833 hopBranch->generic.target = (LIR*)hopTarget;
834
835 // Go expensive route - artUnlockObjectNoThrow(self, obj);
836 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pArtUnlockObjectNoThrow),
837 rLR);
838 genRegCopy(cUnit, r0, rSELF);
839 newLIR1(cUnit, kThumbBlxR, rLR);
840
841 // Resume here
842 target = newLIR0(cUnit, kArmPseudoTargetLabel);
843 target->defMask = ENCODE_ALL;
844 branch->generic.target = (LIR*)target;
845}
846
847/*
848 * 64-bit 3way compare function.
849 * mov rX, #-1
850 * cmp op1hi, op2hi
851 * blt done
852 * bgt flip
853 * sub rX, op1lo, op2lo (treat as unsigned)
854 * beq done
855 * ite hi
856 * mov(hi) rX, #-1
857 * mov(!hi) rX, #1
858 * flip:
859 * neg rX
860 * done:
861 */
862static void genCmpLong(CompilationUnit* cUnit, MIR* mir,
863 RegLocation rlDest, RegLocation rlSrc1,
864 RegLocation rlSrc2)
865{
866 RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
867 ArmLIR* target1;
868 ArmLIR* target2;
869 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
870 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
871 rlTemp.lowReg = oatAllocTemp(cUnit);
872 loadConstant(cUnit, rlTemp.lowReg, -1);
873 opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
874 ArmLIR* branch1 = opCondBranch(cUnit, kArmCondLt);
875 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondGt);
876 opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
877 ArmLIR* branch3 = opCondBranch(cUnit, kArmCondEq);
878
879 genIT(cUnit, kArmCondHi, "E");
880 newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
881 loadConstant(cUnit, rlTemp.lowReg, 1);
882 genBarrier(cUnit);
883
884 target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
885 target2->defMask = -1;
886 opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);
887
888 target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
889 target1->defMask = -1;
890
891 storeValue(cUnit, rlDest, rlTemp);
892
893 branch1->generic.target = (LIR*)target1;
894 branch2->generic.target = (LIR*)target2;
895 branch3->generic.target = branch1->generic.target;
896}
897
898static void genMultiplyByTwoBitMultiplier(CompilationUnit* cUnit,
899 RegLocation rlSrc, RegLocation rlResult, int lit,
900 int firstBit, int secondBit)
901{
902 opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
903 encodeShift(kArmLsl, secondBit - firstBit));
904 if (firstBit != 0) {
905 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
906 }
907}
908
909static bool genConversionCall(CompilationUnit* cUnit, MIR* mir, int funcOffset,
910 int srcSize, int tgtSize)
911{
912 /*
913 * Don't optimize the register usage since it calls out to support
914 * functions
915 */
916 RegLocation rlSrc;
917 RegLocation rlDest;
918 oatFlushAllRegs(cUnit); /* Send everything to home location */
919 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
920 if (srcSize == 1) {
921 rlSrc = oatGetSrc(cUnit, mir, 0);
922 loadValueDirectFixed(cUnit, rlSrc, r0);
923 } else {
924 rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
925 loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
926 }
927 opReg(cUnit, kOpBlx, rLR);
928 oatClobberCallRegs(cUnit);
929 if (tgtSize == 1) {
930 RegLocation rlResult;
931 rlDest = oatGetDest(cUnit, mir, 0);
932 rlResult = oatGetReturn(cUnit);
933 storeValue(cUnit, rlDest, rlResult);
934 } else {
935 RegLocation rlResult;
936 rlDest = oatGetDestWide(cUnit, mir, 0, 1);
937 rlResult = oatGetReturnWide(cUnit);
938 storeValueWide(cUnit, rlDest, rlResult);
939 }
940 return false;
941}
942
943static bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
944 RegLocation rlDest, RegLocation rlSrc1,
945 RegLocation rlSrc2)
946{
947 RegLocation rlResult;
948 int funcOffset;
949
950 switch (mir->dalvikInsn.opcode) {
951 case OP_ADD_FLOAT_2ADDR:
952 case OP_ADD_FLOAT:
953 funcOffset = OFFSETOF_MEMBER(Thread, pFadd);
954 break;
955 case OP_SUB_FLOAT_2ADDR:
956 case OP_SUB_FLOAT:
957 funcOffset = OFFSETOF_MEMBER(Thread, pFsub);
958 break;
959 case OP_DIV_FLOAT_2ADDR:
960 case OP_DIV_FLOAT:
961 funcOffset = OFFSETOF_MEMBER(Thread, pFdiv);
962 break;
963 case OP_MUL_FLOAT_2ADDR:
964 case OP_MUL_FLOAT:
965 funcOffset = OFFSETOF_MEMBER(Thread, pFmul);
966 break;
967 case OP_REM_FLOAT_2ADDR:
968 case OP_REM_FLOAT:
969 funcOffset = OFFSETOF_MEMBER(Thread, pFmodf);
970 break;
971 case OP_NEG_FLOAT: {
972 genNegFloat(cUnit, rlDest, rlSrc1);
973 return false;
974 }
975 default:
976 return true;
977 }
978 oatFlushAllRegs(cUnit); /* Send everything to home location */
979 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
980 loadValueDirectFixed(cUnit, rlSrc1, r0);
981 loadValueDirectFixed(cUnit, rlSrc2, r1);
982 opReg(cUnit, kOpBlx, rLR);
983 oatClobberCallRegs(cUnit);
984 rlResult = oatGetReturn(cUnit);
985 storeValue(cUnit, rlDest, rlResult);
986 return false;
987}
988
989static bool genArithOpDoublePortable(CompilationUnit* cUnit, MIR* mir,
990 RegLocation rlDest, RegLocation rlSrc1,
991 RegLocation rlSrc2)
992{
993 RegLocation rlResult;
994 int funcOffset;
995
996 switch (mir->dalvikInsn.opcode) {
997 case OP_ADD_DOUBLE_2ADDR:
998 case OP_ADD_DOUBLE:
999 funcOffset = OFFSETOF_MEMBER(Thread, pDadd);
1000 break;
1001 case OP_SUB_DOUBLE_2ADDR:
1002 case OP_SUB_DOUBLE:
1003 funcOffset = OFFSETOF_MEMBER(Thread, pDsub);
1004 break;
1005 case OP_DIV_DOUBLE_2ADDR:
1006 case OP_DIV_DOUBLE:
1007 funcOffset = OFFSETOF_MEMBER(Thread, pDdiv);
1008 break;
1009 case OP_MUL_DOUBLE_2ADDR:
1010 case OP_MUL_DOUBLE:
1011 funcOffset = OFFSETOF_MEMBER(Thread, pDmul);
1012 break;
1013 case OP_REM_DOUBLE_2ADDR:
1014 case OP_REM_DOUBLE:
1015 funcOffset = OFFSETOF_MEMBER(Thread, pFmod);
1016 break;
1017 case OP_NEG_DOUBLE: {
1018 genNegDouble(cUnit, rlDest, rlSrc1);
1019 return false;
1020 }
1021 default:
1022 return true;
1023 }
1024 oatFlushAllRegs(cUnit); /* Send everything to home location */
1025 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1026 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1027 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1028 opReg(cUnit, kOpBlx, rLR);
1029 oatClobberCallRegs(cUnit);
1030 rlResult = oatGetReturnWide(cUnit);
1031 storeValueWide(cUnit, rlDest, rlResult);
1032 return false;
1033}
1034
1035static bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
1036{
1037 Opcode opcode = mir->dalvikInsn.opcode;
1038
1039 switch (opcode) {
1040 case OP_INT_TO_FLOAT:
1041 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2f),
1042 1, 1);
1043 case OP_FLOAT_TO_INT:
1044 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2iz),
1045 1, 1);
1046 case OP_DOUBLE_TO_FLOAT:
1047 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2f),
1048 2, 1);
1049 case OP_FLOAT_TO_DOUBLE:
1050 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2d),
1051 1, 2);
1052 case OP_INT_TO_DOUBLE:
1053 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2d),
1054 1, 2);
1055 case OP_DOUBLE_TO_INT:
1056 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2iz),
1057 2, 1);
1058 case OP_FLOAT_TO_LONG:
1059 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
1060 pArtF2l), 1, 2);
1061 case OP_LONG_TO_FLOAT:
1062 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2f),
1063 2, 1);
1064 case OP_DOUBLE_TO_LONG:
1065 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
1066 pArtD2l), 2, 2);
1067 case OP_LONG_TO_DOUBLE:
1068 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2d),
1069 2, 2);
1070 default:
1071 return true;
1072 }
1073 return false;
1074}
1075
1076/* Generate conditional branch instructions */
1077static ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
1078 ArmConditionCode cond,
1079 ArmLIR* target)
1080{
1081 ArmLIR* branch = opCondBranch(cUnit, cond);
1082 branch->generic.target = (LIR*) target;
1083 return branch;
1084}
1085
1086/* Generate a unconditional branch to go to the interpreter */
1087static inline ArmLIR* genTrap(CompilationUnit* cUnit, int dOffset,
1088 ArmLIR* pcrLabel)
1089{
1090 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1091 return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
1092}
1093
1094/*
1095 * Generate array store
1096 *
1097 */
buzbeec143c552011-08-20 17:38:58 -07001098static void genArrayPut(CompilationUnit* cUnit, MIR* mir,
buzbee67bf8852011-08-17 17:51:35 -07001099 RegLocation rlArray, RegLocation rlIndex,
1100 RegLocation rlSrc, int scale)
1101{
1102 RegisterClass regClass = oatRegClassBySize(kWord);
buzbeec143c552011-08-20 17:38:58 -07001103 int lenOffset = Array::LengthOffset().Int32Value();
1104 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001105
1106 /* Make sure it's a legal object Put. Use direct regs at first */
1107 loadValueDirectFixed(cUnit, rlArray, r1);
1108 loadValueDirectFixed(cUnit, rlSrc, r0);
1109
1110 /* null array object? */
1111 ArmLIR* pcrLabel = NULL;
1112
1113 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1114 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, r1,
1115 mir->offset, NULL);
1116 }
1117 loadWordDisp(cUnit, rSELF,
1118 OFFSETOF_MEMBER(Thread, pArtCanPutArrayElementNoThrow), rLR);
1119 /* Get the array's clazz */
buzbeec143c552011-08-20 17:38:58 -07001120 loadWordDisp(cUnit, r1, OFFSETOF_MEMBER(Object, klass_), r1);
buzbee67bf8852011-08-17 17:51:35 -07001121 /* Get the object's clazz */
buzbeec143c552011-08-20 17:38:58 -07001122 loadWordDisp(cUnit, r0, OFFSETOF_MEMBER(Object, klass_), r0);
buzbee67bf8852011-08-17 17:51:35 -07001123 opReg(cUnit, kOpBlx, rLR);
1124 oatClobberCallRegs(cUnit);
1125
1126 // Now, redo loadValues in case they didn't survive the call
1127
1128 int regPtr;
1129 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1130 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1131
1132 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1133 oatClobber(cUnit, rlArray.lowReg);
1134 regPtr = rlArray.lowReg;
1135 } else {
1136 regPtr = oatAllocTemp(cUnit);
1137 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1138 }
1139
1140 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1141 int regLen = oatAllocTemp(cUnit);
1142 //NOTE: max live temps(4) here.
1143 /* Get len */
1144 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1145 /* regPtr -> array data */
1146 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1147 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1148 pcrLabel);
1149 oatFreeTemp(cUnit, regLen);
1150 } else {
1151 /* regPtr -> array data */
1152 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1153 }
1154 /* at this point, regPtr points to array, 2 live temps */
1155 rlSrc = loadValue(cUnit, rlSrc, regClass);
1156 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1157 scale, kWord);
1158}
1159
1160/*
1161 * Generate array load
1162 */
1163static void genArrayGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
1164 RegLocation rlArray, RegLocation rlIndex,
1165 RegLocation rlDest, int scale)
1166{
1167 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001168 int lenOffset = Array::LengthOffset().Int32Value();
1169 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001170 RegLocation rlResult;
1171 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1172 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1173 int regPtr;
1174
1175 /* null object? */
1176 ArmLIR* pcrLabel = NULL;
1177
1178 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1179 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
1180 rlArray.lowReg, mir->offset, NULL);
1181 }
1182
1183 regPtr = oatAllocTemp(cUnit);
1184
1185 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1186 int regLen = oatAllocTemp(cUnit);
1187 /* Get len */
1188 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1189 /* regPtr -> array data */
1190 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1191 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1192 pcrLabel);
1193 oatFreeTemp(cUnit, regLen);
1194 } else {
1195 /* regPtr -> array data */
1196 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1197 }
1198 if ((size == kLong) || (size == kDouble)) {
1199 if (scale) {
1200 int rNewIndex = oatAllocTemp(cUnit);
1201 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1202 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1203 oatFreeTemp(cUnit, rNewIndex);
1204 } else {
1205 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1206 }
1207 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1208
1209 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
1210
1211 oatFreeTemp(cUnit, regPtr);
1212 storeValueWide(cUnit, rlDest, rlResult);
1213 } else {
1214 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1215
1216 loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
1217 scale, size);
1218
1219 oatFreeTemp(cUnit, regPtr);
1220 storeValue(cUnit, rlDest, rlResult);
1221 }
1222}
1223
1224/*
1225 * Generate array store
1226 *
1227 */
1228static void genArrayPut(CompilationUnit* cUnit, MIR* mir, OpSize size,
1229 RegLocation rlArray, RegLocation rlIndex,
1230 RegLocation rlSrc, int scale)
1231{
1232 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001233 int lenOffset = Array::LengthOffset().Int32Value();
1234 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001235
1236 int regPtr;
1237 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1238 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1239
1240 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1241 oatClobber(cUnit, rlArray.lowReg);
1242 regPtr = rlArray.lowReg;
1243 } else {
1244 regPtr = oatAllocTemp(cUnit);
1245 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1246 }
1247
1248 /* null object? */
1249 ArmLIR* pcrLabel = NULL;
1250
1251 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1252 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
1253 mir->offset, NULL);
1254 }
1255
1256 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1257 int regLen = oatAllocTemp(cUnit);
1258 //NOTE: max live temps(4) here.
1259 /* Get len */
1260 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1261 /* regPtr -> array data */
1262 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1263 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1264 pcrLabel);
1265 oatFreeTemp(cUnit, regLen);
1266 } else {
1267 /* regPtr -> array data */
1268 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1269 }
1270 /* at this point, regPtr points to array, 2 live temps */
1271 if ((size == kLong) || (size == kDouble)) {
1272 //TODO: need specific wide routine that can handle fp regs
1273 if (scale) {
1274 int rNewIndex = oatAllocTemp(cUnit);
1275 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1276 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1277 oatFreeTemp(cUnit, rNewIndex);
1278 } else {
1279 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1280 }
1281 rlSrc = loadValueWide(cUnit, rlSrc, regClass);
1282
1283 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
1284
1285 oatFreeTemp(cUnit, regPtr);
1286 } else {
1287 rlSrc = loadValue(cUnit, rlSrc, regClass);
1288
1289 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1290 scale, size);
1291 }
1292}
1293
1294static bool genShiftOpLong(CompilationUnit* cUnit, MIR* mir,
1295 RegLocation rlDest, RegLocation rlSrc1,
1296 RegLocation rlShift)
1297{
buzbee54330722011-08-23 16:46:55 -07001298 int funcOffset;
buzbee67bf8852011-08-17 17:51:35 -07001299
buzbee67bf8852011-08-17 17:51:35 -07001300 switch( mir->dalvikInsn.opcode) {
1301 case OP_SHL_LONG:
1302 case OP_SHL_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001303 funcOffset = OFFSETOF_MEMBER(Thread, pShlLong);
buzbee67bf8852011-08-17 17:51:35 -07001304 break;
1305 case OP_SHR_LONG:
1306 case OP_SHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001307 funcOffset = OFFSETOF_MEMBER(Thread, pShrLong);
buzbee67bf8852011-08-17 17:51:35 -07001308 break;
1309 case OP_USHR_LONG:
1310 case OP_USHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001311 funcOffset = OFFSETOF_MEMBER(Thread, pUshrLong);
buzbee67bf8852011-08-17 17:51:35 -07001312 break;
1313 default:
buzbee54330722011-08-23 16:46:55 -07001314 LOG(FATAL) << "Unexpected case";
buzbee67bf8852011-08-17 17:51:35 -07001315 return true;
1316 }
buzbee54330722011-08-23 16:46:55 -07001317 oatFlushAllRegs(cUnit); /* Send everything to home location */
1318 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1319 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1320 loadValueDirect(cUnit, rlShift, r2);
1321 opReg(cUnit, kOpBlx, rLR);
1322 oatClobberCallRegs(cUnit);
1323 RegLocation rlResult = oatGetReturnWide(cUnit);
buzbee67bf8852011-08-17 17:51:35 -07001324 storeValueWide(cUnit, rlDest, rlResult);
1325 return false;
1326}
1327
1328static bool genArithOpLong(CompilationUnit* cUnit, MIR* mir,
1329 RegLocation rlDest, RegLocation rlSrc1,
1330 RegLocation rlSrc2)
1331{
1332 RegLocation rlResult;
1333 OpKind firstOp = kOpBkpt;
1334 OpKind secondOp = kOpBkpt;
1335 bool callOut = false;
1336 int funcOffset;
1337 int retReg = r0;
1338
1339 switch (mir->dalvikInsn.opcode) {
1340 case OP_NOT_LONG:
1341 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1342 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1343 opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg);
1344 opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg);
1345 storeValueWide(cUnit, rlDest, rlResult);
1346 return false;
1347 break;
1348 case OP_ADD_LONG:
1349 case OP_ADD_LONG_2ADDR:
1350 firstOp = kOpAdd;
1351 secondOp = kOpAdc;
1352 break;
1353 case OP_SUB_LONG:
1354 case OP_SUB_LONG_2ADDR:
1355 firstOp = kOpSub;
1356 secondOp = kOpSbc;
1357 break;
1358 case OP_MUL_LONG:
1359 case OP_MUL_LONG_2ADDR:
buzbee439c4fa2011-08-27 15:59:07 -07001360 callOut = true;
1361 retReg = r0;
1362 funcOffset = OFFSETOF_MEMBER(Thread, pLmul);
1363 break;
buzbee67bf8852011-08-17 17:51:35 -07001364 case OP_DIV_LONG:
1365 case OP_DIV_LONG_2ADDR:
1366 callOut = true;
1367 retReg = r0;
1368 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1369 break;
1370 /* NOTE - result is in r2/r3 instead of r0/r1 */
1371 case OP_REM_LONG:
1372 case OP_REM_LONG_2ADDR:
1373 callOut = true;
1374 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1375 retReg = r2;
1376 break;
1377 case OP_AND_LONG_2ADDR:
1378 case OP_AND_LONG:
1379 firstOp = kOpAnd;
1380 secondOp = kOpAnd;
1381 break;
1382 case OP_OR_LONG:
1383 case OP_OR_LONG_2ADDR:
1384 firstOp = kOpOr;
1385 secondOp = kOpOr;
1386 break;
1387 case OP_XOR_LONG:
1388 case OP_XOR_LONG_2ADDR:
1389 firstOp = kOpXor;
1390 secondOp = kOpXor;
1391 break;
1392 case OP_NEG_LONG: {
1393 //TUNING: can improve this using Thumb2 code
1394 int tReg = oatAllocTemp(cUnit);
1395 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1396 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1397 loadConstantNoClobber(cUnit, tReg, 0);
1398 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1399 tReg, rlSrc2.lowReg);
1400 opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg);
1401 genRegCopy(cUnit, rlResult.highReg, tReg);
1402 storeValueWide(cUnit, rlDest, rlResult);
1403 return false;
1404 }
1405 default:
1406 LOG(FATAL) << "Invalid long arith op";
1407 }
1408 if (!callOut) {
1409 genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2);
1410 } else {
1411 // Adjust return regs in to handle case of rem returning r2/r3
1412 oatFlushAllRegs(cUnit); /* Send everything to home location */
1413 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1414 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1415 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1416 opReg(cUnit, kOpBlx, rLR);
1417 oatClobberCallRegs(cUnit);
1418 if (retReg == r0)
1419 rlResult = oatGetReturnWide(cUnit);
1420 else
1421 rlResult = oatGetReturnWideAlt(cUnit);
1422 storeValueWide(cUnit, rlDest, rlResult);
1423 }
1424 return false;
1425}
1426
1427static bool genArithOpInt(CompilationUnit* cUnit, MIR* mir,
1428 RegLocation rlDest, RegLocation rlSrc1,
1429 RegLocation rlSrc2)
1430{
1431 OpKind op = kOpBkpt;
1432 bool callOut = false;
1433 bool checkZero = false;
1434 bool unary = false;
1435 int retReg = r0;
1436 int funcOffset;
1437 RegLocation rlResult;
1438 bool shiftOp = false;
1439
1440 switch (mir->dalvikInsn.opcode) {
1441 case OP_NEG_INT:
1442 op = kOpNeg;
1443 unary = true;
1444 break;
1445 case OP_NOT_INT:
1446 op = kOpMvn;
1447 unary = true;
1448 break;
1449 case OP_ADD_INT:
1450 case OP_ADD_INT_2ADDR:
1451 op = kOpAdd;
1452 break;
1453 case OP_SUB_INT:
1454 case OP_SUB_INT_2ADDR:
1455 op = kOpSub;
1456 break;
1457 case OP_MUL_INT:
1458 case OP_MUL_INT_2ADDR:
1459 op = kOpMul;
1460 break;
1461 case OP_DIV_INT:
1462 case OP_DIV_INT_2ADDR:
1463 callOut = true;
1464 checkZero = true;
1465 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1466 retReg = r0;
1467 break;
1468 /* NOTE: returns in r1 */
1469 case OP_REM_INT:
1470 case OP_REM_INT_2ADDR:
1471 callOut = true;
1472 checkZero = true;
1473 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1474 retReg = r1;
1475 break;
1476 case OP_AND_INT:
1477 case OP_AND_INT_2ADDR:
1478 op = kOpAnd;
1479 break;
1480 case OP_OR_INT:
1481 case OP_OR_INT_2ADDR:
1482 op = kOpOr;
1483 break;
1484 case OP_XOR_INT:
1485 case OP_XOR_INT_2ADDR:
1486 op = kOpXor;
1487 break;
1488 case OP_SHL_INT:
1489 case OP_SHL_INT_2ADDR:
1490 shiftOp = true;
1491 op = kOpLsl;
1492 break;
1493 case OP_SHR_INT:
1494 case OP_SHR_INT_2ADDR:
1495 shiftOp = true;
1496 op = kOpAsr;
1497 break;
1498 case OP_USHR_INT:
1499 case OP_USHR_INT_2ADDR:
1500 shiftOp = true;
1501 op = kOpLsr;
1502 break;
1503 default:
1504 LOG(FATAL) << "Invalid word arith op: " <<
1505 (int)mir->dalvikInsn.opcode;
1506 }
1507 if (!callOut) {
1508 rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
1509 if (unary) {
1510 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1511 opRegReg(cUnit, op, rlResult.lowReg,
1512 rlSrc1.lowReg);
1513 } else {
1514 rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
1515 if (shiftOp) {
1516 int tReg = oatAllocTemp(cUnit);
1517 opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31);
1518 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1519 opRegRegReg(cUnit, op, rlResult.lowReg,
1520 rlSrc1.lowReg, tReg);
1521 oatFreeTemp(cUnit, tReg);
1522 } else {
1523 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1524 opRegRegReg(cUnit, op, rlResult.lowReg,
1525 rlSrc1.lowReg, rlSrc2.lowReg);
1526 }
1527 }
1528 storeValue(cUnit, rlDest, rlResult);
1529 } else {
1530 RegLocation rlResult;
1531 oatFlushAllRegs(cUnit); /* Send everything to home location */
1532 loadValueDirectFixed(cUnit, rlSrc2, r1);
1533 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1534 loadValueDirectFixed(cUnit, rlSrc1, r0);
1535 if (checkZero) {
1536 genNullCheck(cUnit, rlSrc2.sRegLow, r1, mir->offset, NULL);
1537 }
1538 opReg(cUnit, kOpBlx, rLR);
1539 oatClobberCallRegs(cUnit);
1540 if (retReg == r0)
1541 rlResult = oatGetReturn(cUnit);
1542 else
1543 rlResult = oatGetReturnAlt(cUnit);
1544 storeValue(cUnit, rlDest, rlResult);
1545 }
1546 return false;
1547}
1548
1549/* Generate unconditional branch instructions */
1550static ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
1551{
1552 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1553 branch->generic.target = (LIR*) target;
1554 return branch;
1555}
1556
1557/*
1558 * Fetch *self->info.breakFlags. If the breakFlags are non-zero,
1559 * punt to the interpreter.
1560 */
1561static void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
1562{
1563 UNIMPLEMENTED(WARNING);
1564#if 0
1565 int rTemp = oatAllocTemp(cUnit);
1566 ArmLIR* ld;
1567 ld = loadBaseDisp(cUnit, NULL, rSELF,
1568 offsetof(Thread, interpBreak.ctl.breakFlags),
1569 rTemp, kUnsignedByte, INVALID_SREG);
1570 setMemRefType(ld, true /* isLoad */, kMustNotAlias);
1571 genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL);
1572#endif
1573}
1574
1575/*
1576 * The following are the first-level codegen routines that analyze the format
1577 * of each bytecode then either dispatch special purpose codegen routines
1578 * or produce corresponding Thumb instructions directly.
1579 */
1580
/*
 * Returns true if 'x' is a positive power of two (exactly one bit set).
 *
 * Zero and negative values are rejected up front: zero has no bit set,
 * and evaluating "x - 1" on the most negative int would overflow.
 * The bit test itself is done on an unsigned copy.
 */
static bool isPowerOfTwo(int x)
{
    if (x <= 0) {
        return false;
    }
    unsigned int bits = (unsigned int) x;
    // Clearing the lowest set bit leaves zero only for a single-bit value.
    return (bits & (bits - 1)) == 0;
}
1585
// Returns true if no more than two bits are set in 'x'.
// Works by clearing the lowest set bit twice and checking that
// nothing is left; zero therefore also yields true.
static bool isPopCountLE2(unsigned int x)
{
    unsigned int remaining = x;
    for (int pass = 0; pass < 2; pass++) {
        remaining &= remaining - 1;
    }
    return remaining == 0;
}
1592
// Returns the index (0 = least significant) of the lowest set bit in 'x',
// or -1 when 'x' is zero.  The zero guard matters: without it neither
// scan loop below could ever terminate.
static int lowestSetBit(unsigned int x) {
    if (x == 0) {
        return -1;
    }
    int bit_posn = 0;
    // Skip whole empty nibbles first, then finish bit by bit.
    while ((x & 0xf) == 0) {
        bit_posn += 4;
        x >>= 4;
    }
    while ((x & 1) == 0) {
        bit_posn++;
        x >>= 1;
    }
    return bit_posn;
}
1606
1607// Returns true if it added instructions to 'cUnit' to divide 'rlSrc' by 'lit'
1608// and store the result in 'rlDest'.
1609static bool handleEasyDivide(CompilationUnit* cUnit, Opcode dalvikOpcode,
1610 RegLocation rlSrc, RegLocation rlDest, int lit)
1611{
1612 if (lit < 2 || !isPowerOfTwo(lit)) {
1613 return false;
1614 }
1615 int k = lowestSetBit(lit);
1616 if (k >= 30) {
1617 // Avoid special cases.
1618 return false;
1619 }
1620 bool div = (dalvikOpcode == OP_DIV_INT_LIT8 ||
1621 dalvikOpcode == OP_DIV_INT_LIT16);
1622 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1623 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1624 if (div) {
1625 int tReg = oatAllocTemp(cUnit);
1626 if (lit == 2) {
1627 // Division by 2 is by far the most common division by constant.
1628 opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
1629 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1630 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1631 } else {
1632 opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
1633 opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
1634 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1635 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1636 }
1637 } else {
1638 int cReg = oatAllocTemp(cUnit);
1639 loadConstant(cUnit, cReg, lit - 1);
1640 int tReg1 = oatAllocTemp(cUnit);
1641 int tReg2 = oatAllocTemp(cUnit);
1642 if (lit == 2) {
1643 opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
1644 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1645 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1646 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1647 } else {
1648 opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
1649 opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
1650 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1651 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1652 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1653 }
1654 }
1655 storeValue(cUnit, rlDest, rlResult);
1656 return true;
1657}
1658
1659// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
1660// and store the result in 'rlDest'.
1661static bool handleEasyMultiply(CompilationUnit* cUnit,
1662 RegLocation rlSrc, RegLocation rlDest, int lit)
1663{
1664 // Can we simplify this multiplication?
1665 bool powerOfTwo = false;
1666 bool popCountLE2 = false;
1667 bool powerOfTwoMinusOne = false;
1668 if (lit < 2) {
1669 // Avoid special cases.
1670 return false;
1671 } else if (isPowerOfTwo(lit)) {
1672 powerOfTwo = true;
1673 } else if (isPopCountLE2(lit)) {
1674 popCountLE2 = true;
1675 } else if (isPowerOfTwo(lit + 1)) {
1676 powerOfTwoMinusOne = true;
1677 } else {
1678 return false;
1679 }
1680 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1681 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1682 if (powerOfTwo) {
1683 // Shift.
1684 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg,
1685 lowestSetBit(lit));
1686 } else if (popCountLE2) {
1687 // Shift and add and shift.
1688 int firstBit = lowestSetBit(lit);
1689 int secondBit = lowestSetBit(lit ^ (1 << firstBit));
1690 genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit,
1691 firstBit, secondBit);
1692 } else {
1693 // Reverse subtract: (src << (shift + 1)) - src.
1694 assert(powerOfTwoMinusOne);
1695 // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
1696 int tReg = oatAllocTemp(cUnit);
1697 opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
1698 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
1699 }
1700 storeValue(cUnit, rlDest, rlResult);
1701 return true;
1702}
1703
1704static bool genArithOpIntLit(CompilationUnit* cUnit, MIR* mir,
1705 RegLocation rlDest, RegLocation rlSrc,
1706 int lit)
1707{
1708 Opcode dalvikOpcode = mir->dalvikInsn.opcode;
1709 RegLocation rlResult;
1710 OpKind op = (OpKind)0; /* Make gcc happy */
1711 int shiftOp = false;
1712 bool isDiv = false;
1713 int funcOffset;
1714
1715 switch (dalvikOpcode) {
1716 case OP_RSUB_INT_LIT8:
1717 case OP_RSUB_INT: {
1718 int tReg;
1719 //TUNING: add support for use of Arm rsub op
1720 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1721 tReg = oatAllocTemp(cUnit);
1722 loadConstant(cUnit, tReg, lit);
1723 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1724 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1725 tReg, rlSrc.lowReg);
1726 storeValue(cUnit, rlDest, rlResult);
1727 return false;
1728 break;
1729 }
1730
1731 case OP_ADD_INT_LIT8:
1732 case OP_ADD_INT_LIT16:
1733 op = kOpAdd;
1734 break;
1735 case OP_MUL_INT_LIT8:
1736 case OP_MUL_INT_LIT16: {
1737 if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) {
1738 return false;
1739 }
1740 op = kOpMul;
1741 break;
1742 }
1743 case OP_AND_INT_LIT8:
1744 case OP_AND_INT_LIT16:
1745 op = kOpAnd;
1746 break;
1747 case OP_OR_INT_LIT8:
1748 case OP_OR_INT_LIT16:
1749 op = kOpOr;
1750 break;
1751 case OP_XOR_INT_LIT8:
1752 case OP_XOR_INT_LIT16:
1753 op = kOpXor;
1754 break;
1755 case OP_SHL_INT_LIT8:
1756 lit &= 31;
1757 shiftOp = true;
1758 op = kOpLsl;
1759 break;
1760 case OP_SHR_INT_LIT8:
1761 lit &= 31;
1762 shiftOp = true;
1763 op = kOpAsr;
1764 break;
1765 case OP_USHR_INT_LIT8:
1766 lit &= 31;
1767 shiftOp = true;
1768 op = kOpLsr;
1769 break;
1770
1771 case OP_DIV_INT_LIT8:
1772 case OP_DIV_INT_LIT16:
1773 case OP_REM_INT_LIT8:
1774 case OP_REM_INT_LIT16:
1775 if (lit == 0) {
1776 UNIMPLEMENTED(FATAL);
1777 // FIXME: generate an explicit throw here
1778 return false;
1779 }
1780 if (handleEasyDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit)) {
1781 return false;
1782 }
1783 oatFlushAllRegs(cUnit); /* Everything to home location */
1784 loadValueDirectFixed(cUnit, rlSrc, r0);
1785 oatClobber(cUnit, r0);
1786 if ((dalvikOpcode == OP_DIV_INT_LIT8) ||
1787 (dalvikOpcode == OP_DIV_INT_LIT16)) {
1788 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1789 isDiv = true;
1790 } else {
1791 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1792 isDiv = false;
1793 }
1794 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1795 loadConstant(cUnit, r1, lit);
1796 opReg(cUnit, kOpBlx, rLR);
1797 oatClobberCallRegs(cUnit);
1798 if (isDiv)
1799 rlResult = oatGetReturn(cUnit);
1800 else
1801 rlResult = oatGetReturnAlt(cUnit);
1802 storeValue(cUnit, rlDest, rlResult);
1803 return false;
1804 break;
1805 default:
1806 return true;
1807 }
1808 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1809 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1810 // Avoid shifts by literal 0 - no support in Thumb. Change to copy
1811 if (shiftOp && (lit == 0)) {
1812 genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
1813 } else {
1814 opRegRegImm(cUnit, op, rlResult.lowReg, rlSrc.lowReg, lit);
1815 }
1816 storeValue(cUnit, rlDest, rlResult);
1817 return false;
1818}
1819
1820/* Architectural-specific debugging helpers go here */
1821void oatArchDump(void)
1822{
1823 /* Print compiled opcode in this VM instance */
1824 int i, start, streak;
1825 char buf[1024];
1826
1827 streak = i = 0;
1828 buf[0] = 0;
1829 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1830 i++;
1831 }
1832 if (i == kNumPackedOpcodes) {
1833 return;
1834 }
1835 for (start = i++, streak = 1; i < kNumPackedOpcodes; i++) {
1836 if (opcodeCoverage[i]) {
1837 streak++;
1838 } else {
1839 if (streak == 1) {
1840 sprintf(buf+strlen(buf), "%x,", start);
1841 } else {
1842 sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1);
1843 }
1844 streak = 0;
1845 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1846 i++;
1847 }
1848 if (i < kNumPackedOpcodes) {
1849 streak = 1;
1850 start = i;
1851 }
1852 }
1853 }
1854 if (streak) {
1855 if (streak == 1) {
1856 sprintf(buf+strlen(buf), "%x", start);
1857 } else {
1858 sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1);
1859 }
1860 }
1861 if (strlen(buf)) {
1862 LOG(INFO) << "dalvik.vm.oat.op = " << buf;
1863 }
1864}