blob: c4a1fed931963df327830abd309829638daeb36d [file] [log] [blame]
buzbee67bf8852011-08-17 17:51:35 -07001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * This file contains codegen for the Thumb2 ISA and is intended to be
19 * included by:
20 *
21 * Codegen-$(TARGET_ARCH_VARIANT).c
22 *
23 */
24
25/*
26 * Construct an s4 from two consecutive half-words of switch data.
27 * This needs to check endianness because the DEX optimizer only swaps
28 * half-words in instruction stream.
29 *
30 * "switchData" must be 32-bit aligned.
31 */
32#if __BYTE_ORDER == __LITTLE_ENDIAN
33static inline s4 s4FromSwitchData(const void* switchData) {
34 return *(s4*) switchData;
35}
36#else
37static inline s4 s4FromSwitchData(const void* switchData) {
38 u2* data = switchData;
39 return data[0] | (((s4) data[1]) << 16);
40}
41#endif
42
buzbee1b4c8592011-08-31 10:43:51 -070043/* Generate unconditional branch instructions */
44static ArmLIR* genUnconditionalBranch(CompilationUnit* cUnit, ArmLIR* target)
45{
46 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
47 branch->generic.target = (LIR*) target;
48 return branch;
49}
50
buzbee67bf8852011-08-17 17:51:35 -070051/*
52 * Generate a Thumb2 IT instruction, which can nullify up to
53 * four subsequent instructions based on a condition and its
54 * inverse. The condition applies to the first instruction, which
55 * is executed if the condition is met. The string "guide" consists
56 * of 0 to 3 chars, and applies to the 2nd through 4th instruction.
57 * A "T" means the instruction is executed if the condition is
58 * met, and an "E" means the instruction is executed if the condition
59 * is not met.
60 */
61static ArmLIR* genIT(CompilationUnit* cUnit, ArmConditionCode code,
62 const char* guide)
63{
64 int mask;
65 int condBit = code & 1;
66 int altBit = condBit ^ 1;
67 int mask3 = 0;
68 int mask2 = 0;
69 int mask1 = 0;
70
71 //Note: case fallthroughs intentional
72 switch(strlen(guide)) {
73 case 3:
74 mask1 = (guide[2] == 'T') ? condBit : altBit;
75 case 2:
76 mask2 = (guide[1] == 'T') ? condBit : altBit;
77 case 1:
78 mask3 = (guide[0] == 'T') ? condBit : altBit;
79 break;
80 case 0:
81 break;
82 default:
83 LOG(FATAL) << "OAT: bad case in genIT";
84 }
85 mask = (mask3 << 3) | (mask2 << 2) | (mask1 << 1) |
86 (1 << (3 - strlen(guide)));
87 return newLIR2(cUnit, kThumb2It, code, mask);
88}
89
90/*
91 * Insert a kArmPseudoCaseLabel at the beginning of the Dalvik
92 * offset vaddr. This label will be used to fix up the case
93 * branch table during the assembly phase. Be sure to set
94 * all resource flags on this to prevent code motion across
95 * target boundaries. KeyVal is just there for debugging.
96 */
97static ArmLIR* insertCaseLabel(CompilationUnit* cUnit, int vaddr, int keyVal)
98{
99 ArmLIR* lir;
100 for (lir = (ArmLIR*)cUnit->firstLIRInsn; lir; lir = NEXT_LIR(lir)) {
101 if ((lir->opcode == kArmPseudoDalvikByteCodeBoundary) &&
102 (lir->generic.dalvikOffset == vaddr)) {
103 ArmLIR* newLabel = (ArmLIR*)oatNew(sizeof(ArmLIR), true);
104 newLabel->generic.dalvikOffset = vaddr;
105 newLabel->opcode = kArmPseudoCaseLabel;
106 newLabel->operands[0] = keyVal;
107 oatInsertLIRAfter((LIR*)lir, (LIR*)newLabel);
108 return newLabel;
109 }
110 }
111 oatCodegenDump(cUnit);
112 LOG(FATAL) << "Error: didn't find vaddr 0x" << std::hex << vaddr;
113 return NULL; // Quiet gcc
114}
115
116static void markPackedCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
117{
118 const u2* table = tabRec->table;
119 int baseVaddr = tabRec->vaddr;
120 int *targets = (int*)&table[4];
121 int entries = table[1];
122 int lowKey = s4FromSwitchData(&table[2]);
123 for (int i = 0; i < entries; i++) {
124 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
125 i + lowKey);
126 }
127}
128
129static void markSparseCaseLabels(CompilationUnit* cUnit, SwitchTable *tabRec)
130{
131 const u2* table = tabRec->table;
132 int baseVaddr = tabRec->vaddr;
133 int entries = table[1];
134 int* keys = (int*)&table[2];
135 int* targets = &keys[entries];
136 for (int i = 0; i < entries; i++) {
137 tabRec->targets[i] = insertCaseLabel(cUnit, baseVaddr + targets[i],
138 keys[i]);
139 }
140}
141
142void oatProcessSwitchTables(CompilationUnit* cUnit)
143{
144 GrowableListIterator iterator;
145 oatGrowableListIteratorInit(&cUnit->switchTables, &iterator);
146 while (true) {
147 SwitchTable *tabRec = (SwitchTable *) oatGrowableListIteratorNext(
148 &iterator);
149 if (tabRec == NULL) break;
150 if (tabRec->table[0] == kPackedSwitchSignature)
151 markPackedCaseLabels(cUnit, tabRec);
152 else if (tabRec->table[0] == kSparseSwitchSignature)
153 markSparseCaseLabels(cUnit, tabRec);
154 else {
155 LOG(FATAL) << "Invalid switch table";
156 }
157 }
158}
159
160static void dumpSparseSwitchTable(const u2* table)
161 /*
162 * Sparse switch data format:
163 * ushort ident = 0x0200 magic value
164 * ushort size number of entries in the table; > 0
165 * int keys[size] keys, sorted low-to-high; 32-bit aligned
166 * int targets[size] branch targets, relative to switch opcode
167 *
168 * Total size is (2+size*4) 16-bit code units.
169 */
170{
171 u2 ident = table[0];
172 int entries = table[1];
173 int* keys = (int*)&table[2];
174 int* targets = &keys[entries];
175 LOG(INFO) << "Sparse switch table - ident:0x" << std::hex << ident <<
176 ", entries: " << std::dec << entries;
177 for (int i = 0; i < entries; i++) {
178 LOG(INFO) << " Key[" << keys[i] << "] -> 0x" << std::hex <<
179 targets[i];
180 }
181}
182
183static void dumpPackedSwitchTable(const u2* table)
184 /*
185 * Packed switch data format:
186 * ushort ident = 0x0100 magic value
187 * ushort size number of entries in the table
188 * int first_key first (and lowest) switch case value
189 * int targets[size] branch targets, relative to switch opcode
190 *
191 * Total size is (4+size*2) 16-bit code units.
192 */
193{
194 u2 ident = table[0];
195 int* targets = (int*)&table[4];
196 int entries = table[1];
197 int lowKey = s4FromSwitchData(&table[2]);
198 LOG(INFO) << "Packed switch table - ident:0x" << std::hex << ident <<
199 ", entries: " << std::dec << entries << ", lowKey: " << lowKey;
200 for (int i = 0; i < entries; i++) {
201 LOG(INFO) << " Key[" << (i + lowKey) << "] -> 0x" << std::hex <<
202 targets[i];
203 }
204}
205
206/*
207 * The sparse table in the literal pool is an array of <key,displacement>
208 * pairs. For each set, we'll load them as a pair using ldmia.
209 * This means that the register number of the temp we use for the key
210 * must be lower than the reg for the displacement.
211 *
212 * The test loop will look something like:
213 *
214 * adr rBase, <table>
215 * ldr rVal, [rSP, vRegOff]
216 * mov rIdx, #tableSize
217 * lp:
218 * ldmia rBase!, {rKey, rDisp}
219 * sub rIdx, #1
220 * cmp rVal, rKey
221 * ifeq
222 * add rPC, rDisp ; This is the branch from which we compute displacement
223 * cbnz rIdx, lp
224 */
225static void genSparseSwitch(CompilationUnit* cUnit, MIR* mir,
226 RegLocation rlSrc)
227{
228 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
229 if (cUnit->printMe) {
230 dumpSparseSwitchTable(table);
231 }
232 // Add the table to the list - we'll process it later
233 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
234 true);
235 tabRec->table = table;
236 tabRec->vaddr = mir->offset;
237 int size = table[1];
238 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
239 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
240
241 // Get the switch value
242 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
243 int rBase = oatAllocTemp(cUnit);
244 /* Allocate key and disp temps */
245 int rKey = oatAllocTemp(cUnit);
246 int rDisp = oatAllocTemp(cUnit);
247 // Make sure rKey's register number is less than rDisp's number for ldmia
248 if (rKey > rDisp) {
249 int tmp = rDisp;
250 rDisp = rKey;
251 rKey = tmp;
252 }
253 // Materialize a pointer to the switch table
254 newLIR3(cUnit, kThumb2AdrST, rBase, 0, (intptr_t)tabRec);
255 // Set up rIdx
256 int rIdx = oatAllocTemp(cUnit);
257 loadConstant(cUnit, rIdx, size);
258 // Establish loop branch target
259 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
260 target->defMask = ENCODE_ALL;
261 // Load next key/disp
262 newLIR2(cUnit, kThumb2LdmiaWB, rBase, (1 << rKey) | (1 << rDisp));
263 opRegReg(cUnit, kOpCmp, rKey, rlSrc.lowReg);
264 // Go if match. NOTE: No instruction set switch here - must stay Thumb2
265 genIT(cUnit, kArmCondEq, "");
266 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, rDisp);
267 tabRec->bxInst = switchBranch;
268 // Needs to use setflags encoding here
269 newLIR3(cUnit, kThumb2SubsRRI12, rIdx, rIdx, 1);
270 ArmLIR* branch = opCondBranch(cUnit, kArmCondNe);
271 branch->generic.target = (LIR*)target;
272}
273
274
275static void genPackedSwitch(CompilationUnit* cUnit, MIR* mir,
276 RegLocation rlSrc)
277{
278 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
279 if (cUnit->printMe) {
280 dumpPackedSwitchTable(table);
281 }
282 // Add the table to the list - we'll process it later
283 SwitchTable *tabRec = (SwitchTable *)oatNew(sizeof(SwitchTable),
284 true);
285 tabRec->table = table;
286 tabRec->vaddr = mir->offset;
287 int size = table[1];
288 tabRec->targets = (ArmLIR* *)oatNew(size * sizeof(ArmLIR*), true);
289 oatInsertGrowableList(&cUnit->switchTables, (intptr_t)tabRec);
290
291 // Get the switch value
292 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
293 int tableBase = oatAllocTemp(cUnit);
294 // Materialize a pointer to the switch table
295 newLIR3(cUnit, kThumb2AdrST, tableBase, 0, (intptr_t)tabRec);
296 int lowKey = s4FromSwitchData(&table[2]);
297 int keyReg;
298 // Remove the bias, if necessary
299 if (lowKey == 0) {
300 keyReg = rlSrc.lowReg;
301 } else {
302 keyReg = oatAllocTemp(cUnit);
303 opRegRegImm(cUnit, kOpSub, keyReg, rlSrc.lowReg, lowKey);
304 }
305 // Bounds check - if < 0 or >= size continue following switch
306 opRegImm(cUnit, kOpCmp, keyReg, size-1);
307 ArmLIR* branchOver = opCondBranch(cUnit, kArmCondHi);
308
309 // Load the displacement from the switch table
310 int dispReg = oatAllocTemp(cUnit);
311 loadBaseIndexed(cUnit, tableBase, keyReg, dispReg, 2, kWord);
312
313 // ..and go! NOTE: No instruction set switch here - must stay Thumb2
314 ArmLIR* switchBranch = newLIR1(cUnit, kThumb2AddPCR, dispReg);
315 tabRec->bxInst = switchBranch;
316
317 /* branchOver target here */
318 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
319 target->defMask = ENCODE_ALL;
320 branchOver->generic.target = (LIR*)target;
321}
322
323/*
324 * Array data table format:
325 * ushort ident = 0x0300 magic value
326 * ushort width width of each element in the table
327 * uint size number of elements in the table
328 * ubyte data[size*width] table of data values (may contain a single-byte
329 * padding at the end)
330 *
331 * Total size is 4+(width * size + 1)/2 16-bit code units.
332 */
333static void genFillArrayData(CompilationUnit* cUnit, MIR* mir,
334 RegLocation rlSrc)
335{
336 const u2* table = cUnit->insns + mir->offset + mir->dalvikInsn.vB;
337 // Add the table to the list - we'll process it later
338 FillArrayData *tabRec = (FillArrayData *)
339 oatNew(sizeof(FillArrayData), true);
340 tabRec->table = table;
341 tabRec->vaddr = mir->offset;
342 u2 width = tabRec->table[1];
343 u4 size = tabRec->table[2] | (((u4)tabRec->table[3]) << 16);
344 tabRec->size = (size * width) + 8;
345
346 oatInsertGrowableList(&cUnit->fillArrayData, (intptr_t)tabRec);
347
348 // Making a call - use explicit registers
349 oatFlushAllRegs(cUnit); /* Everything to home location */
350 loadValueDirectFixed(cUnit, rlSrc, r0);
351 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -0700352 OFFSETOF_MEMBER(Thread, pHandleFillArrayDataFromCode), rLR);
buzbeee6d61962011-08-27 11:58:19 -0700353 // Materialize a pointer to the fill data image
buzbee67bf8852011-08-17 17:51:35 -0700354 newLIR3(cUnit, kThumb2AdrST, r1, 0, (intptr_t)tabRec);
355 opReg(cUnit, kOpBlx, rLR);
356 oatClobberCallRegs(cUnit);
357}
358
359/*
360 * Mark garbage collection card. Skip if the value we're storing is null.
361 */
362static void markGCCard(CompilationUnit* cUnit, int valReg, int tgtAddrReg)
363{
364 int regCardBase = oatAllocTemp(cUnit);
365 int regCardNo = oatAllocTemp(cUnit);
366 ArmLIR* branchOver = genCmpImmBranch(cUnit, kArmCondEq, valReg, 0);
buzbeec143c552011-08-20 17:38:58 -0700367 loadWordDisp(cUnit, rSELF, Thread::CardTableOffset().Int32Value(),
buzbee67bf8852011-08-17 17:51:35 -0700368 regCardBase);
369 opRegRegImm(cUnit, kOpLsr, regCardNo, tgtAddrReg, GC_CARD_SHIFT);
370 storeBaseIndexed(cUnit, regCardBase, regCardNo, regCardBase, 0,
371 kUnsignedByte);
372 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
373 target->defMask = ENCODE_ALL;
374 branchOver->generic.target = (LIR*)target;
375 oatFreeTemp(cUnit, regCardBase);
376 oatFreeTemp(cUnit, regCardNo);
377}
378
379static void genIGetX(CompilationUnit* cUnit, MIR* mir, OpSize size,
380 RegLocation rlDest, RegLocation rlObj)
381{
buzbeec143c552011-08-20 17:38:58 -0700382 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
383 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700384 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700385 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700386 }
387#if ANDROID_SMP != 0
388 bool isVolatile = dvmIsVolatileField(fieldPtr);
389#else
390 bool isVolatile = false;
391#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700392 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700393 RegLocation rlResult;
394 RegisterClass regClass = oatRegClassBySize(size);
395 rlObj = loadValue(cUnit, rlObj, kCoreReg);
396 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
397 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
398 NULL);/* null object? */
399 loadBaseDisp(cUnit, mir, rlObj.lowReg, fieldOffset, rlResult.lowReg,
400 size, rlObj.sRegLow);
401 if (isVolatile) {
402 oatGenMemBarrier(cUnit, kSY);
403 }
404
405 storeValue(cUnit, rlDest, rlResult);
406}
407
408static void genIPutX(CompilationUnit* cUnit, MIR* mir, OpSize size,
409 RegLocation rlSrc, RegLocation rlObj, bool isObject)
410{
buzbeec143c552011-08-20 17:38:58 -0700411 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
412 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700413 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700414 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700415 }
416#if ANDROID_SMP != 0
417 bool isVolatile = dvmIsVolatileField(fieldPtr);
418#else
419 bool isVolatile = false;
420#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700421 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700422 RegisterClass regClass = oatRegClassBySize(size);
423 rlObj = loadValue(cUnit, rlObj, kCoreReg);
424 rlSrc = loadValue(cUnit, rlSrc, regClass);
425 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
426 NULL);/* null object? */
427
428 if (isVolatile) {
429 oatGenMemBarrier(cUnit, kSY);
430 }
431 storeBaseDisp(cUnit, rlObj.lowReg, fieldOffset, rlSrc.lowReg, size);
432 if (isObject) {
433 /* NOTE: marking card based on object head */
434 markGCCard(cUnit, rlSrc.lowReg, rlObj.lowReg);
435 }
436}
437
438static void genIGetWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
439 RegLocation rlObj)
440{
buzbeec143c552011-08-20 17:38:58 -0700441 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
442 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700443 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700444 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700445 }
446#if ANDROID_SMP != 0
447 bool isVolatile = dvmIsVolatileField(fieldPtr);
448#else
449 bool isVolatile = false;
450#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700451 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700452 RegLocation rlResult;
453 rlObj = loadValue(cUnit, rlObj, kCoreReg);
454 int regPtr = oatAllocTemp(cUnit);
455
456 assert(rlDest.wide);
457
458 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
459 NULL);/* null object? */
460 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
461 rlResult = oatEvalLoc(cUnit, rlDest, kAnyReg, true);
462
463 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
464
465 if (isVolatile) {
466 oatGenMemBarrier(cUnit, kSY);
467 }
468
469 oatFreeTemp(cUnit, regPtr);
470 storeValueWide(cUnit, rlDest, rlResult);
471}
472
473static void genIPutWideX(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc,
474 RegLocation rlObj)
475{
buzbeec143c552011-08-20 17:38:58 -0700476 Field* fieldPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
477 GetResolvedField(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700478 if (fieldPtr == NULL) {
buzbeedd3efae2011-08-28 14:39:07 -0700479 UNIMPLEMENTED(FATAL) << "Need to handle unresolved field";
buzbee67bf8852011-08-17 17:51:35 -0700480 }
481#if ANDROID_SMP != 0
482 bool isVolatile = dvmIsVolatileField(fieldPtr);
483#else
484 bool isVolatile = false;
485#endif
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700486 int fieldOffset = fieldPtr->GetOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -0700487
488 rlObj = loadValue(cUnit, rlObj, kCoreReg);
489 int regPtr;
490 rlSrc = loadValueWide(cUnit, rlSrc, kAnyReg);
491 genNullCheck(cUnit, rlObj.sRegLow, rlObj.lowReg, mir->offset,
492 NULL);/* null object? */
493 regPtr = oatAllocTemp(cUnit);
494 opRegRegImm(cUnit, kOpAdd, regPtr, rlObj.lowReg, fieldOffset);
495
496 if (isVolatile) {
497 oatGenMemBarrier(cUnit, kSY);
498 }
499 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
500
501 oatFreeTemp(cUnit, regPtr);
502}
503
504static void genConstClass(CompilationUnit* cUnit, MIR* mir,
505 RegLocation rlDest, RegLocation rlSrc)
506{
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700507 art::Class* classPtr = cUnit->method->GetDexCacheResolvedTypes()->
buzbee1b4c8592011-08-31 10:43:51 -0700508 Get(mir->dalvikInsn.vB);
509 int mReg = loadCurrMethod(cUnit);
510 int resReg = oatAllocTemp(cUnit);
buzbee67bf8852011-08-17 17:51:35 -0700511 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700512 loadWordDisp(cUnit, mReg, Method::DexCacheStringsOffset().Int32Value(),
buzbee1b4c8592011-08-31 10:43:51 -0700513 resReg);
514 loadWordDisp(cUnit, resReg, Array::DataOffset().Int32Value() +
515 (sizeof(String*) * mir->dalvikInsn.vB), rlResult.lowReg);
516 if (classPtr != NULL) {
517 // Fast path, we're done - just store result
518 storeValue(cUnit, rlDest, rlResult);
519 } else {
520 // Slow path. Must test at runtime
521 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, rlResult.lowReg,
522 0);
523 // Resolved, store and hop over following code
524 storeValue(cUnit, rlDest, rlResult);
525 ArmLIR* branch2 = genUnconditionalBranch(cUnit,0);
526 // TUNING: move slow path to end & remove unconditional branch
527 ArmLIR* target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
528 target1->defMask = ENCODE_ALL;
529 // Call out to helper, which will return resolved type in r0
530 loadWordDisp(cUnit, rSELF,
531 OFFSETOF_MEMBER(Thread, pInitializeTypeFromCode), rLR);
532 genRegCopy(cUnit, r1, mReg);
533 loadConstant(cUnit, r0, mir->dalvikInsn.vB);
534 opReg(cUnit, kOpBlx, rLR); // resolveTypeFromCode(idx, method)
535 oatClobberCallRegs(cUnit);
536 RegLocation rlResult = oatGetReturn(cUnit);
537 storeValue(cUnit, rlDest, rlResult);
538 // Rejoin code paths
539 ArmLIR* target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
540 target2->defMask = ENCODE_ALL;
541 branch1->generic.target = (LIR*)target1;
542 branch2->generic.target = (LIR*)target2;
543 }
buzbee67bf8852011-08-17 17:51:35 -0700544}
545
546static void genConstString(CompilationUnit* cUnit, MIR* mir,
547 RegLocation rlDest, RegLocation rlSrc)
548{
buzbee1b4c8592011-08-31 10:43:51 -0700549 /* All strings should be available at compile time */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700550 const art::String* str = cUnit->method->GetDexCacheStrings()->
buzbee1b4c8592011-08-31 10:43:51 -0700551 Get(mir->dalvikInsn.vB);
552 DCHECK(str != NULL);
buzbee67bf8852011-08-17 17:51:35 -0700553
buzbee1b4c8592011-08-31 10:43:51 -0700554 int mReg = loadCurrMethod(cUnit);
555 int resReg = oatAllocTemp(cUnit);
buzbee67bf8852011-08-17 17:51:35 -0700556 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700557 loadWordDisp(cUnit, mReg, Method::DexCacheStringsOffset().Int32Value(),
buzbee1b4c8592011-08-31 10:43:51 -0700558 resReg);
559 loadWordDisp(cUnit, resReg, Array::DataOffset().Int32Value() +
560 (sizeof(String*) * mir->dalvikInsn.vB), rlResult.lowReg);
buzbee67bf8852011-08-17 17:51:35 -0700561 storeValue(cUnit, rlDest, rlResult);
562}
563
buzbeedfd3d702011-08-28 12:56:51 -0700564/*
565 * Let helper function take care of everything. Will
566 * call Class::NewInstanceFromCode(type_idx, method);
567 */
buzbee67bf8852011-08-17 17:51:35 -0700568static void genNewInstance(CompilationUnit* cUnit, MIR* mir,
569 RegLocation rlDest)
570{
buzbeedfd3d702011-08-28 12:56:51 -0700571 oatFlushAllRegs(cUnit); /* Everything to home location */
buzbee67bf8852011-08-17 17:51:35 -0700572 loadWordDisp(cUnit, rSELF,
Brian Carlstrom1f870082011-08-23 16:02:11 -0700573 OFFSETOF_MEMBER(Thread, pAllocObjectFromCode), rLR);
buzbeedfd3d702011-08-28 12:56:51 -0700574 loadCurrMethodDirect(cUnit, r1); // arg1 <= Method*
575 loadConstant(cUnit, r0, mir->dalvikInsn.vB); // arg0 <- type_id
buzbee67bf8852011-08-17 17:51:35 -0700576 opReg(cUnit, kOpBlx, rLR);
577 oatClobberCallRegs(cUnit);
578 RegLocation rlResult = oatGetReturn(cUnit);
579 storeValue(cUnit, rlDest, rlResult);
580}
581
582void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
583{
584 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -0700585 OFFSETOF_MEMBER(Thread, pThrowException), rLR);
586 loadValueDirectFixed(cUnit, rlSrc, r1); // Get exception object
buzbee67bf8852011-08-17 17:51:35 -0700587 genRegCopy(cUnit, r0, rSELF);
buzbee1b4c8592011-08-31 10:43:51 -0700588 opReg(cUnit, kOpBlx, rLR); // artThrowException(thread, exception);
buzbee67bf8852011-08-17 17:51:35 -0700589}
590
591static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
592 RegLocation rlSrc)
593{
594 // May generate a call - use explicit registers
595 RegLocation rlResult;
buzbeec143c552011-08-20 17:38:58 -0700596 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
Brian Carlstrom9ea1cb12011-08-24 23:18:18 -0700597 GetResolvedType(mir->dalvikInsn.vC);
buzbee67bf8852011-08-17 17:51:35 -0700598 if (classPtr == NULL) {
599 /* Shouldn't happen */
600 LOG(FATAL) << "Unexpected null class pointer";
601 }
602 oatFlushAllRegs(cUnit); /* Everything to home location */
603 loadValueDirectFixed(cUnit, rlSrc, r0); /* Ref */
604 loadConstant(cUnit, r2, (int) classPtr );
605 /* When taken r0 has NULL which can be used for store directly */
606 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq, r0, 0);
607 /* r1 now contains object->clazz */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700608 assert(Object::ClassOffset().Int32Value() == 0);
609 loadWordDisp(cUnit, r0, Object::ClassOffset().Int32Value(), r1);
buzbee67bf8852011-08-17 17:51:35 -0700610 /* r1 now contains object->clazz */
611 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -0700612 OFFSETOF_MEMBER(Thread, pInstanceofNonTrivialFromCode), rLR);
buzbee67bf8852011-08-17 17:51:35 -0700613 loadConstant(cUnit, r0, 1); /* Assume true */
614 opRegReg(cUnit, kOpCmp, r1, r2);
615 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
616 genRegCopy(cUnit, r0, r1);
617 genRegCopy(cUnit, r1, r2);
618 opReg(cUnit, kOpBlx, rLR);
619 oatClobberCallRegs(cUnit);
620 /* branch target here */
621 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
622 target->defMask = ENCODE_ALL;
623 rlResult = oatGetReturn(cUnit);
624 storeValue(cUnit, rlDest, rlResult);
625 branch1->generic.target = (LIR*)target;
626 branch2->generic.target = (LIR*)target;
627}
628
629static void genCheckCast(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
630{
buzbeec143c552011-08-20 17:38:58 -0700631 Class* classPtr = cUnit->method->GetDeclaringClass()->GetDexCache()->
Brian Carlstrom9ea1cb12011-08-24 23:18:18 -0700632 GetResolvedType(mir->dalvikInsn.vB);
buzbee67bf8852011-08-17 17:51:35 -0700633 if (classPtr == NULL) {
634 /* Shouldn't happen with our current model */
635 LOG(FATAL) << "Unexpected null class pointer";
636 }
637 oatFlushAllRegs(cUnit); /* Everything to home location */
638 loadConstant(cUnit, r1, (int) classPtr );
639 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
640 /* Null? */
641 ArmLIR* branch1 = genCmpImmBranch(cUnit, kArmCondEq,
642 rlSrc.lowReg, 0);
643 /*
644 * rlSrc.lowReg now contains object->clazz. Note that
645 * it could have been allocated r0, but we're okay so long
646 * as we don't do anything desctructive until r0 is loaded
647 * with clazz.
648 */
649 /* r0 now contains object->clazz */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700650 loadWordDisp(cUnit, rlSrc.lowReg, Object::ClassOffset().Int32Value(), r0);
buzbee67bf8852011-08-17 17:51:35 -0700651 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -0700652 OFFSETOF_MEMBER(Thread, pInstanceofNonTrivialFromCode), rLR);
buzbee67bf8852011-08-17 17:51:35 -0700653 opRegReg(cUnit, kOpCmp, r0, r1);
654 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondEq);
655 // Assume success - if not, artInstanceOfNonTrivial will handle throw
656 opReg(cUnit, kOpBlx, rLR);
657 oatClobberCallRegs(cUnit);
658 ArmLIR* target = newLIR0(cUnit, kArmPseudoTargetLabel);
659 target->defMask = ENCODE_ALL;
660 branch1->generic.target = (LIR*)target;
661 branch2->generic.target = (LIR*)target;
662}
663
664static void genNegFloat(CompilationUnit* cUnit, RegLocation rlDest,
665 RegLocation rlSrc)
666{
667 RegLocation rlResult;
668 rlSrc = loadValue(cUnit, rlSrc, kFPReg);
669 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
670 newLIR2(cUnit, kThumb2Vnegs, rlResult.lowReg, rlSrc.lowReg);
671 storeValue(cUnit, rlDest, rlResult);
672}
673
674static void genNegDouble(CompilationUnit* cUnit, RegLocation rlDest,
675 RegLocation rlSrc)
676{
677 RegLocation rlResult;
678 rlSrc = loadValueWide(cUnit, rlSrc, kFPReg);
679 rlResult = oatEvalLoc(cUnit, rlDest, kFPReg, true);
680 newLIR2(cUnit, kThumb2Vnegd, S2D(rlResult.lowReg, rlResult.highReg),
681 S2D(rlSrc.lowReg, rlSrc.highReg));
682 storeValueWide(cUnit, rlDest, rlResult);
683}
684
buzbee439c4fa2011-08-27 15:59:07 -0700685static void freeRegLocTemps(CompilationUnit* cUnit, RegLocation rlKeep,
686 RegLocation rlFree)
buzbee67bf8852011-08-17 17:51:35 -0700687{
buzbee439c4fa2011-08-27 15:59:07 -0700688 if ((rlFree.lowReg != rlKeep.lowReg) && (rlFree.lowReg != rlKeep.highReg))
689 oatFreeTemp(cUnit, rlFree.lowReg);
690 if ((rlFree.highReg != rlKeep.lowReg) && (rlFree.highReg != rlKeep.highReg))
691 oatFreeTemp(cUnit, rlFree.lowReg);
buzbee67bf8852011-08-17 17:51:35 -0700692}
693
694static void genLong3Addr(CompilationUnit* cUnit, MIR* mir, OpKind firstOp,
695 OpKind secondOp, RegLocation rlDest,
696 RegLocation rlSrc1, RegLocation rlSrc2)
697{
buzbee9e0f9b02011-08-24 15:32:46 -0700698 /*
699 * NOTE: This is the one place in the code in which we might have
700 * as many as six live temporary registers. There are 5 in the normal
701 * set for Arm. Until we have spill capabilities, temporarily add
702 * lr to the temp set. It is safe to do this locally, but note that
703 * lr is used explicitly elsewhere in the code generator and cannot
704 * normally be used as a general temp register.
705 */
buzbee67bf8852011-08-17 17:51:35 -0700706 RegLocation rlResult;
buzbee9e0f9b02011-08-24 15:32:46 -0700707 oatMarkTemp(cUnit, rLR); // Add lr to the temp pool
708 oatFreeTemp(cUnit, rLR); // and make it available
buzbee67bf8852011-08-17 17:51:35 -0700709 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
710 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
711 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
712 opRegRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
713 opRegRegReg(cUnit, secondOp, rlResult.highReg, rlSrc1.highReg,
714 rlSrc2.highReg);
buzbee439c4fa2011-08-27 15:59:07 -0700715 /*
716 * NOTE: If rlDest refers to a frame variable in a large frame, the
717 * following storeValueWide might need to allocate a temp register.
718 * To further work around the lack of a spill capability, explicitly
719 * free any temps from rlSrc1 & rlSrc2 that aren't still live in rlResult.
720 * Remove when spill is functional.
721 */
722 freeRegLocTemps(cUnit, rlResult, rlSrc1);
723 freeRegLocTemps(cUnit, rlResult, rlSrc2);
buzbee67bf8852011-08-17 17:51:35 -0700724 storeValueWide(cUnit, rlDest, rlResult);
buzbee9e0f9b02011-08-24 15:32:46 -0700725 oatClobber(cUnit, rLR);
726 oatUnmarkTemp(cUnit, rLR); // Remove lr from the temp pool
buzbee67bf8852011-08-17 17:51:35 -0700727}
728
729void oatInitializeRegAlloc(CompilationUnit* cUnit)
730{
731 int numRegs = sizeof(coreRegs)/sizeof(*coreRegs);
732 int numReserved = sizeof(reservedRegs)/sizeof(*reservedRegs);
733 int numTemps = sizeof(coreTemps)/sizeof(*coreTemps);
734 int numFPRegs = sizeof(fpRegs)/sizeof(*fpRegs);
735 int numFPTemps = sizeof(fpTemps)/sizeof(*fpTemps);
736 RegisterPool *pool = (RegisterPool *)oatNew(sizeof(*pool), true);
737 cUnit->regPool = pool;
738 pool->numCoreRegs = numRegs;
739 pool->coreRegs = (RegisterInfo *)
740 oatNew(numRegs * sizeof(*cUnit->regPool->coreRegs), true);
741 pool->numFPRegs = numFPRegs;
742 pool->FPRegs = (RegisterInfo *)
743 oatNew(numFPRegs * sizeof(*cUnit->regPool->FPRegs), true);
744 oatInitPool(pool->coreRegs, coreRegs, pool->numCoreRegs);
745 oatInitPool(pool->FPRegs, fpRegs, pool->numFPRegs);
746 // Keep special registers from being allocated
747 for (int i = 0; i < numReserved; i++) {
748 oatMarkInUse(cUnit, reservedRegs[i]);
749 }
750 // Mark temp regs - all others not in use can be used for promotion
751 for (int i = 0; i < numTemps; i++) {
752 oatMarkTemp(cUnit, coreTemps[i]);
753 }
754 for (int i = 0; i < numFPTemps; i++) {
755 oatMarkTemp(cUnit, fpTemps[i]);
756 }
757 pool->nullCheckedRegs =
758 oatAllocBitVector(cUnit->numSSARegs, false);
759}
760
761/*
762 * Handle simple case (thin lock) inline. If it's complicated, bail
763 * out to the heavyweight lock/unlock routines. We'll use dedicated
764 * registers here in order to be in the right position in case we
765 * need to bail to dvm[Lock/Unlock]Object(self, object)
766 *
767 * r0 -> self pointer [arg0 for dvm[Lock/Unlock]Object
768 * r1 -> object [arg1 for dvm[Lock/Unlock]Object
769 * r2 -> initial contents of object->lock, later result of strex
770 * r3 -> self->threadId
771 * r12 -> allow to be used by utilities as general temp
772 *
773 * The result of the strex is 0 if we acquire the lock.
774 *
775 * See comments in Sync.c for the layout of the lock word.
776 * Of particular interest to this code is the test for the
777 * simple case - which we handle inline. For monitor enter, the
778 * simple case is thin lock, held by no-one. For monitor exit,
779 * the simple case is thin lock, held by the unlocking thread with
780 * a recurse count of 0.
781 *
782 * A minor complication is that there is a field in the lock word
783 * unrelated to locking: the hash state. This field must be ignored, but
784 * preserved.
785 *
786 */
787static void genMonitorEnter(CompilationUnit* cUnit, MIR* mir,
788 RegLocation rlSrc)
789{
790 ArmLIR* target;
791 ArmLIR* hopTarget;
792 ArmLIR* branch;
793 ArmLIR* hopBranch;
794
795 oatFlushAllRegs(cUnit);
buzbeec143c552011-08-20 17:38:58 -0700796 assert(art::Monitor::kLwShapeThin == 0);
buzbee67bf8852011-08-17 17:51:35 -0700797 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
buzbee2e748f32011-08-29 21:02:19 -0700798 oatLockCallTemps(cUnit); // Prepare for explicit register usage
buzbee67bf8852011-08-17 17:51:35 -0700799 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
buzbeec143c552011-08-20 17:38:58 -0700800 loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
buzbee67bf8852011-08-17 17:51:35 -0700801 newLIR3(cUnit, kThumb2Ldrex, r2, r1,
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700802 Object::MonitorOffset().Int32Value() >> 2); // Get object->lock
buzbeec143c552011-08-20 17:38:58 -0700803 // Align owner
804 opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
buzbee67bf8852011-08-17 17:51:35 -0700805 // Is lock unheld on lock or held by us (==threadId) on unlock?
buzbeec143c552011-08-20 17:38:58 -0700806 newLIR4(cUnit, kThumb2Bfi, r3, r2, 0, art::Monitor::kLwLockOwnerShift
807 - 1);
808 newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
809 art::Monitor::kLwLockOwnerShift - 1);
buzbee67bf8852011-08-17 17:51:35 -0700810 hopBranch = newLIR2(cUnit, kThumb2Cbnz, r2, 0);
buzbeec143c552011-08-20 17:38:58 -0700811 newLIR4(cUnit, kThumb2Strex, r2, r3, r1,
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700812 Object::MonitorOffset().Int32Value() >> 2);
buzbee67bf8852011-08-17 17:51:35 -0700813 oatGenMemBarrier(cUnit, kSY);
814 branch = newLIR2(cUnit, kThumb2Cbz, r2, 0);
815
816 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
817 hopTarget->defMask = ENCODE_ALL;
818 hopBranch->generic.target = (LIR*)hopTarget;
819
buzbee1b4c8592011-08-31 10:43:51 -0700820 // Go expensive route - artLockObjectFromCode(self, obj);
821 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pLockObjectFromCode),
buzbee67bf8852011-08-17 17:51:35 -0700822 rLR);
823 genRegCopy(cUnit, r0, rSELF);
824 newLIR1(cUnit, kThumbBlxR, rLR);
825
826 // Resume here
827 target = newLIR0(cUnit, kArmPseudoTargetLabel);
828 target->defMask = ENCODE_ALL;
829 branch->generic.target = (LIR*)target;
830}
831
832/*
833 * For monitor unlock, we don't have to use ldrex/strex. Once
834 * we've determined that the lock is thin and that we own it with
835 * a zero recursion count, it's safe to punch it back to the
836 * initial, unlock thin state with a store word.
837 */
838static void genMonitorExit(CompilationUnit* cUnit, MIR* mir,
839 RegLocation rlSrc)
840{
841 ArmLIR* target;
842 ArmLIR* branch;
843 ArmLIR* hopTarget;
844 ArmLIR* hopBranch;
845
buzbeec143c552011-08-20 17:38:58 -0700846 assert(art::Monitor::kLwShapeThin == 0);
buzbee67bf8852011-08-17 17:51:35 -0700847 oatFlushAllRegs(cUnit);
848 loadValueDirectFixed(cUnit, rlSrc, r1); // Get obj
buzbee2e748f32011-08-29 21:02:19 -0700849 oatLockCallTemps(cUnit); // Prepare for explicit register usage
buzbee67bf8852011-08-17 17:51:35 -0700850 genNullCheck(cUnit, rlSrc.sRegLow, r1, mir->offset, NULL);
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700851 loadWordDisp(cUnit, r1, Object::MonitorOffset().Int32Value(), r2); // Get lock
buzbeec143c552011-08-20 17:38:58 -0700852 loadWordDisp(cUnit, rSELF, Thread::IdOffset().Int32Value(), r3);
buzbee67bf8852011-08-17 17:51:35 -0700853 // Is lock unheld on lock or held by us (==threadId) on unlock?
buzbeec143c552011-08-20 17:38:58 -0700854 opRegRegImm(cUnit, kOpAnd, r12, r2, (art::Monitor::kLwHashStateMask <<
855 art::Monitor::kLwHashStateShift));
856 // Align owner
857 opRegImm(cUnit, kOpLsl, r3, art::Monitor::kLwLockOwnerShift);
858 newLIR3(cUnit, kThumb2Bfc, r2, art::Monitor::kLwHashStateShift,
859 art::Monitor::kLwLockOwnerShift - 1);
buzbee67bf8852011-08-17 17:51:35 -0700860 opRegReg(cUnit, kOpSub, r2, r3);
861 hopBranch = opCondBranch(cUnit, kArmCondNe);
862 oatGenMemBarrier(cUnit, kSY);
Ian Rogers0cfe1fb2011-08-26 03:29:44 -0700863 storeWordDisp(cUnit, r1, Object::MonitorOffset().Int32Value(), r12);
buzbee67bf8852011-08-17 17:51:35 -0700864 branch = opNone(cUnit, kOpUncondBr);
865
866 hopTarget = newLIR0(cUnit, kArmPseudoTargetLabel);
867 hopTarget->defMask = ENCODE_ALL;
868 hopBranch->generic.target = (LIR*)hopTarget;
869
buzbee1b4c8592011-08-31 10:43:51 -0700870 // Go expensive route - UnlockObjectFromCode(self, obj);
871 loadWordDisp(cUnit, rSELF, OFFSETOF_MEMBER(Thread, pUnlockObjectFromCode),
buzbee67bf8852011-08-17 17:51:35 -0700872 rLR);
873 genRegCopy(cUnit, r0, rSELF);
874 newLIR1(cUnit, kThumbBlxR, rLR);
875
876 // Resume here
877 target = newLIR0(cUnit, kArmPseudoTargetLabel);
878 target->defMask = ENCODE_ALL;
879 branch->generic.target = (LIR*)target;
880}
881
882/*
883 * 64-bit 3way compare function.
884 * mov rX, #-1
885 * cmp op1hi, op2hi
886 * blt done
887 * bgt flip
888 * sub rX, op1lo, op2lo (treat as unsigned)
889 * beq done
890 * ite hi
891 * mov(hi) rX, #-1
892 * mov(!hi) rX, #1
893 * flip:
894 * neg rX
895 * done:
896 */
897static void genCmpLong(CompilationUnit* cUnit, MIR* mir,
898 RegLocation rlDest, RegLocation rlSrc1,
899 RegLocation rlSrc2)
900{
901 RegLocation rlTemp = LOC_C_RETURN; // Just using as template, will change
902 ArmLIR* target1;
903 ArmLIR* target2;
904 rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
905 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
906 rlTemp.lowReg = oatAllocTemp(cUnit);
907 loadConstant(cUnit, rlTemp.lowReg, -1);
908 opRegReg(cUnit, kOpCmp, rlSrc1.highReg, rlSrc2.highReg);
909 ArmLIR* branch1 = opCondBranch(cUnit, kArmCondLt);
910 ArmLIR* branch2 = opCondBranch(cUnit, kArmCondGt);
911 opRegRegReg(cUnit, kOpSub, rlTemp.lowReg, rlSrc1.lowReg, rlSrc2.lowReg);
912 ArmLIR* branch3 = opCondBranch(cUnit, kArmCondEq);
913
914 genIT(cUnit, kArmCondHi, "E");
915 newLIR2(cUnit, kThumb2MovImmShift, rlTemp.lowReg, modifiedImmediate(-1));
916 loadConstant(cUnit, rlTemp.lowReg, 1);
917 genBarrier(cUnit);
918
919 target2 = newLIR0(cUnit, kArmPseudoTargetLabel);
920 target2->defMask = -1;
921 opRegReg(cUnit, kOpNeg, rlTemp.lowReg, rlTemp.lowReg);
922
923 target1 = newLIR0(cUnit, kArmPseudoTargetLabel);
924 target1->defMask = -1;
925
926 storeValue(cUnit, rlDest, rlTemp);
927
928 branch1->generic.target = (LIR*)target1;
929 branch2->generic.target = (LIR*)target2;
930 branch3->generic.target = branch1->generic.target;
931}
932
933static void genMultiplyByTwoBitMultiplier(CompilationUnit* cUnit,
934 RegLocation rlSrc, RegLocation rlResult, int lit,
935 int firstBit, int secondBit)
936{
937 opRegRegRegShift(cUnit, kOpAdd, rlResult.lowReg, rlSrc.lowReg, rlSrc.lowReg,
938 encodeShift(kArmLsl, secondBit - firstBit));
939 if (firstBit != 0) {
940 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlResult.lowReg, firstBit);
941 }
942}
943
944static bool genConversionCall(CompilationUnit* cUnit, MIR* mir, int funcOffset,
945 int srcSize, int tgtSize)
946{
947 /*
948 * Don't optimize the register usage since it calls out to support
949 * functions
950 */
951 RegLocation rlSrc;
952 RegLocation rlDest;
953 oatFlushAllRegs(cUnit); /* Send everything to home location */
954 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
955 if (srcSize == 1) {
956 rlSrc = oatGetSrc(cUnit, mir, 0);
957 loadValueDirectFixed(cUnit, rlSrc, r0);
958 } else {
959 rlSrc = oatGetSrcWide(cUnit, mir, 0, 1);
960 loadValueDirectWideFixed(cUnit, rlSrc, r0, r1);
961 }
962 opReg(cUnit, kOpBlx, rLR);
963 oatClobberCallRegs(cUnit);
964 if (tgtSize == 1) {
965 RegLocation rlResult;
966 rlDest = oatGetDest(cUnit, mir, 0);
967 rlResult = oatGetReturn(cUnit);
968 storeValue(cUnit, rlDest, rlResult);
969 } else {
970 RegLocation rlResult;
971 rlDest = oatGetDestWide(cUnit, mir, 0, 1);
972 rlResult = oatGetReturnWide(cUnit);
973 storeValueWide(cUnit, rlDest, rlResult);
974 }
975 return false;
976}
977
978static bool genArithOpFloatPortable(CompilationUnit* cUnit, MIR* mir,
979 RegLocation rlDest, RegLocation rlSrc1,
980 RegLocation rlSrc2)
981{
982 RegLocation rlResult;
983 int funcOffset;
984
985 switch (mir->dalvikInsn.opcode) {
986 case OP_ADD_FLOAT_2ADDR:
987 case OP_ADD_FLOAT:
988 funcOffset = OFFSETOF_MEMBER(Thread, pFadd);
989 break;
990 case OP_SUB_FLOAT_2ADDR:
991 case OP_SUB_FLOAT:
992 funcOffset = OFFSETOF_MEMBER(Thread, pFsub);
993 break;
994 case OP_DIV_FLOAT_2ADDR:
995 case OP_DIV_FLOAT:
996 funcOffset = OFFSETOF_MEMBER(Thread, pFdiv);
997 break;
998 case OP_MUL_FLOAT_2ADDR:
999 case OP_MUL_FLOAT:
1000 funcOffset = OFFSETOF_MEMBER(Thread, pFmul);
1001 break;
1002 case OP_REM_FLOAT_2ADDR:
1003 case OP_REM_FLOAT:
1004 funcOffset = OFFSETOF_MEMBER(Thread, pFmodf);
1005 break;
1006 case OP_NEG_FLOAT: {
1007 genNegFloat(cUnit, rlDest, rlSrc1);
1008 return false;
1009 }
1010 default:
1011 return true;
1012 }
1013 oatFlushAllRegs(cUnit); /* Send everything to home location */
1014 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1015 loadValueDirectFixed(cUnit, rlSrc1, r0);
1016 loadValueDirectFixed(cUnit, rlSrc2, r1);
1017 opReg(cUnit, kOpBlx, rLR);
1018 oatClobberCallRegs(cUnit);
1019 rlResult = oatGetReturn(cUnit);
1020 storeValue(cUnit, rlDest, rlResult);
1021 return false;
1022}
1023
1024static bool genArithOpDoublePortable(CompilationUnit* cUnit, MIR* mir,
1025 RegLocation rlDest, RegLocation rlSrc1,
1026 RegLocation rlSrc2)
1027{
1028 RegLocation rlResult;
1029 int funcOffset;
1030
1031 switch (mir->dalvikInsn.opcode) {
1032 case OP_ADD_DOUBLE_2ADDR:
1033 case OP_ADD_DOUBLE:
1034 funcOffset = OFFSETOF_MEMBER(Thread, pDadd);
1035 break;
1036 case OP_SUB_DOUBLE_2ADDR:
1037 case OP_SUB_DOUBLE:
1038 funcOffset = OFFSETOF_MEMBER(Thread, pDsub);
1039 break;
1040 case OP_DIV_DOUBLE_2ADDR:
1041 case OP_DIV_DOUBLE:
1042 funcOffset = OFFSETOF_MEMBER(Thread, pDdiv);
1043 break;
1044 case OP_MUL_DOUBLE_2ADDR:
1045 case OP_MUL_DOUBLE:
1046 funcOffset = OFFSETOF_MEMBER(Thread, pDmul);
1047 break;
1048 case OP_REM_DOUBLE_2ADDR:
1049 case OP_REM_DOUBLE:
1050 funcOffset = OFFSETOF_MEMBER(Thread, pFmod);
1051 break;
1052 case OP_NEG_DOUBLE: {
1053 genNegDouble(cUnit, rlDest, rlSrc1);
1054 return false;
1055 }
1056 default:
1057 return true;
1058 }
1059 oatFlushAllRegs(cUnit); /* Send everything to home location */
1060 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1061 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1062 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1063 opReg(cUnit, kOpBlx, rLR);
1064 oatClobberCallRegs(cUnit);
1065 rlResult = oatGetReturnWide(cUnit);
1066 storeValueWide(cUnit, rlDest, rlResult);
1067 return false;
1068}
1069
1070static bool genConversionPortable(CompilationUnit* cUnit, MIR* mir)
1071{
1072 Opcode opcode = mir->dalvikInsn.opcode;
1073
1074 switch (opcode) {
1075 case OP_INT_TO_FLOAT:
1076 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2f),
1077 1, 1);
1078 case OP_FLOAT_TO_INT:
1079 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2iz),
1080 1, 1);
1081 case OP_DOUBLE_TO_FLOAT:
1082 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2f),
1083 2, 1);
1084 case OP_FLOAT_TO_DOUBLE:
1085 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pF2d),
1086 1, 2);
1087 case OP_INT_TO_DOUBLE:
1088 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pI2d),
1089 1, 2);
1090 case OP_DOUBLE_TO_INT:
1091 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pD2iz),
1092 2, 1);
1093 case OP_FLOAT_TO_LONG:
1094 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
buzbee1b4c8592011-08-31 10:43:51 -07001095 pF2l), 1, 2);
buzbee67bf8852011-08-17 17:51:35 -07001096 case OP_LONG_TO_FLOAT:
1097 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2f),
1098 2, 1);
1099 case OP_DOUBLE_TO_LONG:
1100 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread,
buzbee1b4c8592011-08-31 10:43:51 -07001101 pD2l), 2, 2);
buzbee67bf8852011-08-17 17:51:35 -07001102 case OP_LONG_TO_DOUBLE:
1103 return genConversionCall(cUnit, mir, OFFSETOF_MEMBER(Thread, pL2d),
1104 2, 2);
1105 default:
1106 return true;
1107 }
1108 return false;
1109}
1110
1111/* Generate conditional branch instructions */
1112static ArmLIR* genConditionalBranch(CompilationUnit* cUnit,
1113 ArmConditionCode cond,
1114 ArmLIR* target)
1115{
1116 ArmLIR* branch = opCondBranch(cUnit, cond);
1117 branch->generic.target = (LIR*) target;
1118 return branch;
1119}
1120
1121/* Generate a unconditional branch to go to the interpreter */
1122static inline ArmLIR* genTrap(CompilationUnit* cUnit, int dOffset,
1123 ArmLIR* pcrLabel)
1124{
1125 ArmLIR* branch = opNone(cUnit, kOpUncondBr);
1126 return genCheckCommon(cUnit, dOffset, branch, pcrLabel);
1127}
1128
1129/*
1130 * Generate array store
1131 *
1132 */
buzbee1b4c8592011-08-31 10:43:51 -07001133static void genArrayObjPut(CompilationUnit* cUnit, MIR* mir,
1134 RegLocation rlArray, RegLocation rlIndex,
1135 RegLocation rlSrc, int scale)
buzbee67bf8852011-08-17 17:51:35 -07001136{
1137 RegisterClass regClass = oatRegClassBySize(kWord);
buzbeec143c552011-08-20 17:38:58 -07001138 int lenOffset = Array::LengthOffset().Int32Value();
1139 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001140
1141 /* Make sure it's a legal object Put. Use direct regs at first */
1142 loadValueDirectFixed(cUnit, rlArray, r1);
1143 loadValueDirectFixed(cUnit, rlSrc, r0);
1144
1145 /* null array object? */
1146 ArmLIR* pcrLabel = NULL;
1147
1148 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1149 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, r1,
1150 mir->offset, NULL);
1151 }
1152 loadWordDisp(cUnit, rSELF,
buzbee1b4c8592011-08-31 10:43:51 -07001153 OFFSETOF_MEMBER(Thread, pCanPutArrayElementFromCode), rLR);
buzbee67bf8852011-08-17 17:51:35 -07001154 /* Get the array's clazz */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -07001155 loadWordDisp(cUnit, r1, Object::ClassOffset().Int32Value(), r1);
buzbee67bf8852011-08-17 17:51:35 -07001156 /* Get the object's clazz */
Ian Rogers0cfe1fb2011-08-26 03:29:44 -07001157 loadWordDisp(cUnit, r0, Object::ClassOffset().Int32Value(), r0);
buzbee67bf8852011-08-17 17:51:35 -07001158 opReg(cUnit, kOpBlx, rLR);
1159 oatClobberCallRegs(cUnit);
1160
1161 // Now, redo loadValues in case they didn't survive the call
1162
1163 int regPtr;
1164 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1165 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1166
1167 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1168 oatClobber(cUnit, rlArray.lowReg);
1169 regPtr = rlArray.lowReg;
1170 } else {
1171 regPtr = oatAllocTemp(cUnit);
1172 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1173 }
1174
1175 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1176 int regLen = oatAllocTemp(cUnit);
1177 //NOTE: max live temps(4) here.
1178 /* Get len */
1179 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1180 /* regPtr -> array data */
1181 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1182 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1183 pcrLabel);
1184 oatFreeTemp(cUnit, regLen);
1185 } else {
1186 /* regPtr -> array data */
1187 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1188 }
1189 /* at this point, regPtr points to array, 2 live temps */
1190 rlSrc = loadValue(cUnit, rlSrc, regClass);
1191 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1192 scale, kWord);
1193}
1194
1195/*
1196 * Generate array load
1197 */
1198static void genArrayGet(CompilationUnit* cUnit, MIR* mir, OpSize size,
1199 RegLocation rlArray, RegLocation rlIndex,
1200 RegLocation rlDest, int scale)
1201{
1202 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001203 int lenOffset = Array::LengthOffset().Int32Value();
1204 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001205 RegLocation rlResult;
1206 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1207 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1208 int regPtr;
1209
1210 /* null object? */
1211 ArmLIR* pcrLabel = NULL;
1212
1213 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1214 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow,
1215 rlArray.lowReg, mir->offset, NULL);
1216 }
1217
1218 regPtr = oatAllocTemp(cUnit);
1219
1220 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1221 int regLen = oatAllocTemp(cUnit);
1222 /* Get len */
1223 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1224 /* regPtr -> array data */
1225 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1226 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1227 pcrLabel);
1228 oatFreeTemp(cUnit, regLen);
1229 } else {
1230 /* regPtr -> array data */
1231 opRegRegImm(cUnit, kOpAdd, regPtr, rlArray.lowReg, dataOffset);
1232 }
1233 if ((size == kLong) || (size == kDouble)) {
1234 if (scale) {
1235 int rNewIndex = oatAllocTemp(cUnit);
1236 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1237 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1238 oatFreeTemp(cUnit, rNewIndex);
1239 } else {
1240 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1241 }
1242 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1243
1244 loadPair(cUnit, regPtr, rlResult.lowReg, rlResult.highReg);
1245
1246 oatFreeTemp(cUnit, regPtr);
1247 storeValueWide(cUnit, rlDest, rlResult);
1248 } else {
1249 rlResult = oatEvalLoc(cUnit, rlDest, regClass, true);
1250
1251 loadBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlResult.lowReg,
1252 scale, size);
1253
1254 oatFreeTemp(cUnit, regPtr);
1255 storeValue(cUnit, rlDest, rlResult);
1256 }
1257}
1258
1259/*
1260 * Generate array store
1261 *
1262 */
1263static void genArrayPut(CompilationUnit* cUnit, MIR* mir, OpSize size,
1264 RegLocation rlArray, RegLocation rlIndex,
1265 RegLocation rlSrc, int scale)
1266{
1267 RegisterClass regClass = oatRegClassBySize(size);
buzbeec143c552011-08-20 17:38:58 -07001268 int lenOffset = Array::LengthOffset().Int32Value();
1269 int dataOffset = Array::DataOffset().Int32Value();
buzbee67bf8852011-08-17 17:51:35 -07001270
1271 int regPtr;
1272 rlArray = loadValue(cUnit, rlArray, kCoreReg);
1273 rlIndex = loadValue(cUnit, rlIndex, kCoreReg);
1274
1275 if (oatIsTemp(cUnit, rlArray.lowReg)) {
1276 oatClobber(cUnit, rlArray.lowReg);
1277 regPtr = rlArray.lowReg;
1278 } else {
1279 regPtr = oatAllocTemp(cUnit);
1280 genRegCopy(cUnit, regPtr, rlArray.lowReg);
1281 }
1282
1283 /* null object? */
1284 ArmLIR* pcrLabel = NULL;
1285
1286 if (!(mir->OptimizationFlags & MIR_IGNORE_NULL_CHECK)) {
1287 pcrLabel = genNullCheck(cUnit, rlArray.sRegLow, rlArray.lowReg,
1288 mir->offset, NULL);
1289 }
1290
1291 if (!(mir->OptimizationFlags & MIR_IGNORE_RANGE_CHECK)) {
1292 int regLen = oatAllocTemp(cUnit);
1293 //NOTE: max live temps(4) here.
1294 /* Get len */
1295 loadWordDisp(cUnit, rlArray.lowReg, lenOffset, regLen);
1296 /* regPtr -> array data */
1297 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1298 genBoundsCheck(cUnit, rlIndex.lowReg, regLen, mir->offset,
1299 pcrLabel);
1300 oatFreeTemp(cUnit, regLen);
1301 } else {
1302 /* regPtr -> array data */
1303 opRegImm(cUnit, kOpAdd, regPtr, dataOffset);
1304 }
1305 /* at this point, regPtr points to array, 2 live temps */
1306 if ((size == kLong) || (size == kDouble)) {
1307 //TODO: need specific wide routine that can handle fp regs
1308 if (scale) {
1309 int rNewIndex = oatAllocTemp(cUnit);
1310 opRegRegImm(cUnit, kOpLsl, rNewIndex, rlIndex.lowReg, scale);
1311 opRegReg(cUnit, kOpAdd, regPtr, rNewIndex);
1312 oatFreeTemp(cUnit, rNewIndex);
1313 } else {
1314 opRegReg(cUnit, kOpAdd, regPtr, rlIndex.lowReg);
1315 }
1316 rlSrc = loadValueWide(cUnit, rlSrc, regClass);
1317
1318 storePair(cUnit, regPtr, rlSrc.lowReg, rlSrc.highReg);
1319
1320 oatFreeTemp(cUnit, regPtr);
1321 } else {
1322 rlSrc = loadValue(cUnit, rlSrc, regClass);
1323
1324 storeBaseIndexed(cUnit, regPtr, rlIndex.lowReg, rlSrc.lowReg,
1325 scale, size);
1326 }
1327}
1328
1329static bool genShiftOpLong(CompilationUnit* cUnit, MIR* mir,
1330 RegLocation rlDest, RegLocation rlSrc1,
1331 RegLocation rlShift)
1332{
buzbee54330722011-08-23 16:46:55 -07001333 int funcOffset;
buzbee67bf8852011-08-17 17:51:35 -07001334
buzbee67bf8852011-08-17 17:51:35 -07001335 switch( mir->dalvikInsn.opcode) {
1336 case OP_SHL_LONG:
1337 case OP_SHL_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001338 funcOffset = OFFSETOF_MEMBER(Thread, pShlLong);
buzbee67bf8852011-08-17 17:51:35 -07001339 break;
1340 case OP_SHR_LONG:
1341 case OP_SHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001342 funcOffset = OFFSETOF_MEMBER(Thread, pShrLong);
buzbee67bf8852011-08-17 17:51:35 -07001343 break;
1344 case OP_USHR_LONG:
1345 case OP_USHR_LONG_2ADDR:
buzbee54330722011-08-23 16:46:55 -07001346 funcOffset = OFFSETOF_MEMBER(Thread, pUshrLong);
buzbee67bf8852011-08-17 17:51:35 -07001347 break;
1348 default:
buzbee54330722011-08-23 16:46:55 -07001349 LOG(FATAL) << "Unexpected case";
buzbee67bf8852011-08-17 17:51:35 -07001350 return true;
1351 }
buzbee54330722011-08-23 16:46:55 -07001352 oatFlushAllRegs(cUnit); /* Send everything to home location */
1353 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1354 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1355 loadValueDirect(cUnit, rlShift, r2);
1356 opReg(cUnit, kOpBlx, rLR);
1357 oatClobberCallRegs(cUnit);
1358 RegLocation rlResult = oatGetReturnWide(cUnit);
buzbee67bf8852011-08-17 17:51:35 -07001359 storeValueWide(cUnit, rlDest, rlResult);
1360 return false;
1361}
1362
1363static bool genArithOpLong(CompilationUnit* cUnit, MIR* mir,
1364 RegLocation rlDest, RegLocation rlSrc1,
1365 RegLocation rlSrc2)
1366{
1367 RegLocation rlResult;
1368 OpKind firstOp = kOpBkpt;
1369 OpKind secondOp = kOpBkpt;
1370 bool callOut = false;
1371 int funcOffset;
1372 int retReg = r0;
1373
1374 switch (mir->dalvikInsn.opcode) {
1375 case OP_NOT_LONG:
1376 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1377 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1378 opRegReg(cUnit, kOpMvn, rlResult.lowReg, rlSrc2.lowReg);
1379 opRegReg(cUnit, kOpMvn, rlResult.highReg, rlSrc2.highReg);
1380 storeValueWide(cUnit, rlDest, rlResult);
1381 return false;
1382 break;
1383 case OP_ADD_LONG:
1384 case OP_ADD_LONG_2ADDR:
1385 firstOp = kOpAdd;
1386 secondOp = kOpAdc;
1387 break;
1388 case OP_SUB_LONG:
1389 case OP_SUB_LONG_2ADDR:
1390 firstOp = kOpSub;
1391 secondOp = kOpSbc;
1392 break;
1393 case OP_MUL_LONG:
1394 case OP_MUL_LONG_2ADDR:
buzbee439c4fa2011-08-27 15:59:07 -07001395 callOut = true;
1396 retReg = r0;
1397 funcOffset = OFFSETOF_MEMBER(Thread, pLmul);
1398 break;
buzbee67bf8852011-08-17 17:51:35 -07001399 case OP_DIV_LONG:
1400 case OP_DIV_LONG_2ADDR:
1401 callOut = true;
1402 retReg = r0;
1403 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1404 break;
1405 /* NOTE - result is in r2/r3 instead of r0/r1 */
1406 case OP_REM_LONG:
1407 case OP_REM_LONG_2ADDR:
1408 callOut = true;
1409 funcOffset = OFFSETOF_MEMBER(Thread, pLdivmod);
1410 retReg = r2;
1411 break;
1412 case OP_AND_LONG_2ADDR:
1413 case OP_AND_LONG:
1414 firstOp = kOpAnd;
1415 secondOp = kOpAnd;
1416 break;
1417 case OP_OR_LONG:
1418 case OP_OR_LONG_2ADDR:
1419 firstOp = kOpOr;
1420 secondOp = kOpOr;
1421 break;
1422 case OP_XOR_LONG:
1423 case OP_XOR_LONG_2ADDR:
1424 firstOp = kOpXor;
1425 secondOp = kOpXor;
1426 break;
1427 case OP_NEG_LONG: {
1428 //TUNING: can improve this using Thumb2 code
1429 int tReg = oatAllocTemp(cUnit);
1430 rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
1431 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1432 loadConstantNoClobber(cUnit, tReg, 0);
1433 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1434 tReg, rlSrc2.lowReg);
1435 opRegReg(cUnit, kOpSbc, tReg, rlSrc2.highReg);
1436 genRegCopy(cUnit, rlResult.highReg, tReg);
1437 storeValueWide(cUnit, rlDest, rlResult);
1438 return false;
1439 }
1440 default:
1441 LOG(FATAL) << "Invalid long arith op";
1442 }
1443 if (!callOut) {
1444 genLong3Addr(cUnit, mir, firstOp, secondOp, rlDest, rlSrc1, rlSrc2);
1445 } else {
1446 // Adjust return regs in to handle case of rem returning r2/r3
1447 oatFlushAllRegs(cUnit); /* Send everything to home location */
1448 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1449 loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
1450 loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
1451 opReg(cUnit, kOpBlx, rLR);
1452 oatClobberCallRegs(cUnit);
1453 if (retReg == r0)
1454 rlResult = oatGetReturnWide(cUnit);
1455 else
1456 rlResult = oatGetReturnWideAlt(cUnit);
1457 storeValueWide(cUnit, rlDest, rlResult);
1458 }
1459 return false;
1460}
1461
1462static bool genArithOpInt(CompilationUnit* cUnit, MIR* mir,
1463 RegLocation rlDest, RegLocation rlSrc1,
1464 RegLocation rlSrc2)
1465{
1466 OpKind op = kOpBkpt;
1467 bool callOut = false;
1468 bool checkZero = false;
1469 bool unary = false;
1470 int retReg = r0;
1471 int funcOffset;
1472 RegLocation rlResult;
1473 bool shiftOp = false;
1474
1475 switch (mir->dalvikInsn.opcode) {
1476 case OP_NEG_INT:
1477 op = kOpNeg;
1478 unary = true;
1479 break;
1480 case OP_NOT_INT:
1481 op = kOpMvn;
1482 unary = true;
1483 break;
1484 case OP_ADD_INT:
1485 case OP_ADD_INT_2ADDR:
1486 op = kOpAdd;
1487 break;
1488 case OP_SUB_INT:
1489 case OP_SUB_INT_2ADDR:
1490 op = kOpSub;
1491 break;
1492 case OP_MUL_INT:
1493 case OP_MUL_INT_2ADDR:
1494 op = kOpMul;
1495 break;
1496 case OP_DIV_INT:
1497 case OP_DIV_INT_2ADDR:
1498 callOut = true;
1499 checkZero = true;
1500 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1501 retReg = r0;
1502 break;
1503 /* NOTE: returns in r1 */
1504 case OP_REM_INT:
1505 case OP_REM_INT_2ADDR:
1506 callOut = true;
1507 checkZero = true;
1508 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1509 retReg = r1;
1510 break;
1511 case OP_AND_INT:
1512 case OP_AND_INT_2ADDR:
1513 op = kOpAnd;
1514 break;
1515 case OP_OR_INT:
1516 case OP_OR_INT_2ADDR:
1517 op = kOpOr;
1518 break;
1519 case OP_XOR_INT:
1520 case OP_XOR_INT_2ADDR:
1521 op = kOpXor;
1522 break;
1523 case OP_SHL_INT:
1524 case OP_SHL_INT_2ADDR:
1525 shiftOp = true;
1526 op = kOpLsl;
1527 break;
1528 case OP_SHR_INT:
1529 case OP_SHR_INT_2ADDR:
1530 shiftOp = true;
1531 op = kOpAsr;
1532 break;
1533 case OP_USHR_INT:
1534 case OP_USHR_INT_2ADDR:
1535 shiftOp = true;
1536 op = kOpLsr;
1537 break;
1538 default:
1539 LOG(FATAL) << "Invalid word arith op: " <<
1540 (int)mir->dalvikInsn.opcode;
1541 }
1542 if (!callOut) {
1543 rlSrc1 = loadValue(cUnit, rlSrc1, kCoreReg);
1544 if (unary) {
1545 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1546 opRegReg(cUnit, op, rlResult.lowReg,
1547 rlSrc1.lowReg);
1548 } else {
1549 rlSrc2 = loadValue(cUnit, rlSrc2, kCoreReg);
1550 if (shiftOp) {
1551 int tReg = oatAllocTemp(cUnit);
1552 opRegRegImm(cUnit, kOpAnd, tReg, rlSrc2.lowReg, 31);
1553 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1554 opRegRegReg(cUnit, op, rlResult.lowReg,
1555 rlSrc1.lowReg, tReg);
1556 oatFreeTemp(cUnit, tReg);
1557 } else {
1558 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1559 opRegRegReg(cUnit, op, rlResult.lowReg,
1560 rlSrc1.lowReg, rlSrc2.lowReg);
1561 }
1562 }
1563 storeValue(cUnit, rlDest, rlResult);
1564 } else {
1565 RegLocation rlResult;
1566 oatFlushAllRegs(cUnit); /* Send everything to home location */
1567 loadValueDirectFixed(cUnit, rlSrc2, r1);
1568 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1569 loadValueDirectFixed(cUnit, rlSrc1, r0);
1570 if (checkZero) {
1571 genNullCheck(cUnit, rlSrc2.sRegLow, r1, mir->offset, NULL);
1572 }
1573 opReg(cUnit, kOpBlx, rLR);
1574 oatClobberCallRegs(cUnit);
1575 if (retReg == r0)
1576 rlResult = oatGetReturn(cUnit);
1577 else
1578 rlResult = oatGetReturnAlt(cUnit);
1579 storeValue(cUnit, rlDest, rlResult);
1580 }
1581 return false;
1582}
1583
/*
 * Suspend poll.
 *
 * Intended behavior (see the disabled code below): fetch
 * self->interpBreak.ctl.breakFlags and, if the flags are non-zero, punt to
 * the interpreter.
 *
 * Currently a stub: the only active statement is UNIMPLEMENTED(WARNING);
 * the implementation is compiled out with #if 0.
 */
static void genSuspendPoll(CompilationUnit* cUnit, MIR* mir)
{
    UNIMPLEMENTED(WARNING);
#if 0
    int rTemp = oatAllocTemp(cUnit);
    ArmLIR* ld;
    ld = loadBaseDisp(cUnit, NULL, rSELF,
                      offsetof(Thread, interpBreak.ctl.breakFlags),
                      rTemp, kUnsignedByte, INVALID_SREG);
    setMemRefType(ld, true /* isLoad */, kMustNotAlias);
    genRegImmCheck(cUnit, kArmCondNe, rTemp, 0, mir->offset, NULL);
#endif
}
1601
1602/*
1603 * The following are the first-level codegen routines that analyze the format
1604 * of each bytecode then either dispatch special purpose codegen routines
1605 * or produce corresponding Thumb instructions directly.
1606 */
1607
/*
 * Returns true if at most one bit is set in 'x'.
 *
 * Note that this also holds for 0 and for the most negative int (only the
 * sign bit set); callers in this file reject literals below 2 before relying
 * on the result. The test is done on the unsigned bit pattern so that the
 * "x - 1" step cannot overflow a signed int.
 */
static bool isPowerOfTwo(int x)
{
    unsigned int bits = (unsigned int) x;
    return (bits & (bits - 1u)) == 0u;
}
1612
// Returns true if no more than two bits are set in 'x'.
// Works by clearing the lowest set bit and then checking that at most one
// bit remains.
static bool isPopCountLE2(unsigned int x)
{
    unsigned int withoutLowest = x & (x - 1);
    unsigned int withoutTwoLowest = withoutLowest & (withoutLowest - 1);
    return withoutTwoLowest == 0;
}
1619
// Returns the index of the lowest set bit in 'x' (bit 0 is the least
// significant bit). If 'x' has no bit set, returns the number of bits in
// 'x' (assuming 8-bit bytes) instead of scanning forever.
static int lowestSetBit(unsigned int x) {
    if (x == 0) {
        // Without this guard both loops below would never terminate.
        return (int) (sizeof(x) * 8);
    }
    int bit_posn = 0;
    // Skip whole zero nibbles first, then finish bit by bit.
    while ((x & 0xf) == 0) {
        bit_posn += 4;
        x >>= 4;
    }
    while ((x & 1) == 0) {
        bit_posn++;
        x >>= 1;
    }
    return bit_posn;
}
1633
1634// Returns true if it added instructions to 'cUnit' to divide 'rlSrc' by 'lit'
1635// and store the result in 'rlDest'.
1636static bool handleEasyDivide(CompilationUnit* cUnit, Opcode dalvikOpcode,
1637 RegLocation rlSrc, RegLocation rlDest, int lit)
1638{
1639 if (lit < 2 || !isPowerOfTwo(lit)) {
1640 return false;
1641 }
1642 int k = lowestSetBit(lit);
1643 if (k >= 30) {
1644 // Avoid special cases.
1645 return false;
1646 }
1647 bool div = (dalvikOpcode == OP_DIV_INT_LIT8 ||
1648 dalvikOpcode == OP_DIV_INT_LIT16);
1649 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1650 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1651 if (div) {
1652 int tReg = oatAllocTemp(cUnit);
1653 if (lit == 2) {
1654 // Division by 2 is by far the most common division by constant.
1655 opRegRegImm(cUnit, kOpLsr, tReg, rlSrc.lowReg, 32 - k);
1656 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1657 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1658 } else {
1659 opRegRegImm(cUnit, kOpAsr, tReg, rlSrc.lowReg, 31);
1660 opRegRegImm(cUnit, kOpLsr, tReg, tReg, 32 - k);
1661 opRegRegReg(cUnit, kOpAdd, tReg, tReg, rlSrc.lowReg);
1662 opRegRegImm(cUnit, kOpAsr, rlResult.lowReg, tReg, k);
1663 }
1664 } else {
1665 int cReg = oatAllocTemp(cUnit);
1666 loadConstant(cUnit, cReg, lit - 1);
1667 int tReg1 = oatAllocTemp(cUnit);
1668 int tReg2 = oatAllocTemp(cUnit);
1669 if (lit == 2) {
1670 opRegRegImm(cUnit, kOpLsr, tReg1, rlSrc.lowReg, 32 - k);
1671 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1672 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1673 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1674 } else {
1675 opRegRegImm(cUnit, kOpAsr, tReg1, rlSrc.lowReg, 31);
1676 opRegRegImm(cUnit, kOpLsr, tReg1, tReg1, 32 - k);
1677 opRegRegReg(cUnit, kOpAdd, tReg2, tReg1, rlSrc.lowReg);
1678 opRegRegReg(cUnit, kOpAnd, tReg2, tReg2, cReg);
1679 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg2, tReg1);
1680 }
1681 }
1682 storeValue(cUnit, rlDest, rlResult);
1683 return true;
1684}
1685
1686// Returns true if it added instructions to 'cUnit' to multiply 'rlSrc' by 'lit'
1687// and store the result in 'rlDest'.
1688static bool handleEasyMultiply(CompilationUnit* cUnit,
1689 RegLocation rlSrc, RegLocation rlDest, int lit)
1690{
1691 // Can we simplify this multiplication?
1692 bool powerOfTwo = false;
1693 bool popCountLE2 = false;
1694 bool powerOfTwoMinusOne = false;
1695 if (lit < 2) {
1696 // Avoid special cases.
1697 return false;
1698 } else if (isPowerOfTwo(lit)) {
1699 powerOfTwo = true;
1700 } else if (isPopCountLE2(lit)) {
1701 popCountLE2 = true;
1702 } else if (isPowerOfTwo(lit + 1)) {
1703 powerOfTwoMinusOne = true;
1704 } else {
1705 return false;
1706 }
1707 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1708 RegLocation rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1709 if (powerOfTwo) {
1710 // Shift.
1711 opRegRegImm(cUnit, kOpLsl, rlResult.lowReg, rlSrc.lowReg,
1712 lowestSetBit(lit));
1713 } else if (popCountLE2) {
1714 // Shift and add and shift.
1715 int firstBit = lowestSetBit(lit);
1716 int secondBit = lowestSetBit(lit ^ (1 << firstBit));
1717 genMultiplyByTwoBitMultiplier(cUnit, rlSrc, rlResult, lit,
1718 firstBit, secondBit);
1719 } else {
1720 // Reverse subtract: (src << (shift + 1)) - src.
1721 assert(powerOfTwoMinusOne);
1722 // TODO: rsb dst, src, src lsl#lowestSetBit(lit + 1)
1723 int tReg = oatAllocTemp(cUnit);
1724 opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lowestSetBit(lit + 1));
1725 opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
1726 }
1727 storeValue(cUnit, rlDest, rlResult);
1728 return true;
1729}
1730
1731static bool genArithOpIntLit(CompilationUnit* cUnit, MIR* mir,
1732 RegLocation rlDest, RegLocation rlSrc,
1733 int lit)
1734{
1735 Opcode dalvikOpcode = mir->dalvikInsn.opcode;
1736 RegLocation rlResult;
1737 OpKind op = (OpKind)0; /* Make gcc happy */
1738 int shiftOp = false;
1739 bool isDiv = false;
1740 int funcOffset;
1741
1742 switch (dalvikOpcode) {
1743 case OP_RSUB_INT_LIT8:
1744 case OP_RSUB_INT: {
1745 int tReg;
1746 //TUNING: add support for use of Arm rsub op
1747 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1748 tReg = oatAllocTemp(cUnit);
1749 loadConstant(cUnit, tReg, lit);
1750 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1751 opRegRegReg(cUnit, kOpSub, rlResult.lowReg,
1752 tReg, rlSrc.lowReg);
1753 storeValue(cUnit, rlDest, rlResult);
1754 return false;
1755 break;
1756 }
1757
1758 case OP_ADD_INT_LIT8:
1759 case OP_ADD_INT_LIT16:
1760 op = kOpAdd;
1761 break;
1762 case OP_MUL_INT_LIT8:
1763 case OP_MUL_INT_LIT16: {
1764 if (handleEasyMultiply(cUnit, rlSrc, rlDest, lit)) {
1765 return false;
1766 }
1767 op = kOpMul;
1768 break;
1769 }
1770 case OP_AND_INT_LIT8:
1771 case OP_AND_INT_LIT16:
1772 op = kOpAnd;
1773 break;
1774 case OP_OR_INT_LIT8:
1775 case OP_OR_INT_LIT16:
1776 op = kOpOr;
1777 break;
1778 case OP_XOR_INT_LIT8:
1779 case OP_XOR_INT_LIT16:
1780 op = kOpXor;
1781 break;
1782 case OP_SHL_INT_LIT8:
1783 lit &= 31;
1784 shiftOp = true;
1785 op = kOpLsl;
1786 break;
1787 case OP_SHR_INT_LIT8:
1788 lit &= 31;
1789 shiftOp = true;
1790 op = kOpAsr;
1791 break;
1792 case OP_USHR_INT_LIT8:
1793 lit &= 31;
1794 shiftOp = true;
1795 op = kOpLsr;
1796 break;
1797
1798 case OP_DIV_INT_LIT8:
1799 case OP_DIV_INT_LIT16:
1800 case OP_REM_INT_LIT8:
1801 case OP_REM_INT_LIT16:
1802 if (lit == 0) {
1803 UNIMPLEMENTED(FATAL);
1804 // FIXME: generate an explicit throw here
1805 return false;
1806 }
1807 if (handleEasyDivide(cUnit, dalvikOpcode, rlSrc, rlDest, lit)) {
1808 return false;
1809 }
1810 oatFlushAllRegs(cUnit); /* Everything to home location */
1811 loadValueDirectFixed(cUnit, rlSrc, r0);
1812 oatClobber(cUnit, r0);
1813 if ((dalvikOpcode == OP_DIV_INT_LIT8) ||
1814 (dalvikOpcode == OP_DIV_INT_LIT16)) {
1815 funcOffset = OFFSETOF_MEMBER(Thread, pIdiv);
1816 isDiv = true;
1817 } else {
1818 funcOffset = OFFSETOF_MEMBER(Thread, pIdivmod);
1819 isDiv = false;
1820 }
1821 loadWordDisp(cUnit, rSELF, funcOffset, rLR);
1822 loadConstant(cUnit, r1, lit);
1823 opReg(cUnit, kOpBlx, rLR);
1824 oatClobberCallRegs(cUnit);
1825 if (isDiv)
1826 rlResult = oatGetReturn(cUnit);
1827 else
1828 rlResult = oatGetReturnAlt(cUnit);
1829 storeValue(cUnit, rlDest, rlResult);
1830 return false;
1831 break;
1832 default:
1833 return true;
1834 }
1835 rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
1836 rlResult = oatEvalLoc(cUnit, rlDest, kCoreReg, true);
1837 // Avoid shifts by literal 0 - no support in Thumb. Change to copy
1838 if (shiftOp && (lit == 0)) {
1839 genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
1840 } else {
1841 opRegRegImm(cUnit, op, rlResult.lowReg, rlSrc.lowReg, lit);
1842 }
1843 storeValue(cUnit, rlDest, rlResult);
1844 return false;
1845}
1846
/* Architecture-specific debugging helpers go here */
1848void oatArchDump(void)
1849{
1850 /* Print compiled opcode in this VM instance */
1851 int i, start, streak;
1852 char buf[1024];
1853
1854 streak = i = 0;
1855 buf[0] = 0;
1856 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1857 i++;
1858 }
1859 if (i == kNumPackedOpcodes) {
1860 return;
1861 }
1862 for (start = i++, streak = 1; i < kNumPackedOpcodes; i++) {
1863 if (opcodeCoverage[i]) {
1864 streak++;
1865 } else {
1866 if (streak == 1) {
1867 sprintf(buf+strlen(buf), "%x,", start);
1868 } else {
1869 sprintf(buf+strlen(buf), "%x-%x,", start, start + streak - 1);
1870 }
1871 streak = 0;
1872 while (opcodeCoverage[i] == 0 && i < kNumPackedOpcodes) {
1873 i++;
1874 }
1875 if (i < kNumPackedOpcodes) {
1876 streak = 1;
1877 start = i;
1878 }
1879 }
1880 }
1881 if (streak) {
1882 if (streak == 1) {
1883 sprintf(buf+strlen(buf), "%x", start);
1884 } else {
1885 sprintf(buf+strlen(buf), "%x-%x", start, start + streak - 1);
1886 }
1887 }
1888 if (strlen(buf)) {
1889 LOG(INFO) << "dalvik.vm.oat.op = " << buf;
1890 }
1891}