blob: 2fc1603b61f31a2ab7731af105619dd3976b2b79 [file] [log] [blame]
buzbee67bf8852011-08-17 17:51:35 -07001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/*
 * This file contains Arm-specific register allocation support.
 */
20
21#include "../../CompilerUtility.h"
22#include "../../CompilerIR.h"
23#include "../..//Dataflow.h"
24#include "ArmLIR.h"
25#include "Codegen.h"
26#include "../Ralloc.h"
27
28/*
29 * Placeholder routine until we do proper register allocation.
30 */
31
/*
 * Per-vReg reference-count record used by the promotion heuristic.
 * One entry is kept per original Dalvik virtual register; the arrays
 * of these are sorted by count (descending) before promotion.
 */
typedef struct RefCounts {
    int count;        // Number of SSA uses/defs that touch this vReg
    int sReg;         // Original Dalvik vReg this entry describes
    bool doubleStart; // True if this vReg is the low half of a double pair
} RefCounts;
37
/*
 * Use SSA names to count references of base Dalvik vRegs.  Also,
 * mark "wide" in the first of wide SSA locationRec pairs.
 *
 * cUnit:  compilation unit (provides the SSA->Dalvik mapping and the
 *         regLocation table that gets its "wide" bits set here)
 * bb:     basic block to scan; non-code blocks contribute nothing
 * counts: accumulator array indexed by original Dalvik vReg number
 * fp:     true  -> count only fp uses/defs and mark double starts;
 *         false -> count only core (non-fp) uses/defs
 */
static void countRefs(CompilationUnit *cUnit, BasicBlock* bb,
                      RefCounts* counts, bool fp)
{
    MIR* mir;
    // Only block types that can carry executable MIR are examined.
    if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
        bb->blockType != kExitBlock)
        return;

    for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
        SSARepresentation *ssaRep = mir->ssaRep;
        if (ssaRep) {
            int i;
            // Dataflow attribute bits describing this opcode's operands
            int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode];
            if (fp) {
                // Mark 1st reg of double pairs
                int first = 0;   // index of the next use slot to inspect
                int sReg;
                // Wide fp definition: defs[0] is the low half of the pair.
                if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) {
                    sReg = DECODE_REG(
                        oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0]));
                    counts[sReg].doubleStart = true;
                    cUnit->regLocation[ssaRep->defs[0]].wide = true;
                }
                // Wide fp use in operand A: occupies uses[first], uses[first+1].
                if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) {
                    sReg = DECODE_REG(
                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
                    counts[sReg].doubleStart = true;
                    cUnit->regLocation[ssaRep->uses[first]].wide = true;
                    first += 2;  // skip both halves of the pair
                }
                // NOTE(review): "first" only advances when an operand is a
                // wide fp use; if A is a non-wide use, the B check below
                // still reads uses[first] at the same slot.  Confirm this
                // matches how oatDataFlowAttributes lays out the use list.
                if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) {
                    sReg = DECODE_REG(
                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
                    counts[sReg].doubleStart = true;
                    cUnit->regLocation[ssaRep->uses[first]].wide = true;
                    first += 2;
                }
                if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) {
                    sReg = DECODE_REG(
                        oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
                    counts[sReg].doubleStart = true;
                    cUnit->regLocation[ssaRep->uses[first]].wide = true;
                }
            }
            // Count uses whose fp-ness matches the requested class.
            for (i=0; i< ssaRep->numUses; i++) {
                int origSreg = DECODE_REG(
                    oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]));
                assert(origSreg < cUnit->method->registersSize);
                bool fpUse = ssaRep->fpUse ? ssaRep->fpUse[i] : false;
                if (fp == fpUse) {
                    counts[origSreg].count++;
                }
            }
            // Count defs the same way, but skip constant-setting opcodes.
            for (i=0; i< ssaRep->numDefs; i++) {
                if (attrs & DF_SETS_CONST) {
                    // CONST opcodes are untyped - don't pollute the counts
                    continue;
                }
                int origSreg = DECODE_REG(
                    oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]));
                assert(origSreg < cUnit->method->registersSize);
                bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false;
                if (fp == fpDef) {
                    counts[origSreg].count++;
                }
            }
        }
    }
}
111
112/* qsort callback function, sort descending */
113static int sortCounts(const void *val1, const void *val2)
114{
115 const RefCounts* op1 = (const RefCounts*)val1;
116 const RefCounts* op2 = (const RefCounts*)val2;
117 return (op1->count == op2->count) ? 0 : (op1->count < op2->count ? 1 : -1);
118}
119
120static void dumpCounts(const RefCounts* arr, int size, const char* msg)
121{
122 LOG(INFO) << msg;
123 for (int i = 0; i < size; i++) {
124 LOG(INFO) << "sReg[" << arr[i].sReg << "]: " << arr[i].count;
125 }
126}
127
128/*
129 * Note: some portions of this code required even if the kPromoteRegs
130 * optimization is disabled.
131 */
132extern void oatDoPromotion(CompilationUnit* cUnit)
133{
134 int numRegs = cUnit->method->registersSize;
135 int numIns = cUnit->method->insSize;
136
137 /*
138 * Because ins don't have explicit definitions, we need to type
139 * them based on the signature.
140 */
141 if (numIns > 0) {
142 int sReg = numRegs - numIns;
143 const char *shorty = cUnit->method->shorty;
144 shorty++; // Move past return type;
145 while (*shorty) {
146 char arg = *shorty++;
147 // Is it wide?
148 if ((arg == 'D') || (arg == 'J')) {
149 cUnit->regLocation[sReg].wide = true;
150 cUnit->regLocation[sReg+1].fp = cUnit->regLocation[sReg].fp;
151 sReg++; // Skip to next
152 }
153 sReg++;
154 }
155 }
156 /*
157 * TUNING: is leaf? Can't just use "hasInvoke" to determine as some
158 * instructions might call out to C/assembly helper functions. Until
159 * machinery is in place, always spill lr.
160 */
161 cUnit->coreSpillMask |= (1 << rLR);
162 cUnit->numSpills++;
163 /*
164 * Simple hack for testing register allocation. Just do a static
165 * count of the uses of Dalvik registers. Note that we examine
166 * the SSA names, but count based on original Dalvik register name.
167 * Count refs separately based on type in order to give allocation
168 * preference to fp doubles - which must be allocated sequential
169 * physical single fp registers started with an even-numbered
170 * reg.
171 */
172 RefCounts *coreRegs = (RefCounts *)
173 oatNew(sizeof(RefCounts) * numRegs, true);
174 RefCounts *fpRegs = (RefCounts *)
175 oatNew(sizeof(RefCounts) * numRegs, true);
176 for (int i = 0; i < numRegs; i++) {
177 coreRegs[i].sReg = fpRegs[i].sReg = i;
178 }
179 GrowableListIterator iterator;
180 oatGrowableListIteratorInit(&cUnit->blockList, &iterator);
181 while (true) {
182 BasicBlock* bb;
183 bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator);
184 if (bb == NULL) break;
185 countRefs(cUnit, bb, coreRegs, false);
186 countRefs(cUnit, bb, fpRegs, true);
187 }
188
189 /*
190 * Ideally, we'd allocate doubles starting with an even-numbered
191 * register. Bias the counts to try to allocate any vreg that's
192 * used as the start of a pair first.
193 */
194 for (int i = 0; i < numRegs; i++) {
195 if (fpRegs[i].doubleStart) {
196 fpRegs[i].count *= 2;
197 }
198 }
199
200 // Sort the count arrays
201 qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts);
202 qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts);
203
204 // TODO: temp for debugging, too verbose. Remove when unneeded
205 if (cUnit->printMeVerbose) {
206 dumpCounts(coreRegs, numRegs, "coreRegs");
207 dumpCounts(fpRegs, numRegs, "fpRegs");
208 }
209
210 if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
211 // Promote fpRegs
212 for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) {
213 if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
214 int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
215 fpRegs[i].doubleStart);
216 if (reg < 0) {
217 break; // No more left
218 }
219 }
220 }
221
222 // Promote core regs
223 for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) {
224 if (cUnit->regLocation[i].location != kLocPhysReg) {
225 int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg);
226 if (reg < 0) {
227 break; // No more left
228 }
229 }
230 }
231 }
232
233 // Now, update SSA names to new home locations
234 for (int i = 0; i < cUnit->numSSARegs; i++) {
235 int baseSreg = cUnit->regLocation[i].sRegLow;
236 RegLocation *base = &cUnit->regLocation[baseSreg];
237 RegLocation *baseNext = &cUnit->regLocation[baseSreg+1];
238 RegLocation *curr = &cUnit->regLocation[i];
239 if (curr->fp) {
240 /* Single or double, check fpLocation of base */
241 if (base->fpLocation == kLocPhysReg) {
242 if (curr->wide) {
243 /* TUNING: consider alignment during allocation */
244 if (base->fpLowReg & 1) {
245 // Pair must start on even reg - don't promote
246 continue;
247 }
248 /* Make sure upper half is also in reg or skip */
249 if (baseNext->fpLocation != kLocPhysReg) {
250 continue;
251 }
252 curr->highReg = baseNext->fpLowReg;
253 }
254 curr->location = kLocPhysReg;
255 curr->lowReg = base->fpLowReg;
256 curr->home = true;
257 }
258 } else {
259 /* Core or wide */
260 if (base->location == kLocPhysReg) {
261 if (curr->wide) {
262 /* Make sure upper half is also in reg or skip */
263 if (baseNext->location != kLocPhysReg) {
264 continue;
265 }
266 curr->highReg = baseNext->lowReg;
267 }
268 curr->location = kLocPhysReg;
269 curr->lowReg = base->lowReg;
270 curr->home = true;
271 }
272 }
273 }
274}
275
276/* Returns sp-relative offset in bytes */
277extern int oatVRegOffset(CompilationUnit* cUnit, int reg)
278{
279 return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) :
280 cUnit->insOffset + ((reg - cUnit->numRegs) << 2);
281}
282
283
284/* Clobber all regs that might be used by an external C call */
285extern void oatClobberCallRegs(CompilationUnit *cUnit)
286{
287 oatClobber(cUnit, r0);
288 oatClobber(cUnit, r1);
289 oatClobber(cUnit, r2);
290 oatClobber(cUnit, r3);
291 oatClobber(cUnit, r12);
292 oatClobber(cUnit, r14lr);
293}
294
295/* Clobber all of the temps that might be used by a handler. */
296extern void oatClobberHandlerRegs(CompilationUnit* cUnit)
297{
298 //TUNING: reduce the set of regs used by handlers. Only a few need lots.
299 oatClobberCallRegs(cUnit);
300 oatClobber(cUnit, r7);
301 oatClobber(cUnit, r8);
302 oatClobber(cUnit, r10);
303}
304
305extern RegLocation oatGetReturnWide(CompilationUnit* cUnit)
306{
307 RegLocation res = LOC_C_RETURN_WIDE;
308 oatClobber(cUnit, r0);
309 oatClobber(cUnit, r1);
310 oatMarkInUse(cUnit, r0);
311 oatMarkInUse(cUnit, r1);
312 oatMarkPair(cUnit, res.lowReg, res.highReg);
313 return res;
314}
315
316extern RegLocation oatGetReturnWideAlt(CompilationUnit* cUnit)
317{
318 RegLocation res = LOC_C_RETURN_WIDE;
319 res.lowReg = r2;
320 res.highReg = r3;
321 oatClobber(cUnit, r2);
322 oatClobber(cUnit, r3);
323 oatMarkInUse(cUnit, r2);
324 oatMarkInUse(cUnit, r3);
325 oatMarkPair(cUnit, res.lowReg, res.highReg);
326 return res;
327}
328
329extern RegLocation oatGetReturn(CompilationUnit* cUnit)
330{
331 RegLocation res = LOC_C_RETURN;
332 oatClobber(cUnit, r0);
333 oatMarkInUse(cUnit, r0);
334 return res;
335}
336
337extern RegLocation oatGetReturnAlt(CompilationUnit* cUnit)
338{
339 RegLocation res = LOC_C_RETURN;
340 res.lowReg = r1;
341 oatClobber(cUnit, r1);
342 oatMarkInUse(cUnit, r1);
343 return res;
344}