blob: 0a0c9bc45865011f65d65ce8108f11f8c7c66360 [file] [log] [blame]
buzbee67bf8852011-08-17 17:51:35 -07001/*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * This file contains Arm-specific register alloction support.
19 */
20
21#include "../../CompilerUtility.h"
22#include "../../CompilerIR.h"
23#include "../..//Dataflow.h"
24#include "ArmLIR.h"
25#include "Codegen.h"
26#include "../Ralloc.h"
27
28/*
29 * Placeholder routine until we do proper register allocation.
30 */
31
32typedef struct RefCounts {
33 int count;
34 int sReg;
35 bool doubleStart; // Starting vReg for a double
36} RefCounts;
37
38/*
39 * USE SSA names to count references of base Dalvik vRegs. Also,
40 * mark "wide" in the first of wide SSA locationRec pairs.
41 */
42static void countRefs(CompilationUnit *cUnit, BasicBlock* bb,
43 RefCounts* counts, bool fp)
44{
45 MIR* mir;
46 if (bb->blockType != kDalvikByteCode && bb->blockType != kEntryBlock &&
47 bb->blockType != kExitBlock)
48 return;
49
50 for (mir = bb->firstMIRInsn; mir; mir = mir->next) {
51 SSARepresentation *ssaRep = mir->ssaRep;
52 if (ssaRep) {
53 int i;
54 int attrs = oatDataFlowAttributes[mir->dalvikInsn.opcode];
55 if (fp) {
56 // Mark 1st reg of double pairs
57 int first = 0;
58 int sReg;
59 if ((attrs & (DF_DA_WIDE|DF_FP_A)) == (DF_DA_WIDE|DF_FP_A)) {
60 sReg = DECODE_REG(
61 oatConvertSSARegToDalvik(cUnit, ssaRep->defs[0]));
62 counts[sReg].doubleStart = true;
63 cUnit->regLocation[ssaRep->defs[0]].wide = true;
64 }
65 if ((attrs & (DF_UA_WIDE|DF_FP_A)) == (DF_UA_WIDE|DF_FP_A)) {
66 sReg = DECODE_REG(
67 oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
68 counts[sReg].doubleStart = true;
69 cUnit->regLocation[ssaRep->uses[first]].wide = true;
70 first += 2;
71 }
72 if ((attrs & (DF_UB_WIDE|DF_FP_B)) == (DF_UB_WIDE|DF_FP_B)) {
73 sReg = DECODE_REG(
74 oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
75 counts[sReg].doubleStart = true;
76 cUnit->regLocation[ssaRep->uses[first]].wide = true;
77 first += 2;
78 }
79 if ((attrs & (DF_UC_WIDE|DF_FP_C)) == (DF_UC_WIDE|DF_FP_C)) {
80 sReg = DECODE_REG(
81 oatConvertSSARegToDalvik(cUnit, ssaRep->uses[first]));
82 counts[sReg].doubleStart = true;
83 cUnit->regLocation[ssaRep->uses[first]].wide = true;
84 }
85 }
86 for (i=0; i< ssaRep->numUses; i++) {
87 int origSreg = DECODE_REG(
88 oatConvertSSARegToDalvik(cUnit, ssaRep->uses[i]));
buzbeec143c552011-08-20 17:38:58 -070089 assert(origSreg < cUnit->method->num_registers_);
buzbee67bf8852011-08-17 17:51:35 -070090 bool fpUse = ssaRep->fpUse ? ssaRep->fpUse[i] : false;
91 if (fp == fpUse) {
92 counts[origSreg].count++;
93 }
94 }
95 for (i=0; i< ssaRep->numDefs; i++) {
96 if (attrs & DF_SETS_CONST) {
97 // CONST opcodes are untyped - don't pollute the counts
98 continue;
99 }
100 int origSreg = DECODE_REG(
101 oatConvertSSARegToDalvik(cUnit, ssaRep->defs[i]));
buzbeec143c552011-08-20 17:38:58 -0700102 assert(origSreg < cUnit->method->num_registers_);
buzbee67bf8852011-08-17 17:51:35 -0700103 bool fpDef = ssaRep->fpDef ? ssaRep->fpDef[i] : false;
104 if (fp == fpDef) {
105 counts[origSreg].count++;
106 }
107 }
108 }
109 }
110}
111
112/* qsort callback function, sort descending */
113static int sortCounts(const void *val1, const void *val2)
114{
115 const RefCounts* op1 = (const RefCounts*)val1;
116 const RefCounts* op2 = (const RefCounts*)val2;
117 return (op1->count == op2->count) ? 0 : (op1->count < op2->count ? 1 : -1);
118}
119
120static void dumpCounts(const RefCounts* arr, int size, const char* msg)
121{
122 LOG(INFO) << msg;
123 for (int i = 0; i < size; i++) {
124 LOG(INFO) << "sReg[" << arr[i].sReg << "]: " << arr[i].count;
125 }
126}
127
128/*
129 * Note: some portions of this code required even if the kPromoteRegs
130 * optimization is disabled.
131 */
132extern void oatDoPromotion(CompilationUnit* cUnit)
133{
buzbeec143c552011-08-20 17:38:58 -0700134 int numRegs = cUnit->method->num_registers_;
135 int numIns = cUnit->method->num_ins_;
buzbee67bf8852011-08-17 17:51:35 -0700136
137 /*
138 * Because ins don't have explicit definitions, we need to type
139 * them based on the signature.
140 */
141 if (numIns > 0) {
142 int sReg = numRegs - numIns;
buzbeec143c552011-08-20 17:38:58 -0700143 const art::StringPiece& shorty = cUnit->method->GetShorty();
144 for (int i = 1; i < shorty.size(); i++) {
145 char arg = shorty[i];
buzbee67bf8852011-08-17 17:51:35 -0700146 // Is it wide?
147 if ((arg == 'D') || (arg == 'J')) {
148 cUnit->regLocation[sReg].wide = true;
149 cUnit->regLocation[sReg+1].fp = cUnit->regLocation[sReg].fp;
150 sReg++; // Skip to next
151 }
152 sReg++;
153 }
154 }
155 /*
156 * TUNING: is leaf? Can't just use "hasInvoke" to determine as some
157 * instructions might call out to C/assembly helper functions. Until
158 * machinery is in place, always spill lr.
159 */
160 cUnit->coreSpillMask |= (1 << rLR);
161 cUnit->numSpills++;
162 /*
163 * Simple hack for testing register allocation. Just do a static
164 * count of the uses of Dalvik registers. Note that we examine
165 * the SSA names, but count based on original Dalvik register name.
166 * Count refs separately based on type in order to give allocation
167 * preference to fp doubles - which must be allocated sequential
168 * physical single fp registers started with an even-numbered
169 * reg.
170 */
171 RefCounts *coreRegs = (RefCounts *)
172 oatNew(sizeof(RefCounts) * numRegs, true);
173 RefCounts *fpRegs = (RefCounts *)
174 oatNew(sizeof(RefCounts) * numRegs, true);
175 for (int i = 0; i < numRegs; i++) {
176 coreRegs[i].sReg = fpRegs[i].sReg = i;
177 }
178 GrowableListIterator iterator;
179 oatGrowableListIteratorInit(&cUnit->blockList, &iterator);
180 while (true) {
181 BasicBlock* bb;
182 bb = (BasicBlock*)oatGrowableListIteratorNext(&iterator);
183 if (bb == NULL) break;
184 countRefs(cUnit, bb, coreRegs, false);
185 countRefs(cUnit, bb, fpRegs, true);
186 }
187
188 /*
189 * Ideally, we'd allocate doubles starting with an even-numbered
190 * register. Bias the counts to try to allocate any vreg that's
191 * used as the start of a pair first.
192 */
193 for (int i = 0; i < numRegs; i++) {
194 if (fpRegs[i].doubleStart) {
195 fpRegs[i].count *= 2;
196 }
197 }
198
199 // Sort the count arrays
200 qsort(coreRegs, numRegs, sizeof(RefCounts), sortCounts);
201 qsort(fpRegs, numRegs, sizeof(RefCounts), sortCounts);
202
203 // TODO: temp for debugging, too verbose. Remove when unneeded
204 if (cUnit->printMeVerbose) {
205 dumpCounts(coreRegs, numRegs, "coreRegs");
206 dumpCounts(fpRegs, numRegs, "fpRegs");
207 }
208
209 if (!(cUnit->disableOpt & (1 << kPromoteRegs))) {
210 // Promote fpRegs
211 for (int i = 0; (fpRegs[i].count > 0) && (i < numRegs); i++) {
212 if (cUnit->regLocation[fpRegs[i].sReg].fpLocation != kLocPhysReg) {
213 int reg = oatAllocPreservedFPReg(cUnit, fpRegs[i].sReg,
214 fpRegs[i].doubleStart);
215 if (reg < 0) {
216 break; // No more left
217 }
218 }
219 }
220
221 // Promote core regs
222 for (int i = 0; (coreRegs[i].count > 0) && i < numRegs; i++) {
223 if (cUnit->regLocation[i].location != kLocPhysReg) {
224 int reg = oatAllocPreservedCoreReg(cUnit, coreRegs[i].sReg);
225 if (reg < 0) {
226 break; // No more left
227 }
228 }
229 }
230 }
231
232 // Now, update SSA names to new home locations
233 for (int i = 0; i < cUnit->numSSARegs; i++) {
234 int baseSreg = cUnit->regLocation[i].sRegLow;
235 RegLocation *base = &cUnit->regLocation[baseSreg];
236 RegLocation *baseNext = &cUnit->regLocation[baseSreg+1];
237 RegLocation *curr = &cUnit->regLocation[i];
238 if (curr->fp) {
239 /* Single or double, check fpLocation of base */
240 if (base->fpLocation == kLocPhysReg) {
241 if (curr->wide) {
242 /* TUNING: consider alignment during allocation */
243 if (base->fpLowReg & 1) {
244 // Pair must start on even reg - don't promote
245 continue;
246 }
247 /* Make sure upper half is also in reg or skip */
248 if (baseNext->fpLocation != kLocPhysReg) {
249 continue;
250 }
251 curr->highReg = baseNext->fpLowReg;
252 }
253 curr->location = kLocPhysReg;
254 curr->lowReg = base->fpLowReg;
255 curr->home = true;
256 }
257 } else {
258 /* Core or wide */
259 if (base->location == kLocPhysReg) {
260 if (curr->wide) {
261 /* Make sure upper half is also in reg or skip */
262 if (baseNext->location != kLocPhysReg) {
263 continue;
264 }
265 curr->highReg = baseNext->lowReg;
266 }
267 curr->location = kLocPhysReg;
268 curr->lowReg = base->lowReg;
269 curr->home = true;
270 }
271 }
272 }
273}
274
275/* Returns sp-relative offset in bytes */
276extern int oatVRegOffset(CompilationUnit* cUnit, int reg)
277{
278 return (reg < cUnit->numRegs) ? cUnit->regsOffset + (reg << 2) :
279 cUnit->insOffset + ((reg - cUnit->numRegs) << 2);
280}
281
282
283/* Clobber all regs that might be used by an external C call */
284extern void oatClobberCallRegs(CompilationUnit *cUnit)
285{
286 oatClobber(cUnit, r0);
287 oatClobber(cUnit, r1);
288 oatClobber(cUnit, r2);
289 oatClobber(cUnit, r3);
290 oatClobber(cUnit, r12);
291 oatClobber(cUnit, r14lr);
292}
293
294/* Clobber all of the temps that might be used by a handler. */
295extern void oatClobberHandlerRegs(CompilationUnit* cUnit)
296{
297 //TUNING: reduce the set of regs used by handlers. Only a few need lots.
298 oatClobberCallRegs(cUnit);
299 oatClobber(cUnit, r7);
300 oatClobber(cUnit, r8);
301 oatClobber(cUnit, r10);
302}
303
304extern RegLocation oatGetReturnWide(CompilationUnit* cUnit)
305{
306 RegLocation res = LOC_C_RETURN_WIDE;
307 oatClobber(cUnit, r0);
308 oatClobber(cUnit, r1);
309 oatMarkInUse(cUnit, r0);
310 oatMarkInUse(cUnit, r1);
311 oatMarkPair(cUnit, res.lowReg, res.highReg);
312 return res;
313}
314
315extern RegLocation oatGetReturnWideAlt(CompilationUnit* cUnit)
316{
317 RegLocation res = LOC_C_RETURN_WIDE;
318 res.lowReg = r2;
319 res.highReg = r3;
320 oatClobber(cUnit, r2);
321 oatClobber(cUnit, r3);
322 oatMarkInUse(cUnit, r2);
323 oatMarkInUse(cUnit, r3);
324 oatMarkPair(cUnit, res.lowReg, res.highReg);
325 return res;
326}
327
328extern RegLocation oatGetReturn(CompilationUnit* cUnit)
329{
330 RegLocation res = LOC_C_RETURN;
331 oatClobber(cUnit, r0);
332 oatMarkInUse(cUnit, r0);
333 return res;
334}
335
336extern RegLocation oatGetReturnAlt(CompilationUnit* cUnit)
337{
338 RegLocation res = LOC_C_RETURN;
339 res.lowReg = r1;
340 oatClobber(cUnit, r1);
341 oatMarkInUse(cUnit, r1);
342 return res;
343}