blob: 3c0b8dbabaf0949f67ea960764b187fd8ae794eb [file] [log] [blame]
sewardj432b1c92004-10-30 13:00:55 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin host_reg_alloc2.c ---*/
sewardj432b1c92004-10-30 13:00:55 +00004/*---------------------------------------------------------------*/
5
sewardjf8ed9d82004-11-12 17:40:23 +00006/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
sewardjf8ed9d82004-11-12 17:40:23 +00009
sewardj785952d2015-08-21 11:29:16 +000010 Copyright (C) 2004-2015 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardjf8ed9d82004-11-12 17:40:23 +000012
sewardj752f9062010-05-03 21:38:49 +000013 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
sewardjf8ed9d82004-11-12 17:40:23 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000026 02110-1301, USA.
27
sewardj752f9062010-05-03 21:38:49 +000028 The GNU General Public License is contained in the file COPYING.
sewardjf8ed9d82004-11-12 17:40:23 +000029
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
sewardjf8ed9d82004-11-12 17:40:23 +000034*/
35
sewardj432b1c92004-10-30 13:00:55 +000036#include "libvex_basictypes.h"
37#include "libvex.h"
38
sewardjcef7d3e2009-07-02 12:21:59 +000039#include "main_util.h"
40#include "host_generic_regs.h"
sewardj432b1c92004-10-30 13:00:55 +000041
42/* Set to 1 for lots of debugging output. */
43#define DEBUG_REGALLOC 0
44
45
46/* TODO 27 Oct 04:
47
sewardj432b1c92004-10-30 13:00:55 +000048 Better consistency checking from what isMove tells us.
49
50 We can possibly do V-V coalescing even when the src is spilled,
51 providing we can arrange for the dst to have the same spill slot.
52
53 Note that state[].hreg is the same as the available real regs.
54
sewardj432b1c92004-10-30 13:00:55 +000055 Generally rationalise data structures. */
56
57
58/* Records information on virtual register live ranges. Computed once
59 and remains unchanged after that. */
60typedef
61 struct {
62 /* Becomes live for the first time after this insn ... */
63 Short live_after;
64 /* Becomes dead for the last time before this insn ... */
65 Short dead_before;
66 /* The "home" spill slot, if needed. Never changes. */
67 Short spill_offset;
68 Short spill_size;
69 /* What kind of register this is. */
70 HRegClass reg_class;
sewardj432b1c92004-10-30 13:00:55 +000071 }
sewardjad572dd2004-12-30 01:44:51 +000072 VRegLR;
sewardj432b1c92004-10-30 13:00:55 +000073
74
75/* Records information on real-register live ranges. Computed once
76 and remains unchanged after that. */
77typedef
78 struct {
79 HReg rreg;
80 /* Becomes live after this insn ... */
81 Short live_after;
82 /* Becomes dead before this insn ... */
83 Short dead_before;
84 }
sewardjad572dd2004-12-30 01:44:51 +000085 RRegLR;
sewardj432b1c92004-10-30 13:00:55 +000086
87
sewardjee7d2282005-07-04 09:38:58 +000088/* An array of the following structs (rreg_state) comprises the
89 running state of the allocator. It indicates what the current
90 disposition of each allocatable real register is. The array gets
sewardja5b50222015-03-26 07:18:32 +000091 updated as the allocator processes instructions. The identity of
92 the register is not recorded here, because the index of this
93 structure in doRegisterAllocation()'s |rreg_state| is the index
94 number of the register, and the register itself can be extracted
95 from the RRegUniverse supplied to doRegisterAllocation(). */
sewardj432b1c92004-10-30 13:00:55 +000096typedef
97 struct {
sewardj607ca2d2007-08-25 21:11:33 +000098 /* ------ FIELDS WHICH DO NOT CHANGE ------ */
sewardj432b1c92004-10-30 13:00:55 +000099 /* Is this involved in any HLRs? (only an optimisation hint) */
100 Bool has_hlrs;
sewardj607ca2d2007-08-25 21:11:33 +0000101 /* ------ FIELDS WHICH DO CHANGE ------ */
102 /* 6 May 07: rearranged fields below so the whole struct fits
103 into 16 bytes on both x86 and amd64. */
104 /* Used when .disp == Bound and we are looking for vregs to
105 spill. */
106 Bool is_spill_cand;
107 /* Optimisation: used when .disp == Bound. Indicates when the
108 rreg has the same value as the spill slot for the associated
109 vreg. Is safely left at False, and becomes True after a
110 spill store or reload for this rreg. */
111 Bool eq_spill_slot;
sewardj432b1c92004-10-30 13:00:55 +0000112 /* What's it's current disposition? */
113 enum { Free, /* available for use */
114 Unavail, /* in a real-reg live range */
115 Bound /* in use (holding value of some vreg) */
116 }
117 disp;
sewardj607ca2d2007-08-25 21:11:33 +0000118 /* If .disp == Bound, what vreg is it bound to? */
sewardj432b1c92004-10-30 13:00:55 +0000119 HReg vreg;
sewardj432b1c92004-10-30 13:00:55 +0000120 }
121 RRegState;
122
sewardjc3bc60a2006-12-01 02:59:17 +0000123
sewardjee7d2282005-07-04 09:38:58 +0000124/* The allocator also maintains a redundant array of indexes
125 (vreg_state) from vreg numbers back to entries in rreg_state. It
126 is redundant because iff vreg_state[i] == j then
127 hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
128 point at each other. The purpose of this is to speed up activities
129 which involve looking for a particular vreg: there is no need to
130 scan the rreg_state looking for it, just index directly into
131 vreg_state. The FAQ "does this vreg already have an associated
132 rreg" is the main beneficiary.
133
134 To indicate, in vreg_state[i], that a given vreg is not currently
135 associated with any rreg, that entry can be set to INVALID_RREG_NO.
136
137 Because the vreg_state entries are signed Shorts, the max number
138 of vregs that can be handed by regalloc is 32767.
139*/
140
141#define INVALID_RREG_NO ((Short)(-1))
142
143#define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
144#define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
145
sewardj432b1c92004-10-30 13:00:55 +0000146
sewardj432b1c92004-10-30 13:00:55 +0000147/* Search forward from some given point in the incoming instruction
148 sequence. Point is to select a virtual register to spill, by
149 finding the vreg which is mentioned as far ahead as possible, in
150 the hope that this will minimise the number of consequent reloads.
151
152 Only do the search for vregs which are Bound in the running state,
153 and for which the .is_spill_cand field is set. This allows the
154 caller to arbitrarily restrict the set of spill candidates to be
155 considered.
156
sewardja5b50222015-03-26 07:18:32 +0000157 To do this we don't actually need to see the incoming instruction
158 stream. Rather, what we need us the HRegUsage records for the
159 incoming instruction stream. Hence that is passed in.
160
sewardj432b1c92004-10-30 13:00:55 +0000161 Returns an index into the state array indicating the (v,r) pair to
162 spill, or -1 if none was found. */
163static
164Int findMostDistantlyMentionedVReg (
sewardja5b50222015-03-26 07:18:32 +0000165 HRegUsage* reg_usages_in,
sewardj432b1c92004-10-30 13:00:55 +0000166 Int search_from_instr,
sewardja5b50222015-03-26 07:18:32 +0000167 Int num_instrs,
sewardj432b1c92004-10-30 13:00:55 +0000168 RRegState* state,
sewardja5b50222015-03-26 07:18:32 +0000169 Int n_state
sewardj432b1c92004-10-30 13:00:55 +0000170)
171{
172 Int k, m;
173 Int furthest_k = -1;
174 Int furthest = -1;
175 vassert(search_from_instr >= 0);
176 for (k = 0; k < n_state; k++) {
177 if (!state[k].is_spill_cand)
178 continue;
179 vassert(state[k].disp == Bound);
sewardja5b50222015-03-26 07:18:32 +0000180 for (m = search_from_instr; m < num_instrs; m++) {
181 if (HRegUsage__contains(&reg_usages_in[m], state[k].vreg))
sewardj432b1c92004-10-30 13:00:55 +0000182 break;
183 }
184 if (m > furthest) {
185 furthest = m;
186 furthest_k = k;
187 }
188 }
189 return furthest_k;
190}
191
192
sewardj478646f2008-05-01 20:13:04 +0000193/* Check that this vreg has been assigned a sane spill offset. */
sewardja5b50222015-03-26 07:18:32 +0000194inline
195static void sanity_check_spill_offset ( VRegLR* vreg )
sewardj478646f2008-05-01 20:13:04 +0000196{
sewardjc4530ae2012-05-21 10:18:49 +0000197 switch (vreg->reg_class) {
sewardjc4530ae2012-05-21 10:18:49 +0000198 case HRcVec128: case HRcFlt64:
199 vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
200 default:
201 vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
sewardj478646f2008-05-01 20:13:04 +0000202 }
203}
204
205
sewardjad572dd2004-12-30 01:44:51 +0000206/* Double the size of the real-reg live-range array, if needed. */
sewardja5b50222015-03-26 07:18:32 +0000207__attribute__((noinline))
208static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
sewardj432b1c92004-10-30 13:00:55 +0000209{
sewardjad572dd2004-12-30 01:44:51 +0000210 Int k;
211 RRegLR* arr2;
sewardj432b1c92004-10-30 13:00:55 +0000212 if (0)
213 vex_printf("ensureRRISpace: %d -> %d\n", *size, 2 * *size);
214 vassert(used == *size);
floriand8e3eca2015-03-13 12:46:49 +0000215 arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
sewardj432b1c92004-10-30 13:00:55 +0000216 for (k = 0; k < *size; k++)
217 arr2[k] = (*info)[k];
218 *size *= 2;
219 *info = arr2;
220}
sewardja5b50222015-03-26 07:18:32 +0000221inline
222static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
223{
224 if (LIKELY(used < *size)) return;
225 ensureRRLRspace_SLOW(info, size, used);
226}
sewardj432b1c92004-10-30 13:00:55 +0000227
228
sewardjc3bc60a2006-12-01 02:59:17 +0000229/* Sort an array of RRegLR entries by either the .live_after or
230 .dead_before fields. This is performance-critical. */
231static void sortRRLRarray ( RRegLR* arr,
232 Int size, Bool by_live_after )
233{
234 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
235 9841, 29524, 88573, 265720,
236 797161, 2391484 };
237 Int lo = 0;
238 Int hi = size-1;
239 Int i, j, h, bigN, hp;
240 RRegLR v;
241
242 vassert(size >= 0);
243 if (size == 0)
244 return;
245
246 bigN = hi - lo + 1; if (bigN < 2) return;
247 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
248
249 if (by_live_after) {
250
251 for ( ; hp >= 0; hp--) {
252 h = incs[hp];
253 for (i = lo + h; i <= hi; i++) {
254 v = arr[i];
255 j = i;
256 while (arr[j-h].live_after > v.live_after) {
257 arr[j] = arr[j-h];
258 j = j - h;
259 if (j <= (lo + h - 1)) break;
260 }
261 arr[j] = v;
262 }
263 }
264
265 } else {
266
267 for ( ; hp >= 0; hp--) {
268 h = incs[hp];
269 for (i = lo + h; i <= hi; i++) {
270 v = arr[i];
271 j = i;
272 while (arr[j-h].dead_before > v.dead_before) {
273 arr[j] = arr[j-h];
274 j = j - h;
275 if (j <= (lo + h - 1)) break;
276 }
277 arr[j] = v;
278 }
279 }
280
281 }
282}
283
284
sewardja5b50222015-03-26 07:18:32 +0000285/* Compute the index of the highest and lowest 1 in a ULong,
286 respectively. Results are undefined if the argument is zero.
287 Don't pass it zero :) */
288static inline UInt ULong__maxIndex ( ULong w64 ) {
289 return 63 - __builtin_clzll(w64);
290}
291
292static inline UInt ULong__minIndex ( ULong w64 ) {
293 return __builtin_ctzll(w64);
294}
295
296
297/* Vectorised memset, copied from Valgrind's m_libcbase.c. */
298static void* local_memset ( void *destV, Int c, SizeT sz )
299{
300# define IS_4_ALIGNED(aaa_p) (0 == (((HWord)(aaa_p)) & ((HWord)0x3)))
301
302 UInt c4;
303 UChar* d = destV;
304 UChar uc = c;
305
306 while ((!IS_4_ALIGNED(d)) && sz >= 1) {
307 d[0] = uc;
308 d++;
309 sz--;
310 }
311 if (sz == 0)
312 return destV;
313 c4 = uc;
314 c4 |= (c4 << 8);
315 c4 |= (c4 << 16);
316 while (sz >= 16) {
317 ((UInt*)d)[0] = c4;
318 ((UInt*)d)[1] = c4;
319 ((UInt*)d)[2] = c4;
320 ((UInt*)d)[3] = c4;
321 d += 16;
322 sz -= 16;
323 }
324 while (sz >= 4) {
325 ((UInt*)d)[0] = c4;
326 d += 4;
327 sz -= 4;
328 }
329 while (sz >= 1) {
330 d[0] = c;
331 d++;
332 sz--;
333 }
334 return destV;
335
336# undef IS_4_ALIGNED
337}
338
339
sewardj17bbc212004-12-30 00:14:54 +0000340/* A target-independent register allocator. Requires various
341 functions which it uses to deal abstractly with instructions and
342 registers, since it cannot have any target-specific knowledge.
sewardj432b1c92004-10-30 13:00:55 +0000343
344 Returns a new list of instructions, which, as a result of the
345 behaviour of mapRegs, will be in-place modifications of the
346 original instructions.
347
348 Requires that the incoming code has been generated using
349 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
350 that range is a checked run-time error.
351
352 Takes an expandable array of pointers to unallocated insns.
353 Returns an expandable array of pointers to allocated insns.
354*/
355HInstrArray* doRegisterAllocation (
356
357 /* Incoming virtual-registerised code. */
358 HInstrArray* instrs_in,
359
sewardja5b50222015-03-26 07:18:32 +0000360 /* The real-register universe to use. This contains facts about
361 real registers, one of which is the set of registers available
362 for allocation. */
363 const RRegUniverse* univ,
sewardj432b1c92004-10-30 13:00:55 +0000364
365 /* Return True iff the given insn is a reg-reg move, in which
366 case also return the src and dst regs. */
floriand8c64e02014-10-08 08:54:44 +0000367 Bool (*isMove) ( const HInstr*, HReg*, HReg* ),
sewardj432b1c92004-10-30 13:00:55 +0000368
369 /* Get info about register usage in this insn. */
floriand8c64e02014-10-08 08:54:44 +0000370 void (*getRegUsage) ( HRegUsage*, const HInstr*, Bool ),
sewardj432b1c92004-10-30 13:00:55 +0000371
372 /* Apply a reg-reg mapping to an insn. */
cerion92b64362005-12-13 12:02:26 +0000373 void (*mapRegs) ( HRegRemap*, HInstr*, Bool ),
sewardj432b1c92004-10-30 13:00:55 +0000374
sewardj6c299f32009-12-31 18:00:12 +0000375 /* Return one, or, if we're unlucky, two insn(s) to spill/restore a
376 real reg to a spill slot byte offset. The two leading HInstr**
377 args are out parameters, through which the generated insns are
378 returned. Also (optionally) a 'directReload' function, which
sewardjfb7373a2007-08-25 21:29:03 +0000379 attempts to replace a given instruction by one which reads
380 directly from a specified spill slot. May be NULL, in which
381 case the optimisation is not attempted. */
sewardj6c299f32009-12-31 18:00:12 +0000382 void (*genSpill) ( HInstr**, HInstr**, HReg, Int, Bool ),
383 void (*genReload) ( HInstr**, HInstr**, HReg, Int, Bool ),
sewardjfb7373a2007-08-25 21:29:03 +0000384 HInstr* (*directReload) ( HInstr*, HReg, Short ),
sewardj432b1c92004-10-30 13:00:55 +0000385 Int guest_sizeB,
386
387 /* For debug printing only. */
floriand8c64e02014-10-08 08:54:44 +0000388 void (*ppInstr) ( const HInstr*, Bool ),
cerion92b64362005-12-13 12:02:26 +0000389 void (*ppReg) ( HReg ),
390
391 /* 32/64bit mode */
392 Bool mode64
sewardj432b1c92004-10-30 13:00:55 +0000393)
394{
395# define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
396
sewardj607ca2d2007-08-25 21:11:33 +0000397 const Bool eq_spill_opt = True;
398
sewardj432b1c92004-10-30 13:00:55 +0000399 /* Info on vregs and rregs. Computed once and remains
400 unchanged. */
sewardjee7d2282005-07-04 09:38:58 +0000401 Int n_vregs;
402 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
sewardj432b1c92004-10-30 13:00:55 +0000403
sewardjc3bc60a2006-12-01 02:59:17 +0000404 /* We keep two copies of the real-reg live range info, one sorted
405 by .live_after and the other by .dead_before. First the
406 unsorted info is created in the _la variant is copied into the
407 _db variant. Once that's done both of them are sorted.
408 We also need two integer cursors which record the next
409 location in the two arrays to consider. */
410 RRegLR* rreg_lrs_la;
411 RRegLR* rreg_lrs_db;
sewardjad572dd2004-12-30 01:44:51 +0000412 Int rreg_lrs_size;
413 Int rreg_lrs_used;
sewardjc3bc60a2006-12-01 02:59:17 +0000414 Int rreg_lrs_la_next;
415 Int rreg_lrs_db_next;
sewardjad572dd2004-12-30 01:44:51 +0000416
sewardja5b50222015-03-26 07:18:32 +0000417 /* Info on register usage in the incoming instruction array.
418 Computed once and remains unchanged, more or less; updated
419 sometimes by the direct-reload optimisation. */
420 HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
421
sewardjad572dd2004-12-30 01:44:51 +0000422 /* Used when constructing vreg_lrs (for allocating stack
sewardj432b1c92004-10-30 13:00:55 +0000423 slots). */
sewardja5b50222015-03-26 07:18:32 +0000424 Short ss_busy_until_before[N_SPILL64S];
sewardj432b1c92004-10-30 13:00:55 +0000425
sewardjad572dd2004-12-30 01:44:51 +0000426 /* Used when constructing rreg_lrs. */
sewardj432b1c92004-10-30 13:00:55 +0000427 Int* rreg_live_after;
428 Int* rreg_dead_before;
429
430 /* Running state of the core allocation algorithm. */
sewardjee7d2282005-07-04 09:38:58 +0000431 RRegState* rreg_state; /* [0 .. n_rregs-1] */
432 Int n_rregs;
433
434 /* .. and the redundant backward map */
435 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
436 This inplies n_rregs must be <= 32768. */
437 Short* vreg_state; /* [0 .. n_vregs-1] */
sewardj432b1c92004-10-30 13:00:55 +0000438
439 /* The vreg -> rreg map constructed and then applied to each
440 instr. */
441 HRegRemap remap;
442
443 /* The output array of instructions. */
444 HInstrArray* instrs_out;
445
sewardjee7d2282005-07-04 09:38:58 +0000446 /* Sanity checks are expensive. They are only done periodically,
447 not at each insn processed. */
448 Bool do_sanity_check;
449
florian5074b492015-02-13 16:25:41 +0000450 vassert(0 == (guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
451 vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
florian95a487b2014-02-14 08:55:32 +0000452 vassert(0 == (N_SPILL64S % 2));
sewardj432b1c92004-10-30 13:00:55 +0000453
454 /* The live range numbers are signed shorts, and so limiting the
sewardj8c50d7d2012-07-14 09:18:02 +0000455 number of insns to 15000 comfortably guards against them
sewardj432b1c92004-10-30 13:00:55 +0000456 overflowing 32k. */
sewardj8c50d7d2012-07-14 09:18:02 +0000457 vassert(instrs_in->arr_used <= 15000);
sewardj432b1c92004-10-30 13:00:55 +0000458
459# define INVALID_INSTRNO (-2)
460
461# define EMIT_INSTR(_instr) \
462 do { \
463 HInstr* _tmp = (_instr); \
464 if (DEBUG_REGALLOC) { \
465 vex_printf("** "); \
cerion92b64362005-12-13 12:02:26 +0000466 (*ppInstr)(_tmp, mode64); \
sewardj432b1c92004-10-30 13:00:55 +0000467 vex_printf("\n\n"); \
468 } \
469 addHInstr ( instrs_out, _tmp ); \
470 } while (0)
471
sewardjee7d2282005-07-04 09:38:58 +0000472# define PRINT_STATE \
473 do { \
474 Int z, q; \
475 for (z = 0; z < n_rregs; z++) { \
476 vex_printf(" rreg_state[%2d] = ", z); \
sewardja5b50222015-03-26 07:18:32 +0000477 (*ppReg)(univ->regs[z]); \
sewardjee7d2282005-07-04 09:38:58 +0000478 vex_printf(" \t"); \
479 switch (rreg_state[z].disp) { \
480 case Free: vex_printf("Free\n"); break; \
481 case Unavail: vex_printf("Unavail\n"); break; \
482 case Bound: vex_printf("BoundTo "); \
483 (*ppReg)(rreg_state[z].vreg); \
484 vex_printf("\n"); break; \
485 } \
486 } \
487 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
488 q = 0; \
489 for (z = 0; z < n_vregs; z++) { \
490 if (vreg_state[z] == INVALID_RREG_NO) \
491 continue; \
492 vex_printf("[%d] -> %d ", z, vreg_state[z]); \
493 q++; \
494 if (q > 0 && (q % 6) == 0) \
495 vex_printf("\n "); \
496 } \
497 vex_printf("\n"); \
sewardj432b1c92004-10-30 13:00:55 +0000498 } while (0)
499
500
sewardjee7d2282005-07-04 09:38:58 +0000501 /* --------- Stage 0: set up output array --------- */
502 /* --------- and allocate/initialise running state. --------- */
503
sewardj432b1c92004-10-30 13:00:55 +0000504 instrs_out = newHInstrArray();
505
506 /* ... and initialise running state. */
sewardjee7d2282005-07-04 09:38:58 +0000507 /* n_rregs is no more than a short name for n_available_real_regs. */
sewardja5b50222015-03-26 07:18:32 +0000508 n_rregs = univ->allocable;
sewardjee7d2282005-07-04 09:38:58 +0000509 n_vregs = instrs_in->n_vregs;
sewardj432b1c92004-10-30 13:00:55 +0000510
sewardjee7d2282005-07-04 09:38:58 +0000511 /* If this is not so, vreg_state entries will overflow. */
512 vassert(n_vregs < 32767);
513
sewardja5b50222015-03-26 07:18:32 +0000514 /* If this is not so, the universe we have is nonsensical. */
515 vassert(n_rregs > 0);
516
floriand8e3eca2015-03-13 12:46:49 +0000517 rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
518 vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
sewardjee7d2282005-07-04 09:38:58 +0000519
sewardja5b50222015-03-26 07:18:32 +0000520 for (Int j = 0; j < n_rregs; j++) {
sewardjee7d2282005-07-04 09:38:58 +0000521 rreg_state[j].has_hlrs = False;
522 rreg_state[j].disp = Free;
523 rreg_state[j].vreg = INVALID_HREG;
524 rreg_state[j].is_spill_cand = False;
sewardj607ca2d2007-08-25 21:11:33 +0000525 rreg_state[j].eq_spill_slot = False;
sewardj432b1c92004-10-30 13:00:55 +0000526 }
527
sewardja5b50222015-03-26 07:18:32 +0000528 for (Int j = 0; j < n_vregs; j++)
sewardjee7d2282005-07-04 09:38:58 +0000529 vreg_state[j] = INVALID_RREG_NO;
530
531
sewardj432b1c92004-10-30 13:00:55 +0000532 /* --------- Stage 1: compute vreg live ranges. --------- */
533 /* --------- Stage 2: compute rreg live ranges. --------- */
534
535 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
536
537 /* This is relatively simple, because (1) we only seek the complete
538 end-to-end live range of each vreg, and are not interested in
539 any holes in it, and (2) the vregs are conveniently numbered 0
sewardjee7d2282005-07-04 09:38:58 +0000540 .. n_vregs-1, so we can just dump the results in a
sewardjad572dd2004-12-30 01:44:51 +0000541 pre-allocated array. */
sewardj432b1c92004-10-30 13:00:55 +0000542
sewardjad572dd2004-12-30 01:44:51 +0000543 vreg_lrs = NULL;
sewardjee7d2282005-07-04 09:38:58 +0000544 if (n_vregs > 0)
floriand8e3eca2015-03-13 12:46:49 +0000545 vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
sewardj432b1c92004-10-30 13:00:55 +0000546
sewardja5b50222015-03-26 07:18:32 +0000547 for (Int j = 0; j < n_vregs; j++) {
sewardjad572dd2004-12-30 01:44:51 +0000548 vreg_lrs[j].live_after = INVALID_INSTRNO;
549 vreg_lrs[j].dead_before = INVALID_INSTRNO;
550 vreg_lrs[j].spill_offset = 0;
551 vreg_lrs[j].spill_size = 0;
552 vreg_lrs[j].reg_class = HRcINVALID;
sewardj432b1c92004-10-30 13:00:55 +0000553 }
554
sewardja5b50222015-03-26 07:18:32 +0000555 /* An array to hold the reg-usage info for the incoming
556 instructions. */
557 reg_usage_arr
558 = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used-1);
559
sewardj432b1c92004-10-30 13:00:55 +0000560 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
561
562 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
563
564 /* This is more complex than Stage 1, because we need to compute
565 exactly all the live ranges of all the allocatable real regs,
566 and we don't know in advance how many there will be. */
567
sewardjad572dd2004-12-30 01:44:51 +0000568 rreg_lrs_used = 0;
569 rreg_lrs_size = 4;
floriand8e3eca2015-03-13 12:46:49 +0000570 rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
sewardjc3bc60a2006-12-01 02:59:17 +0000571 rreg_lrs_db = NULL; /* we'll create this later */
sewardj432b1c92004-10-30 13:00:55 +0000572
573 /* We'll need to track live range start/end points seperately for
574 each rreg. Sigh. */
sewardja5b50222015-03-26 07:18:32 +0000575 vassert(n_rregs > 0);
576 rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
577 rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
sewardj432b1c92004-10-30 13:00:55 +0000578
sewardja5b50222015-03-26 07:18:32 +0000579 for (Int j = 0; j < n_rregs; j++) {
sewardj432b1c92004-10-30 13:00:55 +0000580 rreg_live_after[j] =
581 rreg_dead_before[j] = INVALID_INSTRNO;
582 }
583
584 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
585
586 /* ------ start of ITERATE OVER INSNS ------ */
587
sewardja5b50222015-03-26 07:18:32 +0000588 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
sewardj432b1c92004-10-30 13:00:55 +0000589
sewardja5b50222015-03-26 07:18:32 +0000590 (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
sewardj432b1c92004-10-30 13:00:55 +0000591
sewardja5b50222015-03-26 07:18:32 +0000592 if (0) {
593 vex_printf("\n%d stage1: ", ii);
594 (*ppInstr)(instrs_in->arr[ii], mode64);
595 vex_printf("\n");
596 ppHRegUsage(univ, &reg_usage_arr[ii]);
597 }
sewardj432b1c92004-10-30 13:00:55 +0000598
599 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
600
sewardja5b50222015-03-26 07:18:32 +0000601 /* for each virtual reg mentioned in the insn ... */
602 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
sewardj432b1c92004-10-30 13:00:55 +0000603
sewardja5b50222015-03-26 07:18:32 +0000604 HReg vreg = reg_usage_arr[ii].vRegs[j];
605 vassert(hregIsVirtual(vreg));
606
607 Int k = hregIndex(vreg);
sewardjee7d2282005-07-04 09:38:58 +0000608 if (k < 0 || k >= n_vregs) {
sewardj432b1c92004-10-30 13:00:55 +0000609 vex_printf("\n");
cerion92b64362005-12-13 12:02:26 +0000610 (*ppInstr)(instrs_in->arr[ii], mode64);
sewardj432b1c92004-10-30 13:00:55 +0000611 vex_printf("\n");
sewardjee7d2282005-07-04 09:38:58 +0000612 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
sewardj432b1c92004-10-30 13:00:55 +0000613 vpanic("doRegisterAllocation: out-of-range vreg");
614 }
615
616 /* Take the opportunity to note its regclass. We'll need
617 that when allocating spill slots. */
sewardjad572dd2004-12-30 01:44:51 +0000618 if (vreg_lrs[k].reg_class == HRcINVALID) {
sewardj432b1c92004-10-30 13:00:55 +0000619 /* First mention of this vreg. */
sewardjad572dd2004-12-30 01:44:51 +0000620 vreg_lrs[k].reg_class = hregClass(vreg);
sewardj432b1c92004-10-30 13:00:55 +0000621 } else {
622 /* Seen it before, so check for consistency. */
sewardjad572dd2004-12-30 01:44:51 +0000623 vassert(vreg_lrs[k].reg_class == hregClass(vreg));
sewardj432b1c92004-10-30 13:00:55 +0000624 }
625
626 /* Now consider live ranges. */
sewardja5b50222015-03-26 07:18:32 +0000627 switch (reg_usage_arr[ii].vMode[j]) {
sewardj432b1c92004-10-30 13:00:55 +0000628 case HRmRead:
sewardjad572dd2004-12-30 01:44:51 +0000629 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
sewardjd08f2d72004-12-01 23:19:36 +0000630 vex_printf("\n\nOFFENDING VREG = %d\n", k);
sewardj432b1c92004-10-30 13:00:55 +0000631 vpanic("doRegisterAllocation: "
632 "first event for vreg is Read");
sewardjd08f2d72004-12-01 23:19:36 +0000633 }
sewardj40e144d2005-03-28 00:46:27 +0000634 vreg_lrs[k].dead_before = toShort(ii + 1);
sewardj432b1c92004-10-30 13:00:55 +0000635 break;
636 case HRmWrite:
sewardjad572dd2004-12-30 01:44:51 +0000637 if (vreg_lrs[k].live_after == INVALID_INSTRNO)
sewardj40e144d2005-03-28 00:46:27 +0000638 vreg_lrs[k].live_after = toShort(ii);
639 vreg_lrs[k].dead_before = toShort(ii + 1);
sewardj432b1c92004-10-30 13:00:55 +0000640 break;
641 case HRmModify:
sewardjad572dd2004-12-30 01:44:51 +0000642 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
sewardj109ffdb2004-12-10 21:45:38 +0000643 vex_printf("\n\nOFFENDING VREG = %d\n", k);
sewardj432b1c92004-10-30 13:00:55 +0000644 vpanic("doRegisterAllocation: "
645 "first event for vreg is Modify");
sewardj109ffdb2004-12-10 21:45:38 +0000646 }
sewardj40e144d2005-03-28 00:46:27 +0000647 vreg_lrs[k].dead_before = toShort(ii + 1);
sewardj432b1c92004-10-30 13:00:55 +0000648 break;
649 default:
650 vpanic("doRegisterAllocation(1)");
651 } /* switch */
652
sewardja5b50222015-03-26 07:18:32 +0000653 } /* iterate over virtual registers */
sewardj432b1c92004-10-30 13:00:55 +0000654
655 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
656
657 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
658
sewardja5b50222015-03-26 07:18:32 +0000659 /* If this doesn't hold, the following iteration over real registers
660 will fail miserably. */
661 vassert(N_RREGUNIVERSE_REGS == 64);
662
663 const ULong rRead = reg_usage_arr[ii].rRead;
664 const ULong rWritten = reg_usage_arr[ii].rWritten;
665 const ULong rMentioned = rRead | rWritten;
666
667 UInt rReg_minIndex;
668 UInt rReg_maxIndex;
669 if (rMentioned == 0) {
670 /* There are no real register uses in this insn. Set
671 rReg_{min,max}Index so that the following loop doesn't iterate
672 at all, so as to avoid wasting time. */
673 rReg_minIndex = 1;
674 rReg_maxIndex = 0;
675 } else {
676 rReg_minIndex = ULong__minIndex(rMentioned);
677 rReg_maxIndex = ULong__maxIndex(rMentioned);
678 /* Don't bother to look at registers which are not available
679 to the allocator. We asserted above that n_rregs > 0, so
680 n_rregs-1 is safe. */
681 if (rReg_maxIndex >= n_rregs)
682 rReg_maxIndex = n_rregs-1;
683 }
684
685 /* for each allocator-available real reg mentioned in the insn ... */
686 /* Note. We are allocating only over the real regs available to
687 the allocator. Others, eg the stack or baseblock pointers,
688 are unavailable to allocation and so we never visit them.
689 Hence the iteration is cut off at n_rregs-1, since n_rregs ==
690 univ->allocable. */
691 for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
692
693 const ULong jMask = 1ULL << j;
694 if (LIKELY((rMentioned & jMask) == 0))
695 continue;
696
697 const Bool isR = (rRead & jMask) != 0;
698 const Bool isW = (rWritten & jMask) != 0;
sewardj432b1c92004-10-30 13:00:55 +0000699
700 /* Dummy initialisations of flush_la and flush_db to avoid
701 possible bogus uninit-var warnings from gcc. */
702 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
sewardja5b50222015-03-26 07:18:32 +0000703 Bool flush = False;
sewardj432b1c92004-10-30 13:00:55 +0000704
sewardja5b50222015-03-26 07:18:32 +0000705 if (isW && !isR) {
706 flush_la = rreg_live_after[j];
707 flush_db = rreg_dead_before[j];
708 if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
709 flush = True;
710 rreg_live_after[j] = ii;
711 rreg_dead_before[j] = ii+1;
712 } else if (!isW && isR) {
713 if (rreg_live_after[j] == INVALID_INSTRNO) {
714 vex_printf("\nOFFENDING RREG = ");
715 (*ppReg)(univ->regs[j]);
716 vex_printf("\n");
717 vex_printf("\nOFFENDING instr = ");
718 (*ppInstr)(instrs_in->arr[ii], mode64);
719 vex_printf("\n");
720 vpanic("doRegisterAllocation: "
721 "first event for rreg is Read");
722 }
723 rreg_dead_before[j] = ii+1;
724 } else {
725 vassert(isR && isW);
726 if (rreg_live_after[j] == INVALID_INSTRNO) {
727 vex_printf("\nOFFENDING RREG = ");
728 (*ppReg)(univ->regs[j]);
729 vex_printf("\n");
730 vex_printf("\nOFFENDING instr = ");
731 (*ppInstr)(instrs_in->arr[ii], mode64);
732 vex_printf("\n");
733 vpanic("doRegisterAllocation: "
734 "first event for rreg is Modify");
735 }
736 rreg_dead_before[j] = ii+1;
sewardj432b1c92004-10-30 13:00:55 +0000737 }
738
739 if (flush) {
740 vassert(flush_la != INVALID_INSTRNO);
741 vassert(flush_db != INVALID_INSTRNO);
sewardjc3bc60a2006-12-01 02:59:17 +0000742 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
sewardj432b1c92004-10-30 13:00:55 +0000743 if (0)
744 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
sewardja5b50222015-03-26 07:18:32 +0000745 rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
sewardjc3bc60a2006-12-01 02:59:17 +0000746 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
747 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
sewardjad572dd2004-12-30 01:44:51 +0000748 rreg_lrs_used++;
sewardj432b1c92004-10-30 13:00:55 +0000749 }
750
sewardja5b50222015-03-26 07:18:32 +0000751 } /* iterate over rregs in the instr */
sewardj432b1c92004-10-30 13:00:55 +0000752
753 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
754
755 } /* iterate over insns */
756
757 /* ------ end of ITERATE OVER INSNS ------ */
758
759 /* ------ start of FINALISE RREG LIVE RANGES ------ */
760
761 /* Now finish up any live ranges left over. */
sewardja5b50222015-03-26 07:18:32 +0000762 for (Int j = 0; j < n_rregs; j++) {
sewardj432b1c92004-10-30 13:00:55 +0000763
sewardja5b50222015-03-26 07:18:32 +0000764 if (0) {
765 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
766 rreg_dead_before[j]);
767 }
sewardj432b1c92004-10-30 13:00:55 +0000768 vassert( (rreg_live_after[j] == INVALID_INSTRNO
sewardja5b50222015-03-26 07:18:32 +0000769 && rreg_dead_before[j] == INVALID_INSTRNO)
sewardj432b1c92004-10-30 13:00:55 +0000770 ||
sewardja5b50222015-03-26 07:18:32 +0000771 (rreg_live_after[j] != INVALID_INSTRNO
772 && rreg_dead_before[j] != INVALID_INSTRNO)
sewardj432b1c92004-10-30 13:00:55 +0000773 );
774
775 if (rreg_live_after[j] == INVALID_INSTRNO)
776 continue;
777
sewardjc3bc60a2006-12-01 02:59:17 +0000778 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
sewardj432b1c92004-10-30 13:00:55 +0000779 if (0)
780 vex_printf("FLUSH 2 (%d,%d)\n",
781 rreg_live_after[j], rreg_dead_before[j]);
sewardja5b50222015-03-26 07:18:32 +0000782 rreg_lrs_la[rreg_lrs_used].rreg = univ->regs[j];
sewardjc3bc60a2006-12-01 02:59:17 +0000783 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
784 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
sewardjad572dd2004-12-30 01:44:51 +0000785 rreg_lrs_used++;
sewardj432b1c92004-10-30 13:00:55 +0000786 }
787
788 /* Compute summary hints for choosing real regs. If a real reg is
789 involved in a hard live range, record that fact in the fixed
sewardjee7d2282005-07-04 09:38:58 +0000790 part of the running rreg_state. Later, when offered a choice between
sewardj432b1c92004-10-30 13:00:55 +0000791 rregs, it's better to choose one which is not marked as having
792 any HLRs, since ones with HLRs may need to be spilled around
793 their HLRs. Correctness of final assignment is unaffected by
sewardjee7d2282005-07-04 09:38:58 +0000794 this mechanism -- it is only an optimisation. */
florianee313662012-01-18 14:04:23 +0000795
sewardja5b50222015-03-26 07:18:32 +0000796 for (Int j = 0; j < rreg_lrs_used; j++) {
797 HReg rreg = rreg_lrs_la[j].rreg;
sewardj432b1c92004-10-30 13:00:55 +0000798 vassert(!hregIsVirtual(rreg));
799 /* rreg is involved in a HLR. Record this info in the array, if
800 there is space. */
sewardja5b50222015-03-26 07:18:32 +0000801 UInt ix = hregIndex(rreg);
802 vassert(ix < n_rregs);
803 rreg_state[ix].has_hlrs = True;
sewardj432b1c92004-10-30 13:00:55 +0000804 }
805 if (0) {
sewardja5b50222015-03-26 07:18:32 +0000806 for (Int j = 0; j < n_rregs; j++) {
sewardjee7d2282005-07-04 09:38:58 +0000807 if (!rreg_state[j].has_hlrs)
sewardj432b1c92004-10-30 13:00:55 +0000808 continue;
sewardja5b50222015-03-26 07:18:32 +0000809 ppReg(univ->regs[j]);
sewardj432b1c92004-10-30 13:00:55 +0000810 vex_printf(" hinted\n");
811 }
812 }
813
sewardjc3bc60a2006-12-01 02:59:17 +0000814 /* Finally, copy the _la variant into the _db variant and
815 sort both by their respective fields. */
floriand8e3eca2015-03-13 12:46:49 +0000816 rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
sewardja5b50222015-03-26 07:18:32 +0000817 for (Int j = 0; j < rreg_lrs_used; j++)
sewardjc3bc60a2006-12-01 02:59:17 +0000818 rreg_lrs_db[j] = rreg_lrs_la[j];
819
820 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
821 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
822
823 /* And set up the cursors. */
824 rreg_lrs_la_next = 0;
825 rreg_lrs_db_next = 0;
826
sewardja5b50222015-03-26 07:18:32 +0000827 for (Int j = 1; j < rreg_lrs_used; j++) {
sewardjc3bc60a2006-12-01 02:59:17 +0000828 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
829 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
830 }
831
sewardj432b1c92004-10-30 13:00:55 +0000832 /* ------ end of FINALISE RREG LIVE RANGES ------ */
833
sewardja5b50222015-03-26 07:18:32 +0000834 if (DEBUG_REGALLOC) {
835 for (Int j = 0; j < n_vregs; j++) {
836 vex_printf("vreg %d: la = %d, db = %d\n",
837 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
838 }
sewardj432b1c92004-10-30 13:00:55 +0000839 }
sewardj432b1c92004-10-30 13:00:55 +0000840
sewardja5b50222015-03-26 07:18:32 +0000841 if (DEBUG_REGALLOC) {
842 vex_printf("RRegLRs by LA:\n");
843 for (Int j = 0; j < rreg_lrs_used; j++) {
844 vex_printf(" ");
845 (*ppReg)(rreg_lrs_la[j].rreg);
846 vex_printf(" la = %d, db = %d\n",
847 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
848 }
849 vex_printf("RRegLRs by DB:\n");
850 for (Int j = 0; j < rreg_lrs_used; j++) {
851 vex_printf(" ");
852 (*ppReg)(rreg_lrs_db[j].rreg);
853 vex_printf(" la = %d, db = %d\n",
854 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
855 }
sewardjc3bc60a2006-12-01 02:59:17 +0000856 }
sewardj432b1c92004-10-30 13:00:55 +0000857
858 /* --------- Stage 3: allocate spill slots. --------- */
859
sewardj7fb65eb2007-03-25 04:14:58 +0000860 /* Each spill slot is 8 bytes long. For vregs which take more than
861 64 bits to spill (classes Flt64 and Vec128), we have to allocate
sewardjc4530ae2012-05-21 10:18:49 +0000862 two consecutive spill slots. For 256 bit registers (class
863 Vec256), we have to allocate four consecutive spill slots.
sewardj432b1c92004-10-30 13:00:55 +0000864
sewardj478646f2008-05-01 20:13:04 +0000865 For Vec128-class on PowerPC, the spill slot's actual address
866 must be 16-byte aligned. Since the spill slot's address is
867 computed as an offset from the guest state pointer, and since
868 the user of the generated code must set that pointer to a
sewardjc4530ae2012-05-21 10:18:49 +0000869 32-aligned value, we have the residual obligation here of
sewardj478646f2008-05-01 20:13:04 +0000870 choosing a 16-aligned spill slot offset for Vec128-class values.
871 Since each spill slot is 8 bytes long, that means for
872 Vec128-class values we must allocated a spill slot number which
873 is zero mod 2.
874
sewardja5b50222015-03-26 07:18:32 +0000875 Similarly, for Vec256 class on amd64, find a spill slot number
sewardjc4530ae2012-05-21 10:18:49 +0000876 which is zero mod 4. This guarantees it will be 32 byte
877 aligned, which isn't actually necessary on amd64 (we use movUpd
878 etc to spill), but seems like good practice.
879
sewardj432b1c92004-10-30 13:00:55 +0000880 Do a rank-based allocation of vregs to spill slot numbers. We
sewardj607ca2d2007-08-25 21:11:33 +0000881 put as few values as possible in spill slots, but nevertheless
sewardj432b1c92004-10-30 13:00:55 +0000882 need to have a spill slot available for all vregs, just in case.
883 */
sewardja5b50222015-03-26 07:18:32 +0000884 /* Int max_ss_no = -1; */
sewardj432b1c92004-10-30 13:00:55 +0000885
sewardja5b50222015-03-26 07:18:32 +0000886 local_memset(ss_busy_until_before, 0, sizeof(ss_busy_until_before));
sewardj432b1c92004-10-30 13:00:55 +0000887
sewardja5b50222015-03-26 07:18:32 +0000888 for (Int j = 0; j < n_vregs; j++) {
sewardj432b1c92004-10-30 13:00:55 +0000889
890 /* True iff this vreg is unused. In which case we also expect
891 that the reg_class field for it has not been set. */
sewardjad572dd2004-12-30 01:44:51 +0000892 if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
893 vassert(vreg_lrs[j].reg_class == HRcINVALID);
sewardj432b1c92004-10-30 13:00:55 +0000894 continue;
895 }
896
sewardj7fb65eb2007-03-25 04:14:58 +0000897 /* The spill slots are 64 bits in size. As per the comment on
sewardjc4530ae2012-05-21 10:18:49 +0000898 definition of HRegClass in host_generic_regs.h, that means,
899 to spill a vreg of class Flt64 or Vec128, we'll need to find
900 two adjacent spill slots to use. For Vec256, we'll need to
901 find four adjacent slots to use. Note, this logic needs to
902 kept in sync with the size info on the definition of
903 HRegClass. */
sewardja5b50222015-03-26 07:18:32 +0000904 Int ss_no = -1;
sewardjc4530ae2012-05-21 10:18:49 +0000905 switch (vreg_lrs[j].reg_class) {
sewardj432b1c92004-10-30 13:00:55 +0000906
sewardjc4530ae2012-05-21 10:18:49 +0000907 case HRcVec128: case HRcFlt64:
908 /* Find two adjacent free slots in which between them
909 provide up to 128 bits in which to spill the vreg.
910 Since we are trying to find an even:odd pair, move
911 along in steps of 2 (slots). */
sewardja5b50222015-03-26 07:18:32 +0000912 for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
913 if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
914 && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
sewardjc4530ae2012-05-21 10:18:49 +0000915 break;
sewardja5b50222015-03-26 07:18:32 +0000916 if (ss_no >= N_SPILL64S-1) {
sewardjc4530ae2012-05-21 10:18:49 +0000917 vpanic("LibVEX_N_SPILL_BYTES is too low. "
918 "Increase and recompile.");
919 }
sewardja5b50222015-03-26 07:18:32 +0000920 ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
921 ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
sewardjc4530ae2012-05-21 10:18:49 +0000922 break;
sewardj7fb65eb2007-03-25 04:14:58 +0000923
sewardjc4530ae2012-05-21 10:18:49 +0000924 default:
925 /* The ordinary case -- just find a single spill slot. */
926 /* Find the lowest-numbered spill slot which is available
927 at the start point of this interval, and assign the
928 interval to it. */
sewardja5b50222015-03-26 07:18:32 +0000929 for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
930 if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
sewardjc4530ae2012-05-21 10:18:49 +0000931 break;
sewardja5b50222015-03-26 07:18:32 +0000932 if (ss_no == N_SPILL64S) {
sewardjc4530ae2012-05-21 10:18:49 +0000933 vpanic("LibVEX_N_SPILL_BYTES is too low. "
934 "Increase and recompile.");
935 }
sewardja5b50222015-03-26 07:18:32 +0000936 ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
sewardjc4530ae2012-05-21 10:18:49 +0000937 break;
sewardj7fb65eb2007-03-25 04:14:58 +0000938
sewardja5b50222015-03-26 07:18:32 +0000939 } /* switch (vreg_lrs[j].reg_class) */
sewardj432b1c92004-10-30 13:00:55 +0000940
941 /* This reflects LibVEX's hard-wired knowledge of the baseBlock
sewardj478646f2008-05-01 20:13:04 +0000942 layout: the guest state, then two equal sized areas following
943 it for two sets of shadow state, and then the spill area. */
sewardja5b50222015-03-26 07:18:32 +0000944 vreg_lrs[j].spill_offset = toShort(guest_sizeB * 3 + ss_no * 8);
sewardj432b1c92004-10-30 13:00:55 +0000945
sewardj478646f2008-05-01 20:13:04 +0000946 /* Independent check that we've made a sane choice of slot */
947 sanity_check_spill_offset( &vreg_lrs[j] );
sewardj432b1c92004-10-30 13:00:55 +0000948 /* if (j > max_ss_no) */
949 /* max_ss_no = j; */
950 }
951
sewardja5b50222015-03-26 07:18:32 +0000952 if (0) {
953 vex_printf("\n\n");
954 for (Int j = 0; j < n_vregs; j++)
955 vex_printf("vreg %d --> spill offset %d\n",
956 j, vreg_lrs[j].spill_offset);
957 }
sewardj432b1c92004-10-30 13:00:55 +0000958
959 /* --------- Stage 4: establish rreg preferences --------- */
960
961 /* It may be advantageous to allocating certain vregs to specific
962 rregs, as a way of avoiding reg-reg moves later. Here we
963 establish which, if any, rreg each vreg would prefer to be in.
964 Note that this constrains the allocator -- ideally we end up
sewardj2fa60a32004-10-30 22:23:53 +0000965 with as few as possible vregs expressing a preference.
sewardj432b1c92004-10-30 13:00:55 +0000966
sewardj2fa60a32004-10-30 22:23:53 +0000967 This is an optimisation: if the .preferred_rreg field is never
968 set to anything different from INVALID_HREG, the allocator still
969 works. */
sewardj17bbc212004-12-30 00:14:54 +0000970
971 /* 30 Dec 04: removed this mechanism as it does not seem to
972 help. */
sewardj432b1c92004-10-30 13:00:55 +0000973
974 /* --------- Stage 5: process instructions --------- */
975
976 /* This is the main loop of the allocator. First, we need to
977 correctly set up our running state, which tracks the status of
978 each real register. */
979
980 /* ------ BEGIN: Process each insn in turn. ------ */
981
sewardja5b50222015-03-26 07:18:32 +0000982 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
sewardj432b1c92004-10-30 13:00:55 +0000983
sewardja5b50222015-03-26 07:18:32 +0000984 if (DEBUG_REGALLOC) {
985 vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
986 vex_printf("---- ");
987 (*ppInstr)(instrs_in->arr[ii], mode64);
988 vex_printf("\n\nInitial state:\n");
989 PRINT_STATE;
990 vex_printf("\n");
991 }
sewardj432b1c92004-10-30 13:00:55 +0000992
993 /* ------------ Sanity checks ------------ */
994
sewardjee7d2282005-07-04 09:38:58 +0000995 /* Sanity checks are expensive. So they are done only once
Elliott Hughesa0664b92017-04-18 17:46:52 -0700996 every 17 instructions, and just before the last
sewardjee7d2282005-07-04 09:38:58 +0000997 instruction. */
998 do_sanity_check
999 = toBool(
sewardja5b50222015-03-26 07:18:32 +00001000 False /* Set to True for sanity checking of all insns. */
sewardjee7d2282005-07-04 09:38:58 +00001001 || ii == instrs_in->arr_used-1
Elliott Hughesa0664b92017-04-18 17:46:52 -07001002 || (ii > 0 && (ii % 17) == 0)
sewardjee7d2282005-07-04 09:38:58 +00001003 );
sewardj432b1c92004-10-30 13:00:55 +00001004
sewardjee7d2282005-07-04 09:38:58 +00001005 if (do_sanity_check) {
sewardj432b1c92004-10-30 13:00:55 +00001006
sewardjee7d2282005-07-04 09:38:58 +00001007 /* Sanity check 1: all rregs with a hard live range crossing
1008 this insn must be marked as unavailable in the running
1009 state. */
sewardja5b50222015-03-26 07:18:32 +00001010 for (Int j = 0; j < rreg_lrs_used; j++) {
1011 if (rreg_lrs_la[j].live_after < ii
sewardjc3bc60a2006-12-01 02:59:17 +00001012 && ii < rreg_lrs_la[j].dead_before) {
sewardjee7d2282005-07-04 09:38:58 +00001013 /* ii is the middle of a hard live range for some real
1014 reg. Check it's marked as such in the running
1015 state. */
sewardja5b50222015-03-26 07:18:32 +00001016 HReg reg = rreg_lrs_la[j].rreg;
sewardj432b1c92004-10-30 13:00:55 +00001017
sewardja5b50222015-03-26 07:18:32 +00001018 if (0) {
1019 vex_printf("considering la %d .. db %d reg = ",
1020 rreg_lrs_la[j].live_after,
1021 rreg_lrs_la[j].dead_before);
1022 (*ppReg)(reg);
1023 vex_printf("\n");
1024 }
sewardjee7d2282005-07-04 09:38:58 +00001025
sewardja5b50222015-03-26 07:18:32 +00001026 /* assert that this rreg is marked as unavailable */
1027 vassert(!hregIsVirtual(reg));
1028 vassert(rreg_state[hregIndex(reg)].disp == Unavail);
sewardjee7d2282005-07-04 09:38:58 +00001029 }
sewardj432b1c92004-10-30 13:00:55 +00001030 }
sewardj432b1c92004-10-30 13:00:55 +00001031
sewardjee7d2282005-07-04 09:38:58 +00001032 /* Sanity check 2: conversely, all rregs marked as
1033 unavailable in the running rreg_state must have a
1034 corresponding hard live range entry in the rreg_lrs
1035 array. */
sewardja5b50222015-03-26 07:18:32 +00001036 for (Int j = 0; j < n_rregs; j++) {
sewardjee7d2282005-07-04 09:38:58 +00001037 vassert(rreg_state[j].disp == Bound
1038 || rreg_state[j].disp == Free
1039 || rreg_state[j].disp == Unavail);
1040 if (rreg_state[j].disp != Unavail)
1041 continue;
sewardja5b50222015-03-26 07:18:32 +00001042 Int k;
1043 for (k = 0; k < rreg_lrs_used; k++) {
1044 HReg reg = rreg_lrs_la[k].rreg;
1045 vassert(!hregIsVirtual(reg));
1046 if (hregIndex(reg) == j
sewardjc3bc60a2006-12-01 02:59:17 +00001047 && rreg_lrs_la[k].live_after < ii
1048 && ii < rreg_lrs_la[k].dead_before)
sewardjee7d2282005-07-04 09:38:58 +00001049 break;
sewardja5b50222015-03-26 07:18:32 +00001050 }
sewardjee7d2282005-07-04 09:38:58 +00001051 /* If this vassertion fails, we couldn't find a
1052 corresponding HLR. */
1053 vassert(k < rreg_lrs_used);
1054 }
sewardj432b1c92004-10-30 13:00:55 +00001055
sewardjee7d2282005-07-04 09:38:58 +00001056 /* Sanity check 3: all vreg-rreg bindings must bind registers
1057 of the same class. */
sewardja5b50222015-03-26 07:18:32 +00001058 for (Int j = 0; j < n_rregs; j++) {
sewardj607ca2d2007-08-25 21:11:33 +00001059 if (rreg_state[j].disp != Bound) {
1060 vassert(rreg_state[j].eq_spill_slot == False);
sewardjee7d2282005-07-04 09:38:58 +00001061 continue;
sewardj607ca2d2007-08-25 21:11:33 +00001062 }
sewardja5b50222015-03-26 07:18:32 +00001063 vassert(hregClass(univ->regs[j])
sewardjee7d2282005-07-04 09:38:58 +00001064 == hregClass(rreg_state[j].vreg));
1065 vassert( hregIsVirtual(rreg_state[j].vreg));
sewardjee7d2282005-07-04 09:38:58 +00001066 }
sewardj432b1c92004-10-30 13:00:55 +00001067
sewardjee7d2282005-07-04 09:38:58 +00001068 /* Sanity check 4: the vreg_state and rreg_state
1069 mutually-redundant mappings are consistent. If
1070 rreg_state[j].vreg points at some vreg_state entry then
1071 that vreg_state entry should point back at
1072 rreg_state[j]. */
sewardja5b50222015-03-26 07:18:32 +00001073 for (Int j = 0; j < n_rregs; j++) {
sewardjee7d2282005-07-04 09:38:58 +00001074 if (rreg_state[j].disp != Bound)
1075 continue;
sewardja5b50222015-03-26 07:18:32 +00001076 Int k = hregIndex(rreg_state[j].vreg);
sewardjee7d2282005-07-04 09:38:58 +00001077 vassert(IS_VALID_VREGNO(k));
1078 vassert(vreg_state[k] == j);
1079 }
sewardja5b50222015-03-26 07:18:32 +00001080 for (Int j = 0; j < n_vregs; j++) {
1081 Int k = vreg_state[j];
sewardjee7d2282005-07-04 09:38:58 +00001082 if (k == INVALID_RREG_NO)
1083 continue;
1084 vassert(IS_VALID_RREGNO(k));
1085 vassert(rreg_state[k].disp == Bound);
sewardja5b50222015-03-26 07:18:32 +00001086 vassert(hregIndex(rreg_state[k].vreg) == j);
sewardjee7d2282005-07-04 09:38:58 +00001087 }
1088
1089 } /* if (do_sanity_check) */
sewardj432b1c92004-10-30 13:00:55 +00001090
1091 /* ------------ end of Sanity checks ------------ */
1092
1093 /* Do various optimisations pertaining to register coalescing
1094 and preferencing:
1095 MOV v <-> v coalescing (done here).
1096 MOV v <-> r coalescing (not yet, if ever)
1097 */
1098 /* If doing a reg-reg move between two vregs, and the src's live
1099 range ends here and the dst's live range starts here, bind
1100 the dst to the src's rreg, and that's all. */
sewardja5b50222015-03-26 07:18:32 +00001101 HReg vregS = INVALID_HREG;
1102 HReg vregD = INVALID_HREG;
sewardj432b1c92004-10-30 13:00:55 +00001103 if ( (*isMove)( instrs_in->arr[ii], &vregS, &vregD ) ) {
1104 if (!hregIsVirtual(vregS)) goto cannot_coalesce;
1105 if (!hregIsVirtual(vregD)) goto cannot_coalesce;
1106 /* Check that *isMove is not telling us a bunch of lies ... */
1107 vassert(hregClass(vregS) == hregClass(vregD));
sewardja5b50222015-03-26 07:18:32 +00001108 Int k = hregIndex(vregS);
1109 Int m = hregIndex(vregD);
sewardjee7d2282005-07-04 09:38:58 +00001110 vassert(IS_VALID_VREGNO(k));
1111 vassert(IS_VALID_VREGNO(m));
sewardjad572dd2004-12-30 01:44:51 +00001112 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1113 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
sewardja5b50222015-03-26 07:18:32 +00001114 if (DEBUG_REGALLOC) {
sewardj432b1c92004-10-30 13:00:55 +00001115 vex_printf("COALESCE ");
sewardja5b50222015-03-26 07:18:32 +00001116 (*ppReg)(vregS);
1117 vex_printf(" -> ");
1118 (*ppReg)(vregD);
1119 vex_printf("\n\n");
1120 }
sewardj432b1c92004-10-30 13:00:55 +00001121 /* Find the state entry for vregS. */
sewardja5b50222015-03-26 07:18:32 +00001122 Int n = vreg_state[k]; /* k is the index of vregS */
1123 if (n == INVALID_RREG_NO) {
1124 /* vregS is not currently in a real register. So we can't
1125 do the coalescing. Give up. */
sewardj432b1c92004-10-30 13:00:55 +00001126 goto cannot_coalesce;
sewardja5b50222015-03-26 07:18:32 +00001127 }
1128 vassert(IS_VALID_RREGNO(n));
sewardj432b1c92004-10-30 13:00:55 +00001129
1130 /* Finally, we can do the coalescing. It's trivial -- merely
1131 claim vregS's register for vregD. */
sewardja5b50222015-03-26 07:18:32 +00001132 rreg_state[n].vreg = vregD;
1133 vassert(IS_VALID_VREGNO(hregIndex(vregD)));
1134 vassert(IS_VALID_VREGNO(hregIndex(vregS)));
1135 vreg_state[hregIndex(vregD)] = toShort(n);
1136 vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
sewardj432b1c92004-10-30 13:00:55 +00001137
sewardj607ca2d2007-08-25 21:11:33 +00001138 /* This rreg has become associated with a different vreg and
1139 hence with a different spill slot. Play safe. */
sewardja5b50222015-03-26 07:18:32 +00001140 rreg_state[n].eq_spill_slot = False;
sewardj607ca2d2007-08-25 21:11:33 +00001141
sewardj432b1c92004-10-30 13:00:55 +00001142 /* Move on to the next insn. We skip the post-insn stuff for
1143 fixed registers, since this move should not interact with
1144 them in any way. */
1145 continue;
1146 }
1147 cannot_coalesce:
1148
1149 /* ------ Free up rregs bound to dead vregs ------ */
1150
1151 /* Look for vregs whose live range has just ended, and
1152 mark the associated rreg as free. */
1153
sewardja5b50222015-03-26 07:18:32 +00001154 for (Int j = 0; j < n_rregs; j++) {
sewardjee7d2282005-07-04 09:38:58 +00001155 if (rreg_state[j].disp != Bound)
sewardj432b1c92004-10-30 13:00:55 +00001156 continue;
sewardja5b50222015-03-26 07:18:32 +00001157 UInt vregno = hregIndex(rreg_state[j].vreg);
florianc17ce022013-01-24 16:18:48 +00001158 vassert(IS_VALID_VREGNO(vregno));
1159 if (vreg_lrs[vregno].dead_before <= ii) {
sewardjee7d2282005-07-04 09:38:58 +00001160 rreg_state[j].disp = Free;
sewardj607ca2d2007-08-25 21:11:33 +00001161 rreg_state[j].eq_spill_slot = False;
sewardja5b50222015-03-26 07:18:32 +00001162 Int m = hregIndex(rreg_state[j].vreg);
sewardjee7d2282005-07-04 09:38:58 +00001163 vassert(IS_VALID_VREGNO(m));
1164 vreg_state[m] = INVALID_RREG_NO;
sewardj432b1c92004-10-30 13:00:55 +00001165 if (DEBUG_REGALLOC) {
1166 vex_printf("free up ");
sewardja5b50222015-03-26 07:18:32 +00001167 (*ppReg)(univ->regs[j]);
sewardj432b1c92004-10-30 13:00:55 +00001168 vex_printf("\n");
1169 }
1170 }
1171 }
1172
1173 /* ------ Pre-instruction actions for fixed rreg uses ------ */
1174
1175 /* Now we have to deal with rregs which are about to be made
1176 live by this instruction -- in other words, are entering into
1177 one of their live ranges. If any such rreg holds a vreg, we
1178 will have to free up the rreg. The simplest solution which
1179 is correct is to spill the rreg.
1180
1181 Note we could do better:
1182 * Could move it into some other free rreg, if one is available
1183
sewardjc3bc60a2006-12-01 02:59:17 +00001184 Do this efficiently, by incrementally stepping along an array
1185 of rreg HLRs that are known to be sorted by start point
1186 (their .live_after field).
sewardj432b1c92004-10-30 13:00:55 +00001187 */
sewardjc3bc60a2006-12-01 02:59:17 +00001188 while (True) {
1189 vassert(rreg_lrs_la_next >= 0);
1190 vassert(rreg_lrs_la_next <= rreg_lrs_used);
1191 if (rreg_lrs_la_next == rreg_lrs_used)
1192 break; /* no more real reg live ranges to consider */
1193 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1194 break; /* next live range does not yet start */
1195 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1196 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1197 Find the associated rreg_state entry. */
1198 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1199 Real register live ranges are guaranteed to be well-formed
1200 in that they start with a write to the register -- Stage 2
1201 rejects any code not satisfying this. So the correct
1202 question to ask is whether
1203 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1204 whether the reg becomes live after this insn -- rather
1205 than before it. */
sewardja5b50222015-03-26 07:18:32 +00001206 if (DEBUG_REGALLOC) {
1207 vex_printf("need to free up rreg: ");
1208 (*ppReg)(rreg_lrs_la[rreg_lrs_la_next].rreg);
1209 vex_printf("\n\n");
1210 }
1211 Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
1212
sewardjc3bc60a2006-12-01 02:59:17 +00001213 /* If this fails, we don't have an entry for this rreg.
1214 Which we should. */
1215 vassert(IS_VALID_RREGNO(k));
sewardja5b50222015-03-26 07:18:32 +00001216 Int m = hregIndex(rreg_state[k].vreg);
sewardjc3bc60a2006-12-01 02:59:17 +00001217 if (rreg_state[k].disp == Bound) {
1218 /* Yes, there is an associated vreg. Spill it if it's
1219 still live. */
1220 vassert(IS_VALID_VREGNO(m));
1221 vreg_state[m] = INVALID_RREG_NO;
1222 if (vreg_lrs[m].dead_before > ii) {
1223 vassert(vreg_lrs[m].reg_class != HRcINVALID);
sewardj607ca2d2007-08-25 21:11:33 +00001224 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
sewardj6c299f32009-12-31 18:00:12 +00001225 HInstr* spill1 = NULL;
1226 HInstr* spill2 = NULL;
sewardja5b50222015-03-26 07:18:32 +00001227 (*genSpill)( &spill1, &spill2, univ->regs[k],
sewardj6c299f32009-12-31 18:00:12 +00001228 vreg_lrs[m].spill_offset, mode64 );
1229 vassert(spill1 || spill2); /* can't both be NULL */
1230 if (spill1)
1231 EMIT_INSTR(spill1);
1232 if (spill2)
1233 EMIT_INSTR(spill2);
sewardj607ca2d2007-08-25 21:11:33 +00001234 }
1235 rreg_state[k].eq_spill_slot = True;
sewardj432b1c92004-10-30 13:00:55 +00001236 }
sewardj432b1c92004-10-30 13:00:55 +00001237 }
sewardjc3bc60a2006-12-01 02:59:17 +00001238 rreg_state[k].disp = Unavail;
1239 rreg_state[k].vreg = INVALID_HREG;
sewardj607ca2d2007-08-25 21:11:33 +00001240 rreg_state[k].eq_spill_slot = False;
sewardjc3bc60a2006-12-01 02:59:17 +00001241
1242 /* check for further rregs entering HLRs at this point */
1243 rreg_lrs_la_next++;
sewardj432b1c92004-10-30 13:00:55 +00001244 }
1245
sewardja5b50222015-03-26 07:18:32 +00001246 if (DEBUG_REGALLOC) {
1247 vex_printf("After pre-insn actions for fixed regs:\n");
1248 PRINT_STATE;
1249 vex_printf("\n");
1250 }
sewardj432b1c92004-10-30 13:00:55 +00001251
1252 /* ------ Deal with the current instruction. ------ */
1253
1254 /* Finally we can begin the processing of this instruction
1255 itself. The aim is to free up enough rregs for this insn.
1256 This may generate spill stores since we may have to evict
1257 some vregs currently in rregs. Also generates spill loads.
1258 We also build up the final vreg->rreg mapping to be applied
1259 to the insn. */
sewardj432b1c92004-10-30 13:00:55 +00001260
1261 initHRegRemap(&remap);
1262
sewardjfb7373a2007-08-25 21:29:03 +00001263 /* ------------ BEGIN directReload optimisation ----------- */
1264
1265 /* If the instruction reads exactly one vreg which is currently
1266 in a spill slot, and this is last use of that vreg, see if we
sewardja5b50222015-03-26 07:18:32 +00001267 can convert the instruction into one that reads directly from
1268 the spill slot. This is clearly only possible for x86 and
1269 amd64 targets, since ppc and arm are load-store
1270 architectures. If successful, replace instrs_in->arr[ii]
1271 with this new instruction, and recompute its reg usage, so
1272 that the change is invisible to the standard-case handling
1273 that follows. */
sewardjfb7373a2007-08-25 21:29:03 +00001274
sewardja5b50222015-03-26 07:18:32 +00001275 if (directReload && reg_usage_arr[ii].n_vRegs <= 2) {
1276 Bool debug_direct_reload = False;
sewardjfb7373a2007-08-25 21:29:03 +00001277 HReg cand = INVALID_HREG;
1278 Bool nreads = 0;
1279 Short spilloff = 0;
1280
sewardja5b50222015-03-26 07:18:32 +00001281 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
sewardjfb7373a2007-08-25 21:29:03 +00001282
sewardja5b50222015-03-26 07:18:32 +00001283 HReg vreg = reg_usage_arr[ii].vRegs[j];
1284 vassert(hregIsVirtual(vreg));
sewardjfb7373a2007-08-25 21:29:03 +00001285
sewardja5b50222015-03-26 07:18:32 +00001286 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
sewardjfb7373a2007-08-25 21:29:03 +00001287 nreads++;
sewardja5b50222015-03-26 07:18:32 +00001288 Int m = hregIndex(vreg);
sewardjfb7373a2007-08-25 21:29:03 +00001289 vassert(IS_VALID_VREGNO(m));
sewardja5b50222015-03-26 07:18:32 +00001290 Int k = vreg_state[m];
sewardjfb7373a2007-08-25 21:29:03 +00001291 if (!IS_VALID_RREGNO(k)) {
1292 /* ok, it is spilled. Now, is this its last use? */
1293 vassert(vreg_lrs[m].dead_before >= ii+1);
1294 if (vreg_lrs[m].dead_before == ii+1
florian79efdc62013-02-11 00:47:35 +00001295 && hregIsInvalid(cand)) {
sewardjfb7373a2007-08-25 21:29:03 +00001296 spilloff = vreg_lrs[m].spill_offset;
1297 cand = vreg;
1298 }
1299 }
1300 }
1301 }
1302
florian79efdc62013-02-11 00:47:35 +00001303 if (nreads == 1 && ! hregIsInvalid(cand)) {
sewardjfb7373a2007-08-25 21:29:03 +00001304 HInstr* reloaded;
sewardja5b50222015-03-26 07:18:32 +00001305 if (reg_usage_arr[ii].n_vRegs == 2)
1306 vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
1307 reg_usage_arr[ii].vRegs[1]));
sewardjfb7373a2007-08-25 21:29:03 +00001308
1309 reloaded = directReload ( instrs_in->arr[ii], cand, spilloff );
1310 if (debug_direct_reload && !reloaded) {
1311 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1312 ppInstr(instrs_in->arr[ii], mode64);
1313 }
1314 if (reloaded) {
1315 /* Update info about the insn, so it looks as if it had
1316 been in this form all along. */
1317 instrs_in->arr[ii] = reloaded;
sewardja5b50222015-03-26 07:18:32 +00001318 (*getRegUsage)( &reg_usage_arr[ii], instrs_in->arr[ii], mode64 );
sewardjfb7373a2007-08-25 21:29:03 +00001319 if (debug_direct_reload && !reloaded) {
1320 vex_printf(" --> ");
1321 ppInstr(reloaded, mode64);
1322 }
1323 }
1324
1325 if (debug_direct_reload && !reloaded)
1326 vex_printf("\n");
1327 }
1328
1329 }
1330
1331 /* ------------ END directReload optimisation ------------ */
1332
sewardja5b50222015-03-26 07:18:32 +00001333 /* for each virtual reg mentioned in the insn ... */
1334 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
sewardj432b1c92004-10-30 13:00:55 +00001335
sewardja5b50222015-03-26 07:18:32 +00001336 HReg vreg = reg_usage_arr[ii].vRegs[j];
1337 vassert(hregIsVirtual(vreg));
sewardj432b1c92004-10-30 13:00:55 +00001338
sewardja5b50222015-03-26 07:18:32 +00001339 if (0) {
1340 vex_printf("considering "); (*ppReg)(vreg); vex_printf("\n");
1341 }
sewardj432b1c92004-10-30 13:00:55 +00001342
1343 /* Now we're trying to find a rreg for "vreg". First of all,
1344 if it already has an rreg assigned, we don't need to do
sewardja5b50222015-03-26 07:18:32 +00001345 anything more. Inspect the current state to find out. */
1346 Int m = hregIndex(vreg);
sewardjee7d2282005-07-04 09:38:58 +00001347 vassert(IS_VALID_VREGNO(m));
sewardja5b50222015-03-26 07:18:32 +00001348 Int n = vreg_state[m];
1349 if (IS_VALID_RREGNO(n)) {
1350 vassert(rreg_state[n].disp == Bound);
1351 addToHRegRemap(&remap, vreg, univ->regs[n]);
sewardj607ca2d2007-08-25 21:11:33 +00001352 /* If this rreg is written or modified, mark it as different
1353 from any spill slot value. */
sewardja5b50222015-03-26 07:18:32 +00001354 if (reg_usage_arr[ii].vMode[j] != HRmRead)
1355 rreg_state[n].eq_spill_slot = False;
sewardj432b1c92004-10-30 13:00:55 +00001356 continue;
sewardjee7d2282005-07-04 09:38:58 +00001357 } else {
sewardja5b50222015-03-26 07:18:32 +00001358 vassert(n == INVALID_RREG_NO);
sewardj432b1c92004-10-30 13:00:55 +00001359 }
1360
1361 /* No luck. The next thing to do is see if there is a
1362 currently free rreg available, of the correct class. If
1363 so, bag it. NOTE, we could improve this by selecting an
1364 rreg for which the next live-range event is as far ahead
1365 as possible. */
sewardja5b50222015-03-26 07:18:32 +00001366 Int k_suboptimal = -1;
1367 Int k;
sewardjee7d2282005-07-04 09:38:58 +00001368 for (k = 0; k < n_rregs; k++) {
1369 if (rreg_state[k].disp != Free
sewardja5b50222015-03-26 07:18:32 +00001370 || hregClass(univ->regs[k]) != hregClass(vreg))
sewardj432b1c92004-10-30 13:00:55 +00001371 continue;
sewardjee7d2282005-07-04 09:38:58 +00001372 if (rreg_state[k].has_hlrs) {
sewardj432b1c92004-10-30 13:00:55 +00001373 /* Well, at least we can use k_suboptimal if we really
1374 have to. Keep on looking for a better candidate. */
1375 k_suboptimal = k;
1376 } else {
1377 /* Found a preferable reg. Use it. */
1378 k_suboptimal = -1;
1379 break;
1380 }
1381 }
1382 if (k_suboptimal >= 0)
1383 k = k_suboptimal;
1384
sewardjee7d2282005-07-04 09:38:58 +00001385 if (k < n_rregs) {
1386 rreg_state[k].disp = Bound;
1387 rreg_state[k].vreg = vreg;
sewardja5b50222015-03-26 07:18:32 +00001388 Int p = hregIndex(vreg);
1389 vassert(IS_VALID_VREGNO(p));
1390 vreg_state[p] = toShort(k);
1391 addToHRegRemap(&remap, vreg, univ->regs[k]);
sewardj607ca2d2007-08-25 21:11:33 +00001392 /* Generate a reload if needed. This only creates needed
1393 reloads because the live range builder for vregs will
1394 guarantee that the first event for a vreg is a write.
1395 Hence, if this reference is not a write, it cannot be
1396 the first reference for this vreg, and so a reload is
1397 indeed needed. */
sewardja5b50222015-03-26 07:18:32 +00001398 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1399 vassert(vreg_lrs[p].reg_class != HRcINVALID);
sewardj6c299f32009-12-31 18:00:12 +00001400 HInstr* reload1 = NULL;
1401 HInstr* reload2 = NULL;
sewardja5b50222015-03-26 07:18:32 +00001402 (*genReload)( &reload1, &reload2, univ->regs[k],
1403 vreg_lrs[p].spill_offset, mode64 );
sewardj6c299f32009-12-31 18:00:12 +00001404 vassert(reload1 || reload2); /* can't both be NULL */
1405 if (reload1)
1406 EMIT_INSTR(reload1);
1407 if (reload2)
1408 EMIT_INSTR(reload2);
sewardj7342b1b2008-05-30 22:58:07 +00001409 /* This rreg is read or modified by the instruction.
1410 If it's merely read we can claim it now equals the
1411 spill slot, but not so if it is modified. */
sewardja5b50222015-03-26 07:18:32 +00001412 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
sewardj7342b1b2008-05-30 22:58:07 +00001413 rreg_state[k].eq_spill_slot = True;
1414 } else {
sewardja5b50222015-03-26 07:18:32 +00001415 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
sewardj7342b1b2008-05-30 22:58:07 +00001416 rreg_state[k].eq_spill_slot = False;
1417 }
sewardj607ca2d2007-08-25 21:11:33 +00001418 } else {
1419 rreg_state[k].eq_spill_slot = False;
sewardj432b1c92004-10-30 13:00:55 +00001420 }
sewardj607ca2d2007-08-25 21:11:33 +00001421
sewardj432b1c92004-10-30 13:00:55 +00001422 continue;
1423 }
1424
1425 /* Well, now we have no option but to spill a vreg. It's
1426 important to make a good choice of vreg to spill, and of
1427 course we need to be careful not to spill a vreg which is
1428 needed by this insn. */
1429
sewardjee7d2282005-07-04 09:38:58 +00001430 /* First, mark in the rreg_state, those rregs which are not spill
sewardj432b1c92004-10-30 13:00:55 +00001431 candidates, due to holding a vreg mentioned by this
1432 instruction. Or being of the wrong class. */
sewardjee7d2282005-07-04 09:38:58 +00001433 for (k = 0; k < n_rregs; k++) {
1434 rreg_state[k].is_spill_cand = False;
1435 if (rreg_state[k].disp != Bound)
sewardj432b1c92004-10-30 13:00:55 +00001436 continue;
sewardja5b50222015-03-26 07:18:32 +00001437 if (hregClass(univ->regs[k]) != hregClass(vreg))
sewardj432b1c92004-10-30 13:00:55 +00001438 continue;
sewardjee7d2282005-07-04 09:38:58 +00001439 rreg_state[k].is_spill_cand = True;
sewardja5b50222015-03-26 07:18:32 +00001440 /* Note, the following loop visits only the virtual regs
1441 mentioned by the instruction. */
1442 for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
1443 if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
sewardjee7d2282005-07-04 09:38:58 +00001444 rreg_state[k].is_spill_cand = False;
sewardj432b1c92004-10-30 13:00:55 +00001445 break;
1446 }
1447 }
1448 }
1449
1450 /* We can choose to spill any rreg satisfying
sewardjee7d2282005-07-04 09:38:58 +00001451 rreg_state[r].is_spill_cand (so to speak). Choose r so that
sewardj432b1c92004-10-30 13:00:55 +00001452 the next use of its associated vreg is as far ahead as
1453 possible, in the hope that this will minimise the number
1454 of consequent reloads required. */
sewardja5b50222015-03-26 07:18:32 +00001455 Int spillee
sewardj432b1c92004-10-30 13:00:55 +00001456 = findMostDistantlyMentionedVReg (
sewardja5b50222015-03-26 07:18:32 +00001457 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state, n_rregs );
sewardj432b1c92004-10-30 13:00:55 +00001458
1459 if (spillee == -1) {
1460 /* Hmmmmm. There don't appear to be any spill candidates.
1461 We're hosed. */
1462 vex_printf("reg_alloc: can't find a register in class: ");
1463 ppHRegClass(hregClass(vreg));
1464 vex_printf("\n");
1465 vpanic("reg_alloc: can't create a free register.");
1466 }
1467
sewardjee7d2282005-07-04 09:38:58 +00001468 /* Right. So we're going to spill rreg_state[spillee]. */
1469 vassert(IS_VALID_RREGNO(spillee));
1470 vassert(rreg_state[spillee].disp == Bound);
sewardj432b1c92004-10-30 13:00:55 +00001471 /* check it's the right class */
sewardja5b50222015-03-26 07:18:32 +00001472 vassert(hregClass(univ->regs[spillee]) == hregClass(vreg));
sewardj432b1c92004-10-30 13:00:55 +00001473 /* check we're not ejecting the vreg for which we are trying
1474 to free up a register. */
florian79efdc62013-02-11 00:47:35 +00001475 vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
sewardj432b1c92004-10-30 13:00:55 +00001476
sewardja5b50222015-03-26 07:18:32 +00001477 m = hregIndex(rreg_state[spillee].vreg);
sewardjee7d2282005-07-04 09:38:58 +00001478 vassert(IS_VALID_VREGNO(m));
sewardj432b1c92004-10-30 13:00:55 +00001479
1480 /* So here's the spill store. Assert that we're spilling a
1481 live vreg. */
sewardjad572dd2004-12-30 01:44:51 +00001482 vassert(vreg_lrs[m].dead_before > ii);
1483 vassert(vreg_lrs[m].reg_class != HRcINVALID);
sewardj607ca2d2007-08-25 21:11:33 +00001484 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
sewardj6c299f32009-12-31 18:00:12 +00001485 HInstr* spill1 = NULL;
1486 HInstr* spill2 = NULL;
sewardja5b50222015-03-26 07:18:32 +00001487 (*genSpill)( &spill1, &spill2, univ->regs[spillee],
sewardj6c299f32009-12-31 18:00:12 +00001488 vreg_lrs[m].spill_offset, mode64 );
1489 vassert(spill1 || spill2); /* can't both be NULL */
1490 if (spill1)
1491 EMIT_INSTR(spill1);
1492 if (spill2)
1493 EMIT_INSTR(spill2);
sewardj607ca2d2007-08-25 21:11:33 +00001494 }
sewardj432b1c92004-10-30 13:00:55 +00001495
sewardjee7d2282005-07-04 09:38:58 +00001496 /* Update the rreg_state to reflect the new assignment for this
sewardj432b1c92004-10-30 13:00:55 +00001497 rreg. */
sewardjee7d2282005-07-04 09:38:58 +00001498 rreg_state[spillee].vreg = vreg;
1499 vreg_state[m] = INVALID_RREG_NO;
1500
sewardj607ca2d2007-08-25 21:11:33 +00001501 rreg_state[spillee].eq_spill_slot = False; /* be safe */
1502
sewardja5b50222015-03-26 07:18:32 +00001503 m = hregIndex(vreg);
sewardjee7d2282005-07-04 09:38:58 +00001504 vassert(IS_VALID_VREGNO(m));
sewardjd4d3dd62005-07-04 10:08:24 +00001505 vreg_state[m] = toShort(spillee);
sewardj432b1c92004-10-30 13:00:55 +00001506
1507 /* Now, if this vreg is being read or modified (as opposed to
1508 written), we have to generate a reload for it. */
sewardja5b50222015-03-26 07:18:32 +00001509 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
sewardjad572dd2004-12-30 01:44:51 +00001510 vassert(vreg_lrs[m].reg_class != HRcINVALID);
sewardj6c299f32009-12-31 18:00:12 +00001511 HInstr* reload1 = NULL;
1512 HInstr* reload2 = NULL;
sewardja5b50222015-03-26 07:18:32 +00001513 (*genReload)( &reload1, &reload2, univ->regs[spillee],
sewardj6c299f32009-12-31 18:00:12 +00001514 vreg_lrs[m].spill_offset, mode64 );
1515 vassert(reload1 || reload2); /* can't both be NULL */
1516 if (reload1)
1517 EMIT_INSTR(reload1);
1518 if (reload2)
1519 EMIT_INSTR(reload2);
sewardj7342b1b2008-05-30 22:58:07 +00001520 /* This rreg is read or modified by the instruction.
1521 If it's merely read we can claim it now equals the
1522 spill slot, but not so if it is modified. */
sewardja5b50222015-03-26 07:18:32 +00001523 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
sewardj7342b1b2008-05-30 22:58:07 +00001524 rreg_state[spillee].eq_spill_slot = True;
1525 } else {
sewardja5b50222015-03-26 07:18:32 +00001526 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
sewardj7342b1b2008-05-30 22:58:07 +00001527 rreg_state[spillee].eq_spill_slot = False;
1528 }
sewardj432b1c92004-10-30 13:00:55 +00001529 }
1530
1531 /* So after much twisting and turning, we have vreg mapped to
sewardj7342b1b2008-05-30 22:58:07 +00001532 rreg_state[spillee].rreg. Note that in the map. */
sewardja5b50222015-03-26 07:18:32 +00001533 addToHRegRemap(&remap, vreg, univ->regs[spillee]);
sewardj432b1c92004-10-30 13:00:55 +00001534
sewardja5b50222015-03-26 07:18:32 +00001535 } /* iterate over virtual registers in this instruction. */
sewardj432b1c92004-10-30 13:00:55 +00001536
1537 /* We've finished clowning around with registers in this instruction.
1538 Three results:
sewardjee7d2282005-07-04 09:38:58 +00001539 - the running rreg_state[] has been updated
sewardj432b1c92004-10-30 13:00:55 +00001540 - a suitable vreg->rreg mapping for this instruction has been
1541 constructed
1542 - spill and reload instructions may have been emitted.
1543
1544 The final step is to apply the mapping to the instruction,
1545 and emit that.
1546 */
1547
1548 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
cerion92b64362005-12-13 12:02:26 +00001549 (*mapRegs)( &remap, instrs_in->arr[ii], mode64 );
sewardj432b1c92004-10-30 13:00:55 +00001550 EMIT_INSTR( instrs_in->arr[ii] );
1551
sewardja5b50222015-03-26 07:18:32 +00001552 if (DEBUG_REGALLOC) {
1553 vex_printf("After dealing with current insn:\n");
1554 PRINT_STATE;
1555 vex_printf("\n");
1556 }
sewardj432b1c92004-10-30 13:00:55 +00001557
1558 /* ------ Post-instruction actions for fixed rreg uses ------ */
1559
1560 /* Now we need to check for rregs exiting fixed live ranges
1561 after this instruction, and if so mark them as free. */
sewardjc3bc60a2006-12-01 02:59:17 +00001562 while (True) {
1563 vassert(rreg_lrs_db_next >= 0);
1564 vassert(rreg_lrs_db_next <= rreg_lrs_used);
1565 if (rreg_lrs_db_next == rreg_lrs_used)
1566 break; /* no more real reg live ranges to consider */
1567 if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1568 break; /* next live range does not yet start */
1569 vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1570 /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
1571 range. Mark it as such in the main rreg_state array. */
sewardja5b50222015-03-26 07:18:32 +00001572 HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
1573 vassert(!hregIsVirtual(reg));
1574 Int k = hregIndex(reg);
1575 vassert(IS_VALID_RREGNO(k));
sewardjc3bc60a2006-12-01 02:59:17 +00001576 vassert(rreg_state[k].disp == Unavail);
1577 rreg_state[k].disp = Free;
1578 rreg_state[k].vreg = INVALID_HREG;
sewardj607ca2d2007-08-25 21:11:33 +00001579 rreg_state[k].eq_spill_slot = False;
sewardjc3bc60a2006-12-01 02:59:17 +00001580
1581 /* check for further rregs leaving HLRs at this point */
1582 rreg_lrs_db_next++;
sewardj432b1c92004-10-30 13:00:55 +00001583 }
1584
sewardja5b50222015-03-26 07:18:32 +00001585 if (DEBUG_REGALLOC) {
1586 vex_printf("After post-insn actions for fixed regs:\n");
1587 PRINT_STATE;
1588 vex_printf("\n");
1589 }
sewardj432b1c92004-10-30 13:00:55 +00001590
1591 } /* iterate over insns */
1592
1593 /* ------ END: Process each insn in turn. ------ */
1594
sewardjee7d2282005-07-04 09:38:58 +00001595 /* free(rreg_state); */
sewardjad572dd2004-12-30 01:44:51 +00001596 /* free(rreg_lrs); */
1597 /* if (vreg_lrs) free(vreg_lrs); */
sewardj432b1c92004-10-30 13:00:55 +00001598
1599 /* Paranoia */
sewardjc3bc60a2006-12-01 02:59:17 +00001600 vassert(rreg_lrs_la_next == rreg_lrs_used);
1601 vassert(rreg_lrs_db_next == rreg_lrs_used);
1602
sewardj432b1c92004-10-30 13:00:55 +00001603 return instrs_out;
1604
1605# undef INVALID_INSTRNO
1606# undef EMIT_INSTR
1607# undef PRINT_STATE
1608}
1609
1610
1611
1612/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00001613/*--- host_reg_alloc2.c ---*/
sewardj432b1c92004-10-30 13:00:55 +00001614/*---------------------------------------------------------------*/