Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright © 2010 Intel Corporation |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice (including the next |
| 12 | * paragraph) shall be included in all copies or substantial portions of the |
| 13 | * Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| 20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| 21 | * IN THE SOFTWARE. |
| 22 | * |
| 23 | * Authors: |
| 24 | * Eric Anholt <eric@anholt.net> |
| 25 | * |
| 26 | */ |
| 27 | |
| 28 | /** @file register_allocate.c |
| 29 | * |
| 30 | * Graph-coloring register allocator. |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 31 | * |
| 32 | * The basic idea of graph coloring is to make a node in a graph for |
| 33 | * every thing that needs a register (color) number assigned, and make |
| 34 | * edges in the graph between nodes that interfere (can't be allocated |
| 35 | * to the same register at the same time). |
| 36 | * |
 * During the "simplify" process, any node with fewer edges than
 * there are registers can trivially be assigned a
| 39 | * register regardless of what its neighbors choose, so that node is |
| 40 | * pushed on a stack and removed (with its edges) from the graph. |
| 41 | * That likely causes other nodes to become trivially colorable as well. |
| 42 | * |
| 43 | * Then during the "select" process, nodes are popped off of that |
| 44 | * stack, their edges restored, and assigned a color different from |
| 45 | * their neighbors. Because they were pushed on the stack only when |
| 46 | * they were trivially colorable, any color chosen won't interfere |
| 47 | * with the registers to be popped later. |
| 48 | * |
| 49 | * The downside to most graph coloring is that real hardware often has |
| 50 | * limitations, like registers that need to be allocated to a node in |
| 51 | * pairs, or aligned on some boundary. This implementation follows |
| 52 | * the paper "Retargetable Graph-Coloring Register Allocation for |
| 53 | * Irregular Architectures" by Johan Runeson and Sven-Olof Nyström. |
| 54 | * |
| 55 | * In this system, there are register classes each containing various |
| 56 | * registers, and registers may interfere with other registers. For |
| 57 | * example, one might have a class of base registers, and a class of |
| 58 | * aligned register pairs that would each interfere with their pair of |
| 59 | * the base registers. Each node has a register class it needs to be |
| 60 | * assigned to. Define p(B) to be the size of register class B, and |
| 61 | * q(B,C) to be the number of registers in B that the worst choice |
| 62 | * register in C could conflict with. Then, this system replaces the |
| 63 | * basic graph coloring test of "fewer edges from this node than there |
| 64 | * are registers" with "For this node of class B, the sum of q(B,C) |
 * for each neighbor node of class C is less than p(B)".
| 66 | * |
| 67 | * A nice feature of the pq test is that q(B,C) can be computed once |
| 68 | * up front and stored in a 2-dimensional array, so that the cost of |
| 69 | * coloring a node is constant with the number of registers. We do |
| 70 | * this during ra_set_finalize(). |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 71 | */ |
| 72 | |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 73 | #include <stdbool.h> |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 74 | |
Eric Anholt | 517e01b | 2014-09-22 12:24:21 -0700 | [diff] [blame] | 75 | #include "ralloc.h" |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 76 | #include "main/imports.h" |
| 77 | #include "main/macros.h" |
| 78 | #include "main/mtypes.h" |
Eric Anholt | b53d035 | 2015-02-11 15:05:06 -0800 | [diff] [blame] | 79 | #include "util/bitset.h" |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 80 | #include "register_allocate.h" |
| 81 | |
Jan Vesely | bc18b48 | 2014-12-11 15:05:17 -0500 | [diff] [blame] | 82 | #define NO_REG ~0U |
Tom Stellard | e4a765a | 2011-03-26 22:56:08 -0700 | [diff] [blame] | 83 | |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 84 | struct ra_reg { |
Eric Anholt | 41097db | 2014-03-17 14:53:08 -0700 | [diff] [blame] | 85 | BITSET_WORD *conflicts; |
Eric Anholt | 754b9c5 | 2011-01-17 18:34:43 -0800 | [diff] [blame] | 86 | unsigned int *conflict_list; |
| 87 | unsigned int conflict_list_size; |
| 88 | unsigned int num_conflicts; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 89 | }; |
| 90 | |
| 91 | struct ra_regs { |
| 92 | struct ra_reg *regs; |
| 93 | unsigned int count; |
| 94 | |
| 95 | struct ra_class **classes; |
| 96 | unsigned int class_count; |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 97 | |
| 98 | bool round_robin; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 99 | }; |
| 100 | |
| 101 | struct ra_class { |
Kenneth Graunke | da1cce2 | 2014-02-21 19:50:15 -0800 | [diff] [blame] | 102 | /** |
| 103 | * Bitset indicating which registers belong to this class. |
| 104 | * |
| 105 | * (If bit N is set, then register N belongs to this class.) |
| 106 | */ |
| 107 | BITSET_WORD *regs; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 108 | |
| 109 | /** |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 110 | * p(B) in Runeson/Nyström paper. |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 111 | * |
| 112 | * This is "how many regs are in the set." |
| 113 | */ |
| 114 | unsigned int p; |
| 115 | |
| 116 | /** |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 117 | * q(B,C) (indexed by C, B is this register class) in |
| 118 | * Runeson/Nyström paper. This is "how many registers of B could |
| 119 | * the worst choice register from C conflict with". |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 120 | */ |
| 121 | unsigned int *q; |
| 122 | }; |
| 123 | |
| 124 | struct ra_node { |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 125 | /** @{ |
| 126 | * |
| 127 | * List of which nodes this node interferes with. This should be |
| 128 | * symmetric with the other node. |
| 129 | */ |
Eric Anholt | 11b8df0 | 2013-02-19 17:01:41 -0800 | [diff] [blame] | 130 | BITSET_WORD *adjacency; |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 131 | unsigned int *adjacency_list; |
Eric Anholt | 6aa3afb | 2013-02-19 16:46:41 -0800 | [diff] [blame] | 132 | unsigned int adjacency_list_size; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 133 | unsigned int adjacency_count; |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 134 | /** @} */ |
| 135 | |
| 136 | unsigned int class; |
| 137 | |
Tom Stellard | e4a765a | 2011-03-26 22:56:08 -0700 | [diff] [blame] | 138 | /* Register, if assigned, or NO_REG. */ |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 139 | unsigned int reg; |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 140 | |
| 141 | /** |
| 142 | * Set when the node is in the trivially colorable stack. When |
| 143 | * set, the adjacency to this node is ignored, to implement the |
| 144 | * "remove the edge from the graph" in simplification without |
| 145 | * having to actually modify the adjacency_list. |
| 146 | */ |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 147 | bool in_stack; |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 148 | |
Connor Abbott | 567e276 | 2014-07-31 18:57:21 -0700 | [diff] [blame] | 149 | /** |
| 150 | * The q total, as defined in the Runeson/Nyström paper, for all the |
| 151 | * interfering nodes not in the stack. |
| 152 | */ |
| 153 | unsigned int q_total; |
| 154 | |
Eric Anholt | 04e3f1d | 2011-04-24 13:44:32 -0700 | [diff] [blame] | 155 | /* For an implementation that needs register spilling, this is the |
| 156 | * approximate cost of spilling this node. |
| 157 | */ |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 158 | float spill_cost; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 159 | }; |
| 160 | |
| 161 | struct ra_graph { |
| 162 | struct ra_regs *regs; |
| 163 | /** |
| 164 | * the variables that need register allocation. |
| 165 | */ |
| 166 | struct ra_node *nodes; |
| 167 | unsigned int count; /**< count of nodes. */ |
| 168 | |
| 169 | unsigned int *stack; |
| 170 | unsigned int stack_count; |
Francisco Jerez | f80af89 | 2015-02-16 13:38:39 +0200 | [diff] [blame] | 171 | |
| 172 | /** |
| 173 | * Tracks the start of the set of optimistically-colored registers in the |
| 174 | * stack. |
| 175 | */ |
| 176 | unsigned int stack_optimistic_start; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 177 | }; |
| 178 | |
Eric Anholt | b972744 | 2012-01-12 12:51:34 -0800 | [diff] [blame] | 179 | /** |
| 180 | * Creates a set of registers for the allocator. |
| 181 | * |
| 182 | * mem_ctx is a ralloc context for the allocator. The reg set may be freed |
| 183 | * using ralloc_free(). |
| 184 | */ |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 185 | struct ra_regs * |
Jason Ekstrand | f01bdb0 | 2015-08-15 09:58:32 -0700 | [diff] [blame] | 186 | ra_alloc_reg_set(void *mem_ctx, unsigned int count, bool need_conflict_lists) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 187 | { |
| 188 | unsigned int i; |
| 189 | struct ra_regs *regs; |
| 190 | |
Eric Anholt | b972744 | 2012-01-12 12:51:34 -0800 | [diff] [blame] | 191 | regs = rzalloc(mem_ctx, struct ra_regs); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 192 | regs->count = count; |
Kenneth Graunke | d3073f5 | 2011-01-21 14:32:31 -0800 | [diff] [blame] | 193 | regs->regs = rzalloc_array(regs, struct ra_reg, count); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 194 | |
| 195 | for (i = 0; i < count; i++) { |
Eric Anholt | 41097db | 2014-03-17 14:53:08 -0700 | [diff] [blame] | 196 | regs->regs[i].conflicts = rzalloc_array(regs->regs, BITSET_WORD, |
| 197 | BITSET_WORDS(count)); |
| 198 | BITSET_SET(regs->regs[i].conflicts, i); |
Eric Anholt | 754b9c5 | 2011-01-17 18:34:43 -0800 | [diff] [blame] | 199 | |
Jason Ekstrand | f01bdb0 | 2015-08-15 09:58:32 -0700 | [diff] [blame] | 200 | if (need_conflict_lists) { |
| 201 | regs->regs[i].conflict_list = ralloc_array(regs->regs, |
| 202 | unsigned int, 4); |
| 203 | regs->regs[i].conflict_list_size = 4; |
| 204 | regs->regs[i].conflict_list[0] = i; |
| 205 | } else { |
| 206 | regs->regs[i].conflict_list = NULL; |
| 207 | regs->regs[i].conflict_list_size = 0; |
| 208 | } |
Eric Anholt | 754b9c5 | 2011-01-17 18:34:43 -0800 | [diff] [blame] | 209 | regs->regs[i].num_conflicts = 1; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 210 | } |
| 211 | |
| 212 | return regs; |
| 213 | } |
| 214 | |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 215 | /** |
| 216 | * The register allocator by default prefers to allocate low register numbers, |
| 217 | * since it was written for hardware (gen4/5 Intel) that is limited in its |
| 218 | * multithreadedness by the number of registers used in a given shader. |
| 219 | * |
| 220 | * However, for hardware without that restriction, densely packed register |
| 221 | * allocation can put serious constraints on instruction scheduling. This |
| 222 | * function tells the allocator to rotate around the registers if possible as |
| 223 | * it allocates the nodes. |
| 224 | */ |
| 225 | void |
| 226 | ra_set_allocate_round_robin(struct ra_regs *regs) |
| 227 | { |
| 228 | regs->round_robin = true; |
| 229 | } |
| 230 | |
Eric Anholt | 754b9c5 | 2011-01-17 18:34:43 -0800 | [diff] [blame] | 231 | static void |
| 232 | ra_add_conflict_list(struct ra_regs *regs, unsigned int r1, unsigned int r2) |
| 233 | { |
| 234 | struct ra_reg *reg1 = ®s->regs[r1]; |
| 235 | |
Jason Ekstrand | f01bdb0 | 2015-08-15 09:58:32 -0700 | [diff] [blame] | 236 | if (reg1->conflict_list) { |
| 237 | if (reg1->conflict_list_size == reg1->num_conflicts) { |
| 238 | reg1->conflict_list_size *= 2; |
| 239 | reg1->conflict_list = reralloc(regs->regs, reg1->conflict_list, |
| 240 | unsigned int, reg1->conflict_list_size); |
| 241 | } |
| 242 | reg1->conflict_list[reg1->num_conflicts++] = r2; |
Eric Anholt | 754b9c5 | 2011-01-17 18:34:43 -0800 | [diff] [blame] | 243 | } |
Eric Anholt | 41097db | 2014-03-17 14:53:08 -0700 | [diff] [blame] | 244 | BITSET_SET(reg1->conflicts, r2); |
Eric Anholt | 754b9c5 | 2011-01-17 18:34:43 -0800 | [diff] [blame] | 245 | } |
| 246 | |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 247 | void |
| 248 | ra_add_reg_conflict(struct ra_regs *regs, unsigned int r1, unsigned int r2) |
| 249 | { |
Eric Anholt | 41097db | 2014-03-17 14:53:08 -0700 | [diff] [blame] | 250 | if (!BITSET_TEST(regs->regs[r1].conflicts, r2)) { |
Eric Anholt | 754b9c5 | 2011-01-17 18:34:43 -0800 | [diff] [blame] | 251 | ra_add_conflict_list(regs, r1, r2); |
| 252 | ra_add_conflict_list(regs, r2, r1); |
| 253 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 254 | } |
| 255 | |
Eric Anholt | fa43477 | 2011-05-04 13:27:33 -0700 | [diff] [blame] | 256 | /** |
| 257 | * Adds a conflict between base_reg and reg, and also between reg and |
| 258 | * anything that base_reg conflicts with. |
| 259 | * |
| 260 | * This can simplify code for setting up multiple register classes |
| 261 | * which are aggregates of some base hardware registers, compared to |
| 262 | * explicitly using ra_add_reg_conflict. |
| 263 | */ |
| 264 | void |
| 265 | ra_add_transitive_reg_conflict(struct ra_regs *regs, |
Roland Scheidegger | 2b40a14 | 2015-08-19 04:17:49 +0200 | [diff] [blame] | 266 | unsigned int base_reg, unsigned int reg) |
Eric Anholt | fa43477 | 2011-05-04 13:27:33 -0700 | [diff] [blame] | 267 | { |
Jan Vesely | bc18b48 | 2014-12-11 15:05:17 -0500 | [diff] [blame] | 268 | unsigned int i; |
Eric Anholt | fa43477 | 2011-05-04 13:27:33 -0700 | [diff] [blame] | 269 | |
| 270 | ra_add_reg_conflict(regs, reg, base_reg); |
| 271 | |
| 272 | for (i = 0; i < regs->regs[base_reg].num_conflicts; i++) { |
| 273 | ra_add_reg_conflict(regs, reg, regs->regs[base_reg].conflict_list[i]); |
| 274 | } |
| 275 | } |
| 276 | |
Jason Ekstrand | 9b49284 | 2015-08-15 09:43:05 -0700 | [diff] [blame] | 277 | /** |
| 278 | * Makes every conflict on the given register transitive. In other words, |
| 279 | * every register that conflicts with r will now conflict with every other |
| 280 | * register conflicting with r. |
| 281 | * |
| 282 | * This can simplify code for setting up multiple register classes |
| 283 | * which are aggregates of some base hardware registers, compared to |
| 284 | * explicitly using ra_add_reg_conflict. |
| 285 | */ |
| 286 | void |
| 287 | ra_make_reg_conflicts_transitive(struct ra_regs *regs, unsigned int r) |
| 288 | { |
| 289 | struct ra_reg *reg = ®s->regs[r]; |
| 290 | BITSET_WORD tmp; |
| 291 | int c; |
| 292 | |
| 293 | BITSET_FOREACH_SET(c, tmp, reg->conflicts, regs->count) { |
| 294 | struct ra_reg *other = ®s->regs[c]; |
Roland Scheidegger | 2b40a14 | 2015-08-19 04:17:49 +0200 | [diff] [blame] | 295 | unsigned i; |
| 296 | for (i = 0; i < BITSET_WORDS(regs->count); i++) |
Jason Ekstrand | 9b49284 | 2015-08-15 09:43:05 -0700 | [diff] [blame] | 297 | other->conflicts[i] |= reg->conflicts[i]; |
| 298 | } |
| 299 | } |
| 300 | |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 301 | unsigned int |
| 302 | ra_alloc_reg_class(struct ra_regs *regs) |
| 303 | { |
| 304 | struct ra_class *class; |
| 305 | |
Kenneth Graunke | d3073f5 | 2011-01-21 14:32:31 -0800 | [diff] [blame] | 306 | regs->classes = reralloc(regs->regs, regs->classes, struct ra_class *, |
Roland Scheidegger | 2b40a14 | 2015-08-19 04:17:49 +0200 | [diff] [blame] | 307 | regs->class_count + 1); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 308 | |
Kenneth Graunke | d3073f5 | 2011-01-21 14:32:31 -0800 | [diff] [blame] | 309 | class = rzalloc(regs, struct ra_class); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 310 | regs->classes[regs->class_count] = class; |
| 311 | |
Kenneth Graunke | da1cce2 | 2014-02-21 19:50:15 -0800 | [diff] [blame] | 312 | class->regs = rzalloc_array(class, BITSET_WORD, BITSET_WORDS(regs->count)); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 313 | |
| 314 | return regs->class_count++; |
| 315 | } |
| 316 | |
| 317 | void |
| 318 | ra_class_add_reg(struct ra_regs *regs, unsigned int c, unsigned int r) |
| 319 | { |
| 320 | struct ra_class *class = regs->classes[c]; |
| 321 | |
Kenneth Graunke | da1cce2 | 2014-02-21 19:50:15 -0800 | [diff] [blame] | 322 | BITSET_SET(class->regs, r); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 323 | class->p++; |
| 324 | } |
| 325 | |
| 326 | /** |
Kenneth Graunke | 8d856c3 | 2014-02-21 19:31:44 -0800 | [diff] [blame] | 327 | * Returns true if the register belongs to the given class. |
| 328 | */ |
| 329 | static bool |
| 330 | reg_belongs_to_class(unsigned int r, struct ra_class *c) |
| 331 | { |
Kenneth Graunke | da1cce2 | 2014-02-21 19:50:15 -0800 | [diff] [blame] | 332 | return BITSET_TEST(c->regs, r); |
Kenneth Graunke | 8d856c3 | 2014-02-21 19:31:44 -0800 | [diff] [blame] | 333 | } |
| 334 | |
| 335 | /** |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 336 | * Must be called after all conflicts and register classes have been |
| 337 | * set up and before the register set is used for allocation. |
Tom Stellard | e0f64a8 | 2012-09-03 10:43:45 -0400 | [diff] [blame] | 338 | * To avoid costly q value computation, use the q_values paramater |
| 339 | * to pass precomputed q values to this function. |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 340 | */ |
| 341 | void |
Tom Stellard | e0f64a8 | 2012-09-03 10:43:45 -0400 | [diff] [blame] | 342 | ra_set_finalize(struct ra_regs *regs, unsigned int **q_values) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 343 | { |
| 344 | unsigned int b, c; |
| 345 | |
| 346 | for (b = 0; b < regs->class_count; b++) { |
Kenneth Graunke | d3073f5 | 2011-01-21 14:32:31 -0800 | [diff] [blame] | 347 | regs->classes[b]->q = ralloc_array(regs, unsigned int, regs->class_count); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 348 | } |
| 349 | |
Tom Stellard | e0f64a8 | 2012-09-03 10:43:45 -0400 | [diff] [blame] | 350 | if (q_values) { |
| 351 | for (b = 0; b < regs->class_count; b++) { |
| 352 | for (c = 0; c < regs->class_count; c++) { |
| 353 | regs->classes[b]->q[c] = q_values[b][c]; |
Roland Scheidegger | 2b40a14 | 2015-08-19 04:17:49 +0200 | [diff] [blame] | 354 | } |
Tom Stellard | e0f64a8 | 2012-09-03 10:43:45 -0400 | [diff] [blame] | 355 | } |
Jason Ekstrand | 7539ac7 | 2015-07-30 20:49:22 -0700 | [diff] [blame] | 356 | } else { |
| 357 | /* Compute, for each class B and C, how many regs of B an |
| 358 | * allocation to C could conflict with. |
| 359 | */ |
| 360 | for (b = 0; b < regs->class_count; b++) { |
| 361 | for (c = 0; c < regs->class_count; c++) { |
| 362 | unsigned int rc; |
| 363 | int max_conflicts = 0; |
Tom Stellard | e0f64a8 | 2012-09-03 10:43:45 -0400 | [diff] [blame] | 364 | |
Jason Ekstrand | 7539ac7 | 2015-07-30 20:49:22 -0700 | [diff] [blame] | 365 | for (rc = 0; rc < regs->count; rc++) { |
| 366 | int conflicts = 0; |
| 367 | unsigned int i; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 368 | |
Jason Ekstrand | 7539ac7 | 2015-07-30 20:49:22 -0700 | [diff] [blame] | 369 | if (!reg_belongs_to_class(rc, regs->classes[c])) |
| 370 | continue; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 371 | |
Jason Ekstrand | 7539ac7 | 2015-07-30 20:49:22 -0700 | [diff] [blame] | 372 | for (i = 0; i < regs->regs[rc].num_conflicts; i++) { |
| 373 | unsigned int rb = regs->regs[rc].conflict_list[i]; |
| 374 | if (reg_belongs_to_class(rb, regs->classes[b])) |
| 375 | conflicts++; |
| 376 | } |
| 377 | max_conflicts = MAX2(max_conflicts, conflicts); |
| 378 | } |
| 379 | regs->classes[b]->q[c] = max_conflicts; |
| 380 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 381 | } |
| 382 | } |
Jason Ekstrand | bdcc8f3 | 2015-07-30 20:53:04 -0700 | [diff] [blame] | 383 | |
| 384 | for (b = 0; b < regs->count; b++) { |
| 385 | ralloc_free(regs->regs[b].conflict_list); |
| 386 | regs->regs[b].conflict_list = NULL; |
| 387 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 388 | } |
| 389 | |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 390 | static void |
| 391 | ra_add_node_adjacency(struct ra_graph *g, unsigned int n1, unsigned int n2) |
| 392 | { |
Eric Anholt | 11b8df0 | 2013-02-19 17:01:41 -0800 | [diff] [blame] | 393 | BITSET_SET(g->nodes[n1].adjacency, n2); |
Eric Anholt | 6aa3afb | 2013-02-19 16:46:41 -0800 | [diff] [blame] | 394 | |
Connor Abbott | 567e276 | 2014-07-31 18:57:21 -0700 | [diff] [blame] | 395 | if (n1 != n2) { |
| 396 | int n1_class = g->nodes[n1].class; |
| 397 | int n2_class = g->nodes[n2].class; |
| 398 | g->nodes[n1].q_total += g->regs->classes[n1_class]->q[n2_class]; |
| 399 | } |
| 400 | |
Eric Anholt | 6aa3afb | 2013-02-19 16:46:41 -0800 | [diff] [blame] | 401 | if (g->nodes[n1].adjacency_count >= |
| 402 | g->nodes[n1].adjacency_list_size) { |
| 403 | g->nodes[n1].adjacency_list_size *= 2; |
| 404 | g->nodes[n1].adjacency_list = reralloc(g, g->nodes[n1].adjacency_list, |
| 405 | unsigned int, |
| 406 | g->nodes[n1].adjacency_list_size); |
| 407 | } |
| 408 | |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 409 | g->nodes[n1].adjacency_list[g->nodes[n1].adjacency_count] = n2; |
| 410 | g->nodes[n1].adjacency_count++; |
| 411 | } |
| 412 | |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 413 | struct ra_graph * |
| 414 | ra_alloc_interference_graph(struct ra_regs *regs, unsigned int count) |
| 415 | { |
| 416 | struct ra_graph *g; |
| 417 | unsigned int i; |
| 418 | |
Matt Turner | 2e007fd | 2014-11-20 23:46:03 -0800 | [diff] [blame] | 419 | g = rzalloc(NULL, struct ra_graph); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 420 | g->regs = regs; |
Kenneth Graunke | d3073f5 | 2011-01-21 14:32:31 -0800 | [diff] [blame] | 421 | g->nodes = rzalloc_array(g, struct ra_node, count); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 422 | g->count = count; |
| 423 | |
Kenneth Graunke | d3073f5 | 2011-01-21 14:32:31 -0800 | [diff] [blame] | 424 | g->stack = rzalloc_array(g, unsigned int, count); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 425 | |
| 426 | for (i = 0; i < count; i++) { |
Eric Anholt | df25b4f | 2013-04-04 09:47:03 -0700 | [diff] [blame] | 427 | int bitset_count = BITSET_WORDS(count); |
Eric Anholt | 11b8df0 | 2013-02-19 17:01:41 -0800 | [diff] [blame] | 428 | g->nodes[i].adjacency = rzalloc_array(g, BITSET_WORD, bitset_count); |
| 429 | |
Eric Anholt | 6aa3afb | 2013-02-19 16:46:41 -0800 | [diff] [blame] | 430 | g->nodes[i].adjacency_list_size = 4; |
| 431 | g->nodes[i].adjacency_list = |
| 432 | ralloc_array(g, unsigned int, g->nodes[i].adjacency_list_size); |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 433 | g->nodes[i].adjacency_count = 0; |
Connor Abbott | 567e276 | 2014-07-31 18:57:21 -0700 | [diff] [blame] | 434 | g->nodes[i].q_total = 0; |
Eric Anholt | 11b8df0 | 2013-02-19 17:01:41 -0800 | [diff] [blame] | 435 | |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 436 | ra_add_node_adjacency(g, i, i); |
Tom Stellard | e4a765a | 2011-03-26 22:56:08 -0700 | [diff] [blame] | 437 | g->nodes[i].reg = NO_REG; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 438 | } |
| 439 | |
| 440 | return g; |
| 441 | } |
| 442 | |
| 443 | void |
| 444 | ra_set_node_class(struct ra_graph *g, |
Roland Scheidegger | 2b40a14 | 2015-08-19 04:17:49 +0200 | [diff] [blame] | 445 | unsigned int n, unsigned int class) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 446 | { |
| 447 | g->nodes[n].class = class; |
| 448 | } |
| 449 | |
| 450 | void |
| 451 | ra_add_node_interference(struct ra_graph *g, |
Roland Scheidegger | 2b40a14 | 2015-08-19 04:17:49 +0200 | [diff] [blame] | 452 | unsigned int n1, unsigned int n2) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 453 | { |
Eric Anholt | 11b8df0 | 2013-02-19 17:01:41 -0800 | [diff] [blame] | 454 | if (!BITSET_TEST(g->nodes[n1].adjacency, n2)) { |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 455 | ra_add_node_adjacency(g, n1, n2); |
| 456 | ra_add_node_adjacency(g, n2, n1); |
| 457 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 458 | } |
| 459 | |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 460 | static bool |
| 461 | pq_test(struct ra_graph *g, unsigned int n) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 462 | { |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 463 | int n_class = g->nodes[n].class; |
| 464 | |
Connor Abbott | 567e276 | 2014-07-31 18:57:21 -0700 | [diff] [blame] | 465 | return g->nodes[n].q_total < g->regs->classes[n_class]->p; |
| 466 | } |
| 467 | |
| 468 | static void |
| 469 | decrement_q(struct ra_graph *g, unsigned int n) |
| 470 | { |
| 471 | unsigned int i; |
| 472 | int n_class = g->nodes[n].class; |
| 473 | |
| 474 | for (i = 0; i < g->nodes[n].adjacency_count; i++) { |
| 475 | unsigned int n2 = g->nodes[n].adjacency_list[i]; |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 476 | unsigned int n2_class = g->nodes[n2].class; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 477 | |
Eric Anholt | 7cf648d | 2011-01-18 00:19:48 -0800 | [diff] [blame] | 478 | if (n != n2 && !g->nodes[n2].in_stack) { |
Connor Abbott | 2828680 | 2014-09-05 20:59:32 -0400 | [diff] [blame] | 479 | assert(g->nodes[n2].q_total >= g->regs->classes[n2_class]->q[n_class]); |
Roland Scheidegger | 2b40a14 | 2015-08-19 04:17:49 +0200 | [diff] [blame] | 480 | g->nodes[n2].q_total -= g->regs->classes[n2_class]->q[n_class]; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 481 | } |
| 482 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 483 | } |
| 484 | |
| 485 | /** |
| 486 | * Simplifies the interference graph by pushing all |
| 487 | * trivially-colorable nodes into a stack of nodes to be colored, |
| 488 | * removing them from the graph, and rinsing and repeating. |
| 489 | * |
Connor Abbott | e78a01d | 2014-07-31 18:57:23 -0700 | [diff] [blame] | 490 | * If we encounter a case where we can't push any nodes on the stack, then |
| 491 | * we optimistically choose a node and push it on the stack. We heuristically |
| 492 | * push the node with the lowest total q value, since it has the fewest |
| 493 | * neighbors and therefore is most likely to be allocated. |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 494 | */ |
Connor Abbott | e78a01d | 2014-07-31 18:57:23 -0700 | [diff] [blame] | 495 | static void |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 496 | ra_simplify(struct ra_graph *g) |
| 497 | { |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 498 | bool progress = true; |
Francisco Jerez | f80af89 | 2015-02-16 13:38:39 +0200 | [diff] [blame] | 499 | unsigned int stack_optimistic_start = UINT_MAX; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 500 | int i; |
| 501 | |
| 502 | while (progress) { |
Connor Abbott | e78a01d | 2014-07-31 18:57:23 -0700 | [diff] [blame] | 503 | unsigned int best_optimistic_node = ~0; |
| 504 | unsigned int lowest_q_total = ~0; |
| 505 | |
Brian Paul | 088106f | 2014-08-14 08:44:06 -0600 | [diff] [blame] | 506 | progress = false; |
| 507 | |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 508 | for (i = g->count - 1; i >= 0; i--) { |
Tom Stellard | e4a765a | 2011-03-26 22:56:08 -0700 | [diff] [blame] | 509 | if (g->nodes[i].in_stack || g->nodes[i].reg != NO_REG) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 510 | continue; |
| 511 | |
| 512 | if (pq_test(g, i)) { |
Connor Abbott | 567e276 | 2014-07-31 18:57:21 -0700 | [diff] [blame] | 513 | decrement_q(g, i); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 514 | g->stack[g->stack_count] = i; |
| 515 | g->stack_count++; |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 516 | g->nodes[i].in_stack = true; |
| 517 | progress = true; |
Connor Abbott | e78a01d | 2014-07-31 18:57:23 -0700 | [diff] [blame] | 518 | } else { |
| 519 | unsigned int new_q_total = g->nodes[i].q_total; |
| 520 | if (new_q_total < lowest_q_total) { |
| 521 | best_optimistic_node = i; |
| 522 | lowest_q_total = new_q_total; |
| 523 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 524 | } |
| 525 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 526 | |
Jan Vesely | bc18b48 | 2014-12-11 15:05:17 -0500 | [diff] [blame] | 527 | if (!progress && best_optimistic_node != ~0U) { |
Francisco Jerez | f80af89 | 2015-02-16 13:38:39 +0200 | [diff] [blame] | 528 | if (stack_optimistic_start == UINT_MAX) |
| 529 | stack_optimistic_start = g->stack_count; |
| 530 | |
Connor Abbott | e78a01d | 2014-07-31 18:57:23 -0700 | [diff] [blame] | 531 | decrement_q(g, best_optimistic_node); |
| 532 | g->stack[g->stack_count] = best_optimistic_node; |
| 533 | g->stack_count++; |
| 534 | g->nodes[best_optimistic_node].in_stack = true; |
| 535 | progress = true; |
| 536 | } |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 537 | } |
Francisco Jerez | f80af89 | 2015-02-16 13:38:39 +0200 | [diff] [blame] | 538 | |
| 539 | g->stack_optimistic_start = stack_optimistic_start; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 540 | } |
| 541 | |
| 542 | /** |
| 543 | * Pops nodes from the stack back into the graph, coloring them with |
| 544 | * registers as they go. |
| 545 | * |
| 546 | * If all nodes were trivially colorable, then this must succeed. If |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 547 | * not (optimistic coloring), then it may return false; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 548 | */ |
Connor Abbott | 9a0b52e | 2014-07-31 18:57:20 -0700 | [diff] [blame] | 549 | static bool |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 550 | ra_select(struct ra_graph *g) |
| 551 | { |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 552 | int start_search_reg = 0; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 553 | |
| 554 | while (g->stack_count != 0) { |
Jan Vesely | bc18b48 | 2014-12-11 15:05:17 -0500 | [diff] [blame] | 555 | unsigned int i; |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 556 | unsigned int ri; |
| 557 | unsigned int r = -1; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 558 | int n = g->stack[g->stack_count - 1]; |
| 559 | struct ra_class *c = g->regs->classes[g->nodes[n].class]; |
| 560 | |
| 561 | /* Find the lowest-numbered reg which is not used by a member |
| 562 | * of the graph adjacent to us. |
| 563 | */ |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 564 | for (ri = 0; ri < g->regs->count; ri++) { |
| 565 | r = (start_search_reg + ri) % g->regs->count; |
Kenneth Graunke | 8d856c3 | 2014-02-21 19:31:44 -0800 | [diff] [blame] | 566 | if (!reg_belongs_to_class(r, c)) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 567 | continue; |
| 568 | |
| 569 | /* Check if any of our neighbors conflict with this register choice. */ |
Eric Anholt | 604022a | 2011-01-18 00:33:35 -0800 | [diff] [blame] | 570 | for (i = 0; i < g->nodes[n].adjacency_count; i++) { |
| 571 | unsigned int n2 = g->nodes[n].adjacency_list[i]; |
| 572 | |
| 573 | if (!g->nodes[n2].in_stack && |
Eric Anholt | 41097db | 2014-03-17 14:53:08 -0700 | [diff] [blame] | 574 | BITSET_TEST(g->regs->regs[r].conflicts, g->nodes[n2].reg)) { |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 575 | break; |
| 576 | } |
| 577 | } |
Eric Anholt | 604022a | 2011-01-18 00:33:35 -0800 | [diff] [blame] | 578 | if (i == g->nodes[n].adjacency_count) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 579 | break; |
| 580 | } |
Connor Abbott | 03f4084 | 2014-07-31 18:57:22 -0700 | [diff] [blame] | 581 | |
| 582 | /* set this to false even if we return here so that |
| 583 | * ra_get_best_spill_node() considers this node later. |
| 584 | */ |
| 585 | g->nodes[n].in_stack = false; |
| 586 | |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 587 | if (ri == g->regs->count) |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 588 | return false; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 589 | |
| 590 | g->nodes[n].reg = r; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 591 | g->stack_count--; |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 592 | |
Francisco Jerez | f80af89 | 2015-02-16 13:38:39 +0200 | [diff] [blame] | 593 | /* Rotate the starting point except for any nodes above the lowest |
| 594 | * optimistically colorable node. The likelihood that we will succeed |
| 595 | * at allocating optimistically colorable nodes is highly dependent on |
| 596 | * the way that the previous nodes popped off the stack are laid out. |
| 597 | * The round-robin strategy increases the fragmentation of the register |
| 598 | * file and decreases the number of nearby nodes assigned to the same |
| 599 | * color, what increases the likelihood of spilling with respect to the |
| 600 | * dense packing strategy. |
| 601 | */ |
| 602 | if (g->regs->round_robin && |
| 603 | g->stack_count - 1 <= g->stack_optimistic_start) |
Eric Anholt | b6e9b54 | 2012-11-30 16:34:09 -0800 | [diff] [blame] | 604 | start_search_reg = r + 1; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 605 | } |
| 606 | |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 607 | return true; |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 608 | } |
| 609 | |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 610 | bool |
Connor Abbott | 9a0b52e | 2014-07-31 18:57:20 -0700 | [diff] [blame] | 611 | ra_allocate(struct ra_graph *g) |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 612 | { |
Connor Abbott | e78a01d | 2014-07-31 18:57:23 -0700 | [diff] [blame] | 613 | ra_simplify(g); |
Eric Anholt | 9ff90b7 | 2010-09-27 12:34:33 -0700 | [diff] [blame] | 614 | return ra_select(g); |
| 615 | } |
| 616 | |
/**
 * Returns the register (color) assigned to node \p n.
 *
 * Meaningful after ra_allocate() returns true, or for nodes whose
 * register was pinned up front with ra_set_node_reg().
 */
unsigned int
ra_get_node_reg(struct ra_graph *g, unsigned int n)
{
   return g->nodes[n].reg;
}
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 622 | |
Tom Stellard | e4a765a | 2011-03-26 22:56:08 -0700 | [diff] [blame] | 623 | /** |
| 624 | * Forces a node to a specific register. This can be used to avoid |
| 625 | * creating a register class containing one node when handling data |
| 626 | * that must live in a fixed location and is known to not conflict |
| 627 | * with other forced register assignment (as is common with shader |
| 628 | * input data). These nodes do not end up in the stack during |
| 629 | * ra_simplify(), and thus at ra_select() time it is as if they were |
| 630 | * the first popped off the stack and assigned their fixed locations. |
Tom Stellard | cfeb99c | 2012-09-03 08:23:02 -0400 | [diff] [blame] | 631 | * Nodes that use this function do not need to be assigned a register |
| 632 | * class. |
Tom Stellard | e4a765a | 2011-03-26 22:56:08 -0700 | [diff] [blame] | 633 | * |
| 634 | * Must be called before ra_simplify(). |
| 635 | */ |
| 636 | void |
| 637 | ra_set_node_reg(struct ra_graph *g, unsigned int n, unsigned int reg) |
| 638 | { |
| 639 | g->nodes[n].reg = reg; |
Kenneth Graunke | 786a647 | 2014-02-21 19:32:24 -0800 | [diff] [blame] | 640 | g->nodes[n].in_stack = false; |
Tom Stellard | e4a765a | 2011-03-26 22:56:08 -0700 | [diff] [blame] | 641 | } |
| 642 | |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 643 | static float |
| 644 | ra_get_spill_benefit(struct ra_graph *g, unsigned int n) |
| 645 | { |
Jan Vesely | bc18b48 | 2014-12-11 15:05:17 -0500 | [diff] [blame] | 646 | unsigned int j; |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 647 | float benefit = 0; |
| 648 | int n_class = g->nodes[n].class; |
| 649 | |
Eric Anholt | d5a53ad | 2011-01-18 01:08:51 -0800 | [diff] [blame] | 650 | /* Define the benefit of eliminating an interference between n, n2 |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 651 | * through spilling as q(C, B) / p(C). This is similar to the |
| 652 | * "count number of edges" approach of traditional graph coloring, |
| 653 | * but takes classes into account. |
| 654 | */ |
Eric Anholt | d5a53ad | 2011-01-18 01:08:51 -0800 | [diff] [blame] | 655 | for (j = 0; j < g->nodes[n].adjacency_count; j++) { |
| 656 | unsigned int n2 = g->nodes[n].adjacency_list[j]; |
| 657 | if (n != n2) { |
| 658 | unsigned int n2_class = g->nodes[n2].class; |
| 659 | benefit += ((float)g->regs->classes[n_class]->q[n2_class] / |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 660 | g->regs->classes[n_class]->p); |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 661 | } |
| 662 | } |
| 663 | |
| 664 | return benefit; |
| 665 | } |
| 666 | |
| 667 | /** |
| 668 | * Returns a node number to be spilled according to the cost/benefit using |
| 669 | * the pq test, or -1 if there are no spillable nodes. |
| 670 | */ |
| 671 | int |
| 672 | ra_get_best_spill_node(struct ra_graph *g) |
| 673 | { |
| 674 | unsigned int best_node = -1; |
Matt Turner | 2e177bc | 2013-04-02 13:38:07 -0700 | [diff] [blame] | 675 | float best_benefit = 0.0; |
Connor Abbott | 03f4084 | 2014-07-31 18:57:22 -0700 | [diff] [blame] | 676 | unsigned int n; |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 677 | |
Connor Abbott | 03f4084 | 2014-07-31 18:57:22 -0700 | [diff] [blame] | 678 | /* Consider any nodes that we colored successfully or the node we failed to |
| 679 | * color for spilling. When we failed to color a node in ra_select(), we |
| 680 | * only considered these nodes, so spilling any other ones would not result |
| 681 | * in us making progress. |
Eric Anholt | da00782 | 2013-06-06 13:21:21 -0700 | [diff] [blame] | 682 | */ |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 683 | for (n = 0; n < g->count; n++) { |
| 684 | float cost = g->nodes[n].spill_cost; |
Brian Paul | dd2499b | 2010-10-22 08:59:06 -0600 | [diff] [blame] | 685 | float benefit; |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 686 | |
Matt Turner | b568a5f | 2015-07-12 18:01:54 -0700 | [diff] [blame] | 687 | if (cost <= 0.0f) |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 688 | continue; |
| 689 | |
Paul Berry | 551c991 | 2012-09-28 14:21:38 -0700 | [diff] [blame] | 690 | if (g->nodes[n].in_stack) |
| 691 | continue; |
| 692 | |
Brian Paul | dd2499b | 2010-10-22 08:59:06 -0600 | [diff] [blame] | 693 | benefit = ra_get_spill_benefit(g, n); |
Eric Anholt | 99b2c85 | 2010-10-19 09:25:51 -0700 | [diff] [blame] | 694 | |
| 695 | if (benefit / cost > best_benefit) { |
| 696 | best_benefit = benefit / cost; |
| 697 | best_node = n; |
| 698 | } |
| 699 | } |
| 700 | |
| 701 | return best_node; |
| 702 | } |
| 703 | |
| 704 | /** |
| 705 | * Only nodes with a spill cost set (cost != 0.0) will be considered |
| 706 | * for register spilling. |
| 707 | */ |
| 708 | void |
| 709 | ra_set_node_spill_cost(struct ra_graph *g, unsigned int n, float cost) |
| 710 | { |
| 711 | g->nodes[n].spill_cost = cost; |
| 712 | } |