Rob Clark | 554f1ac | 2014-01-29 17:18:49 -0500 | [diff] [blame] | 1 | /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ |
| 2 | |
| 3 | /* |
| 4 | * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> |
| 5 | * |
| 6 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 7 | * copy of this software and associated documentation files (the "Software"), |
| 8 | * to deal in the Software without restriction, including without limitation |
| 9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 10 | * and/or sell copies of the Software, and to permit persons to whom the |
| 11 | * Software is furnished to do so, subject to the following conditions: |
| 12 | * |
| 13 | * The above copyright notice and this permission notice (including the next |
| 14 | * paragraph) shall be included in all copies or substantial portions of the |
| 15 | * Software. |
| 16 | * |
| 17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 23 | * SOFTWARE. |
| 24 | * |
| 25 | * Authors: |
| 26 | * Rob Clark <robclark@freedesktop.org> |
| 27 | */ |
| 28 | |
| 29 | #include "pipe/p_shader_tokens.h" |
| 30 | #include "util/u_math.h" |
| 31 | |
| 32 | #include "ir3.h" |
| 33 | #include "ir3_visitor.h" |
| 34 | |
| 35 | /* |
| 36 | * Register Assignment: |
| 37 | * |
| 38 | * NOTE: currently only works on a single basic block.. need to think |
| 39 | * about how multiple basic blocks are going to get scheduled. But |
| 40 | * I think I want to re-arrange how blocks work, ie. get rid of the |
| 41 | * block nesting thing.. |
| 42 | * |
| 43 | * NOTE: we could do register coalescing (eliminate moves) as part of |
| 44 | * the RA step.. OTOH I think we need to do scheduling before register |
 * assignment.  And if we remove a mov that affects scheduling (unless
| 46 | * we leave a placeholder nop, which seems lame), so I'm not really |
| 47 | * sure how practical this is to do both in a single stage. But OTOH |
| 48 | * I'm not really sure a sane way for the CP stage to realize when it |
| 49 | * cannot remove a mov due to multi-register constraints.. |
| 50 | * |
| 51 | */ |
| 52 | |
/* per-run state for register assignment of a single block: */
struct ir3_ra_ctx {
	struct ir3_block *block;      /* the (single) block being processed */
	enum shader_t type;           /* vertex vs fragment shader */
	bool half_precision;          /* allocate outputs as half-precision regs */
	bool frag_coord;              /* presumably: frag shader uses frag-coord, shifting input base -- TODO confirm against output_base() callers */
	bool frag_face;               /* frag shader uses front-face input (gets hr0.x, see block_ra()) */
	bool has_samp;                /* set in legalize() if any tex instruction survives */
	int cnt;                      /* appears unused in this file -- NOTE(review): candidate for removal, confirm no external use */
	bool error;                   /* set when an impossible assignment conflict is detected */
};
| 63 | |
/* sorta ugly way to retrofit half-precision support.. rather than
 * passing extra param around, just OR in a high bit.  All the low
 * value arithmetic (ie. +/- offset within a contiguous vec4, etc)
 * will continue to work as long as you don't underflow (and that
 * would go badly anyways).
 */
#define REG_HALF 0x8000

/* constraint computed for an instruction's dst: the dst must land at
 * offset 'off' within a contiguous range of 'num' scalar registers:
 */
struct ir3_ra_assignment {
	int8_t off; /* offset of instruction dst within range */
	uint8_t num; /* number of components for the range */
};

/* forward decls -- ra_assign() and ra_calc() recurse through the
 * visitor callbacks below:
 */
static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num);
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);

/*
 * Register Allocation:
 */

/* construct a temporary ir3_register (compound literal) for regmask
 * queries; 'wm' is the TGSI_WRITEMASK_* suffix:
 */
#define REG(n, wm, f) (struct ir3_register){ \
		.flags = (f), \
		.num = (n), \
		.wrmask = TGSI_WRITEMASK_ ## wm, \
	}
| 90 | |
| 91 | /* check that the register exists, is a GPR and is not special (a0/p0) */ |
| 92 | static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n) |
| 93 | { |
| 94 | if ((n < instr->regs_count) && reg_gpr(instr->regs[n])) |
| 95 | return instr->regs[n]; |
| 96 | return NULL; |
| 97 | } |
| 98 | |
| 99 | static int output_base(struct ir3_ra_ctx *ctx) |
| 100 | { |
| 101 | /* ugg, for fragment shader we need to have input at r0.x |
| 102 | * (or at least if there is a way to configure it, I can't |
| 103 | * see how because the blob driver always uses r0.x (ie. |
| 104 | * all zeros) |
| 105 | */ |
Rob Clark | 6640457 | 2014-02-25 08:51:30 -0500 | [diff] [blame] | 106 | if (ctx->type == SHADER_FRAGMENT) { |
| 107 | if (ctx->half_precision) |
Rob Clark | 83808a9 | 2014-03-29 14:32:38 -0400 | [diff] [blame] | 108 | return ctx->frag_face ? 4 : 3; |
| 109 | return ctx->frag_coord ? 8 : 4; |
Rob Clark | 6640457 | 2014-02-25 08:51:30 -0500 | [diff] [blame] | 110 | } |
Rob Clark | 554f1ac | 2014-01-29 17:18:49 -0500 | [diff] [blame] | 111 | return 0; |
| 112 | } |
| 113 | |
/* live means read before written.
 *
 * Computes, into 'liveregs', the set of registers which are live at
 * the point just after 'instr' -- ie. read by some later instruction
 * before being overwritten.  Only already-assigned (marked) dsts count
 * as writes.  Block outputs are treated as reads at the end.
 */
static void compute_liveregs(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, regmask_t *liveregs)
{
	struct ir3_block *block = instr->block;
	regmask_t written;
	unsigned i, j;

	regmask_init(liveregs);
	regmask_init(&written);

	/* forward scan starting at the instruction *after* 'instr': */
	for (instr = instr->next; instr; instr = instr->next) {
		struct ir3_register *r;

		if (is_meta(instr))
			continue;

		/* check first src's read: */
		for (j = 1; j < instr->regs_count; j++) {
			r = reg_check(instr, j);
			if (r)
				regmask_set_if_not(liveregs, r, &written);
		}

		/* then dst written (if assigned already): */
		if (instr->flags & IR3_INSTR_MARK) {
			r = reg_check(instr, 0);
			if (r)
				regmask_set(&written, r);
		}
	}

	/* be sure to account for output registers too: */
	for (i = 0; i < block->noutputs; i++) {
		struct ir3_register reg = REG(output_base(ctx) + i, X, 0);
		regmask_set_if_not(liveregs, &reg, &written);
	}
}
| 152 | |
/* calculate registers that are clobbered before last use of 'assigner'.
 * This needs to be done backwards, although it could possibly be
 * combined into compute_liveregs().  (Ie. compute_liveregs() could
 * reverse the list, then do this part backwards reversing the list
 * again back to original order.)  Otoh, probably I should try to
 * construct a proper interference graph instead.
 *
 * Returns true if 'assigner' is still live at/after 'instr' (either
 * used by a later instruction, or is a block output).
 *
 * XXX this needs to follow the same recursion path that is used
 * to rename/assign registers (ie. ra_assign_src()).. this is a bit
 * ugly right now, maybe refactor into node iterator sort of things
 * that iterates nodes in the correct order?
 */
static bool compute_clobbers(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, struct ir3_instruction *assigner,
		regmask_t *liveregs)
{
	unsigned i;
	bool live = false, was_live = false;

	if (instr == NULL) {
		struct ir3_block *block = ctx->block;

		/* if at the end, check outputs: */
		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] == assigner)
				return true;
		return false;
	}

	/* does this instruction consume 'assigner'? */
	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
			if (is_meta(instr)) {
				switch (instr->opc) {
				case OPC_META_INPUT:
					// TODO
					assert(0);
					break;
				case OPC_META_FO:
				case OPC_META_FI:
					/* fanin/fanout forward liveness through
					 * the meta instruction itself:
					 */
					was_live |= compute_clobbers(ctx, instr->next,
							instr, liveregs);
					break;
				default:
					break;
				}
			}
			live = true;
			break;
		}
	}

	was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);

	/* any already-assigned dst written while 'assigner' is still live
	 * downstream is a clobber:
	 */
	if (was_live && (instr->regs_count > 0) &&
			(instr->flags & IR3_INSTR_MARK) &&
			!is_meta(instr))
		regmask_set(liveregs, instr->regs[0]);

	return live || was_live;
}
| 214 | |
Rob Clark | a5ac36a | 2014-07-21 15:24:30 -0400 | [diff] [blame] | 215 | static int find_available(regmask_t *liveregs, int size, bool half) |
Rob Clark | 554f1ac | 2014-01-29 17:18:49 -0500 | [diff] [blame] | 216 | { |
| 217 | unsigned i; |
Rob Clark | a5ac36a | 2014-07-21 15:24:30 -0400 | [diff] [blame] | 218 | unsigned f = half ? IR3_REG_HALF : 0; |
Rob Clark | 554f1ac | 2014-01-29 17:18:49 -0500 | [diff] [blame] | 219 | for (i = 0; i < MAX_REG - size; i++) { |
Rob Clark | a5ac36a | 2014-07-21 15:24:30 -0400 | [diff] [blame] | 220 | if (!regmask_get(liveregs, ®(i, X, f))) { |
Rob Clark | 554f1ac | 2014-01-29 17:18:49 -0500 | [diff] [blame] | 221 | unsigned start = i++; |
| 222 | for (; (i < MAX_REG) && ((i - start) < size); i++) |
Rob Clark | a5ac36a | 2014-07-21 15:24:30 -0400 | [diff] [blame] | 223 | if (regmask_get(liveregs, ®(i, X, f))) |
Rob Clark | 554f1ac | 2014-01-29 17:18:49 -0500 | [diff] [blame] | 224 | break; |
| 225 | if ((i - start) >= size) |
| 226 | return start; |
| 227 | } |
| 228 | } |
| 229 | assert(0); |
| 230 | return -1; |
| 231 | } |
| 232 | |
/* allocate 'size' consecutive registers for the dst of 'instr', such
 * that nothing live after the instruction is clobbered.  With instr
 * NULL this is the special shader-outputs case.  Returns the first
 * register of the range.
 */
static int alloc_block(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, int size)
{
	if (!instr) {
		/* special case, allocating shader outputs.  At this
		 * point, nothing is allocated, just start the shader
		 * outputs at r0.x and let compute_liveregs() take
		 * care of the rest from here:
		 */
		return 0;
	} else {
		struct ir3_register *dst = instr->regs[0];
		regmask_t liveregs;

		compute_liveregs(ctx, instr, &liveregs);

		// XXX XXX XXX XXX XXX XXX XXX XXX XXX
		// XXX hack.. maybe ra_calc should give us a list of
		// instrs to compute_clobbers() on?
		if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
				(instr->regs_count == 1)) {
			/* bare input meta: account for clobbers of all four
			 * companion components of its vec4:
			 */
			unsigned i, base = instr->regs[0]->num & ~0x3;
			for (i = 0; i < 4; i++) {
				struct ir3_instruction *in = ctx->block->inputs[base + i];
				if (in)
					compute_clobbers(ctx, in->next, in, &liveregs);
			}
		} else
		// XXX XXX XXX XXX XXX XXX XXX XXX XXX
			compute_clobbers(ctx, instr->next, instr, &liveregs);

		return find_available(&liveregs, size,
				!!(dst->flags & IR3_REG_HALF));
	}
}
| 268 | |
/*
 * Constraint Calculation:
 */

/* visitor state used by ra_calc() to compute the dst range
 * constraint for an instruction:
 */
struct ra_calc_visitor {
	struct ir3_visitor base;
	struct ir3_ra_assignment a;   /* the computed constraint (result) */
};

/* downcast from the embedded base visitor: */
static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
{
	return (struct ra_calc_visitor *)v;
}
| 282 | |
| 283 | /* calculate register assignment for the instruction. If the register |
| 284 | * written by this instruction is required to be part of a range, to |
| 285 | * handle other (input/output/sam/bary.f/etc) contiguous register range |
| 286 | * constraints, that is calculated handled here. |
| 287 | */ |
| 288 | static void ra_calc_dst(struct ir3_visitor *v, |
| 289 | struct ir3_instruction *instr, struct ir3_register *reg) |
| 290 | { |
| 291 | struct ra_calc_visitor *c = ra_calc_visitor(v); |
| 292 | if (is_tex(instr)) { |
| 293 | c->a.off = 0; |
| 294 | c->a.num = 4; |
| 295 | } else { |
| 296 | c->a.off = 0; |
| 297 | c->a.num = 1; |
| 298 | } |
| 299 | } |
| 300 | |
/* constraint for a shader-input dst: the full vec4 slot's worth of
 * used input components must land contiguously, with this dst at its
 * component offset within that slot.
 */
static void
ra_calc_dst_shader_input(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_calc_visitor *c = ra_calc_visitor(v);
	struct ir3_block *block = instr->block;
	struct ir3_register *dst = instr->regs[0];
	unsigned base = dst->num & ~0x3;   /* start of the vec4 slot */
	unsigned i, num = 0;

	assert(!(dst->flags & IR3_REG_IA));

	/* check what input components we need: */
	for (i = 0; i < 4; i++) {
		unsigned idx = base + i;
		if ((idx < block->ninputs) && block->inputs[idx])
			num = i + 1;   /* highest used component + 1 */
	}

	c->a.off = dst->num - base;
	c->a.num = num;
}
| 323 | |
/* constraint for a value consumed as src 'srcn' of a fanin (collect):
 * the value must sit at position srcn within a contiguous range big
 * enough for all of the fanin's srcs.
 * NOTE(review): a.num is bumped by srcn *before* being clamped up to
 * the src count -- looks intentional (a.num - a.off stays the range
 * size) but worth confirming against ra_assign_src_fanin().
 */
static void ra_calc_src_fanin(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_calc_visitor *c = ra_calc_visitor(v);
	unsigned srcn = ir3_instr_regno(instr, reg) - 1;
	c->a.off += srcn;
	c->a.num += srcn;
	c->a.num = MAX2(c->a.num, instr->regs_count - 1);
}
| 333 | |
/* dispatch table for constraint calculation; entries left as
 * ir3_visit_instr/ir3_visit_reg just continue the generic walk:
 */
static const struct ir3_visitor_funcs calc_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_calc_dst_shader_input,
		.dst_fanout = ra_calc_dst,
		.dst_fanin = ra_calc_dst,
		.dst = ra_calc_dst,
		.src_fanout = ir3_visit_reg,
		.src_fanin = ra_calc_src_fanin,
		.src = ir3_visit_reg,
};
| 344 | |
| 345 | static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner) |
| 346 | { |
| 347 | struct ra_calc_visitor v = { |
| 348 | .base.funcs = &calc_visitor_funcs, |
| 349 | }; |
| 350 | |
| 351 | ir3_visit_instr(&v.base, assigner); |
| 352 | |
| 353 | return v.a; |
| 354 | } |
| 355 | |
/*
 * Register Assignment:
 */

/* visitor state used by ra_assign() to propagate a register number
 * through an instruction and everything connected to it:
 */
struct ra_assign_visitor {
	struct ir3_visitor base;
	struct ir3_ra_ctx *ctx;
	int num;   /* register number to assign, possibly or'd with REG_HALF */
};

/* downcast from the embedded base visitor: */
static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
{
	return (struct ra_assign_visitor *)v;
}
| 370 | |
Rob Clark | 3f7239c | 2014-02-22 09:46:39 -0500 | [diff] [blame] | 371 | static type_t half_type(type_t type) |
| 372 | { |
| 373 | switch (type) { |
| 374 | case TYPE_F32: return TYPE_F16; |
| 375 | case TYPE_U32: return TYPE_U16; |
| 376 | case TYPE_S32: return TYPE_S16; |
| 377 | /* instructions may already be fixed up: */ |
| 378 | case TYPE_F16: |
| 379 | case TYPE_U16: |
| 380 | case TYPE_S16: |
| 381 | return type; |
| 382 | default: |
| 383 | assert(0); |
| 384 | return ~0; |
| 385 | } |
| 386 | } |
| 387 | |
/* some instructions need fix-up if dst register is half precision:
 * (other categories encode precision purely in the register, so the
 * outer switch intentionally has no default)
 */
static void fixup_half_instr_dst(struct ir3_instruction *instr)
{
	switch (instr->category) {
	case 1: /* move instructions */
		instr->cat1.dst_type = half_type(instr->cat1.dst_type);
		break;
	case 3:
		/* cat3 encodes precision in the opcode itself: */
		switch (instr->opc) {
		case OPC_MAD_F32:
			instr->opc = OPC_MAD_F16;
			break;
		case OPC_SEL_B32:
			instr->opc = OPC_SEL_B16;
			break;
		case OPC_SEL_S32:
			instr->opc = OPC_SEL_S16;
			break;
		case OPC_SEL_F32:
			instr->opc = OPC_SEL_F16;
			break;
		case OPC_SAD_S32:
			instr->opc = OPC_SAD_S16;
			break;
		/* instructions may already be fixed up: */
		case OPC_MAD_F16:
		case OPC_SEL_B16:
		case OPC_SEL_S16:
		case OPC_SEL_F16:
		case OPC_SAD_S16:
			break;
		default:
			assert(0);
			break;
		}
		break;
	case 5:
		/* sample instructions carry a result type: */
		instr->cat5.type = half_type(instr->cat5.type);
		break;
	}
}
| 429 | /* some instructions need fix-up if src register is half precision: */ |
| 430 | static void fixup_half_instr_src(struct ir3_instruction *instr) |
| 431 | { |
| 432 | switch (instr->category) { |
| 433 | case 1: /* move instructions */ |
| 434 | instr->cat1.src_type = half_type(instr->cat1.src_type); |
| 435 | break; |
| 436 | } |
| 437 | } |
| 438 | |
/* the basic assignment callback: strip the SSA flag, write the
 * register number, and apply half-precision fixups if needed.
 */
static void ra_assign_reg(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);

	/* kill has no real dst register: */
	if (is_flow(instr) && (instr->opc == OPC_KILL))
		return;

	reg->flags &= ~IR3_REG_SSA;
	reg->num = a->num & ~REG_HALF;

	/* NOTE(review): if reg->num is an unsigned type this assert is
	 * vacuously true -- confirm the field type in ir3.h:
	 */
	assert(reg->num >= 0);

	if (a->num & REG_HALF) {
		reg->flags |= IR3_REG_HALF;
		/* if dst reg being assigned, patch up the instr: */
		if (reg == instr->regs[0])
			fixup_half_instr_dst(instr);
		else
			fixup_half_instr_src(instr);
	}
}
| 461 | |
/* assigning a shader-input dst: also trigger assignment of the other
 * components sharing its vec4 input slot, so they land contiguously.
 */
static void ra_assign_dst_shader_input(struct ir3_visitor *v,
		struct ir3_instruction *instr, struct ir3_register *reg)
{
	struct ra_assign_visitor *a = ra_assign_visitor(v);
	unsigned i, base = reg->num & ~0x3;  /* start of the vec4 slot */
	int off = base - reg->num;           /* <= 0: back up to slot start */

	ra_assign_reg(v, instr, reg);
	reg->flags |= IR3_REG_IA;

	/* trigger assignment of all our companion input components: */
	for (i = 0; i < 4; i++) {
		struct ir3_instruction *in = instr->block->inputs[i+base];
		if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
			ra_assign(a->ctx, in, a->num + off + i);
	}
}
| 479 | |
| 480 | static void ra_assign_dst_fanout(struct ir3_visitor *v, |
| 481 | struct ir3_instruction *instr, struct ir3_register *reg) |
| 482 | { |
| 483 | struct ra_assign_visitor *a = ra_assign_visitor(v); |
| 484 | struct ir3_register *src = instr->regs[1]; |
| 485 | ra_assign_reg(v, instr, reg); |
| 486 | if (src->flags & IR3_REG_SSA) |
| 487 | ra_assign(a->ctx, src->instr, a->num - instr->fo.off); |
| 488 | } |
| 489 | |
| 490 | static void ra_assign_src_fanout(struct ir3_visitor *v, |
| 491 | struct ir3_instruction *instr, struct ir3_register *reg) |
| 492 | { |
| 493 | struct ra_assign_visitor *a = ra_assign_visitor(v); |
| 494 | ra_assign_reg(v, instr, reg); |
| 495 | ra_assign(a->ctx, instr, a->num + instr->fo.off); |
| 496 | } |
| 497 | |
| 498 | |
| 499 | static void ra_assign_src_fanin(struct ir3_visitor *v, |
| 500 | struct ir3_instruction *instr, struct ir3_register *reg) |
| 501 | { |
| 502 | struct ra_assign_visitor *a = ra_assign_visitor(v); |
| 503 | unsigned j, srcn = ir3_instr_regno(instr, reg) - 1; |
| 504 | ra_assign_reg(v, instr, reg); |
| 505 | ra_assign(a->ctx, instr, a->num - srcn); |
| 506 | for (j = 1; j < instr->regs_count; j++) { |
| 507 | struct ir3_register *reg = instr->regs[j]; |
| 508 | if (reg->flags & IR3_REG_SSA) /* could be renamed already */ |
| 509 | ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1); |
| 510 | } |
| 511 | } |
| 512 | |
/* dispatch table for assignment propagation: */
static const struct ir3_visitor_funcs assign_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_assign_dst_shader_input,
		.dst_fanout = ra_assign_dst_fanout,
		.dst_fanin = ra_assign_reg,
		.dst = ra_assign_reg,
		.src_fanout = ra_assign_src_fanout,
		.src_fanin = ra_assign_src_fanin,
		.src = ra_assign_reg,
};
| 523 | |
/* assign register 'num' (possibly or'd with REG_HALF) to 'assigner'
 * and recursively to everything connected to it.  If the instruction
 * was already assigned a *different* register, that is a conflict
 * which should have been resolved earlier -- flag ctx->error.
 */
static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num)
{
	struct ra_assign_visitor v = {
			.base.funcs = &assign_visitor_funcs,
			.ctx = ctx,
			.num = num,
	};

	/* if we've already visited this instruction, bail now: */
	if (ir3_instr_check_mark(assigner)) {
		debug_assert(assigner->regs[0]->num == (num & ~REG_HALF));
		/* the explicit check duplicates the debug_assert so release
		 * builds still detect the conflict:
		 */
		if (assigner->regs[0]->num != (num & ~REG_HALF)) {
			/* impossible situation, should have been resolved
			 * at an earlier stage by inserting extra mov's:
			 */
			ctx->error = true;
		}
		return;
	}

	ir3_visit_instr(&v.base, assigner);
}
| 547 | |
| 548 | /* |
| 549 | * |
| 550 | */ |
| 551 | |
/* allocate and assign a register (range) for one instruction's dst,
 * then propagate the assignment via ra_assign():
 */
static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr)
{
	struct ir3_register *dst;
	unsigned num;

	/* skip over nop's */
	if (instr->regs_count == 0)
		return;

	dst = instr->regs[0];

	/* if we've already visited this instruction, bail now: */
	if (instr->flags & IR3_INSTR_MARK)
		return;

	/* allocate register(s): */
	if (is_addr(instr)) {
		/* address-reg load: reuse the register of its src (regs[2])
		 * rather than allocating a fresh one:
		 */
		num = instr->regs[2]->num;
	} else if (reg_gpr(dst)) {
		/* normal GPR dst: find a free range honoring the
		 * contiguity constraint:
		 */
		struct ir3_ra_assignment a;
		a = ra_calc(instr);
		num = alloc_block(ctx, instr, a.num) + a.off;
	} else if (dst->flags & IR3_REG_ADDR) {
		/* dst is the address register (a0), always half: */
		dst->flags &= ~IR3_REG_ADDR;
		num = regid(REG_A0, 0) | REG_HALF;
	} else {
		/* predicate register (p0).. etc */
		return;
	}

	ra_assign(ctx, instr, num);
}
| 585 | |
/* flatten into shader: rebuild shader->instrs from the block's
 * instruction list, dropping meta instructions, inserting/merging
 * nop's, and setting the (ss)/(sy) sync and (ul) flags.
 */
// XXX this should probably be somewhere else:
static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;
	struct ir3 *shader = block->shader;
	struct ir3_instruction *end =
			ir3_instr_create(block, 0, OPC_END);
	struct ir3_instruction *last_input = NULL;
	struct ir3_instruction *last_rel = NULL;
	regmask_t needs_ss_war; /* write after read */
	regmask_t needs_ss;     /* sfu results not yet consumed */
	regmask_t needs_sy;     /* tex results not yet consumed */

	regmask_init(&needs_ss_war);
	regmask_init(&needs_ss);
	regmask_init(&needs_sy);

	/* NOTE(review): resetting the count here assumes ir3_instr_create()
	 * appends new instructions into shader->instrs (so 'end' above and
	 * the nop's below land in the stream) -- confirm in ir3.c:
	 */
	shader->instrs_count = 0;

	for (n = block->head; n; n = n->next) {
		struct ir3_register *reg;
		unsigned i;

		if (is_meta(n))
			continue;

		/* srcs: check whether a pending sync is consumed here: */
		for (i = 1; i < n->regs_count; i++) {
			reg = n->regs[i];

			if (reg_gpr(reg)) {

				/* TODO: we probably only need (ss) for alu
				 * instr consuming sfu result.. need to make
				 * some tests for both this and (sy)..
				 */
				if (regmask_get(&needs_ss, reg)) {
					n->flags |= IR3_INSTR_SS;
					regmask_init(&needs_ss);
				}

				if (regmask_get(&needs_sy, reg)) {
					n->flags |= IR3_INSTR_SY;
					regmask_init(&needs_sy);
				}
			}

			/* TODO: is it valid to have address reg loaded from a
			 * relative src (ie. mova a0, c<a0.x+4>)?  If so, the
			 * last_rel check below should be moved ahead of this:
			 */
			if (reg->flags & IR3_REG_RELATIV)
				last_rel = n;
		}

		/* dst: write-after-read hazard and a0 overwrite: */
		if (n->regs_count > 0) {
			reg = n->regs[0];
			if (regmask_get(&needs_ss_war, reg)) {
				n->flags |= IR3_INSTR_SS;
				regmask_init(&needs_ss_war); // ??? I assume?
			}

			/* writing a0 ends the last-use window of any pending
			 * relative access:
			 */
			if (last_rel && (reg->num == regid(REG_A0, 0))) {
				last_rel->flags |= IR3_INSTR_UL;
				last_rel = NULL;
			}
		}

		/* cat5+ does not have an (ss) bit, if needed we need to
		 * insert a nop to carry the sync flag.  Would be kinda
		 * clever if we were aware of this during scheduling, but
		 * this should be a pretty rare case:
		 */
		if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
			struct ir3_instruction *nop;
			nop = ir3_instr_create(block, 0, OPC_NOP);
			nop->flags |= IR3_INSTR_SS;
			n->flags &= ~IR3_INSTR_SS;
		}

		/* need to be able to set (ss) on first instruction: */
		if ((shader->instrs_count == 0) && (n->category >= 5))
			ir3_instr_create(block, 0, OPC_NOP);

		/* fold consecutive nop's into the (rpt) field: */
		if (is_nop(n) && shader->instrs_count) {
			struct ir3_instruction *last =
					shader->instrs[shader->instrs_count-1];
			if (is_nop(last) && (last->repeat < 5)) {
				last->repeat++;
				last->flags |= n->flags;
				continue;
			}
		}

		shader->instrs[shader->instrs_count++] = n;

		if (is_sfu(n))
			regmask_set(&needs_ss, n->regs[0]);

		if (is_tex(n)) {
			/* this ends up being the # of samp instructions.. but that
			 * is ok, everything else only cares whether it is zero or
			 * not.  We do this here, rather than when we encounter a
			 * SAMP decl, because (especially in binning pass shader)
			 * the samp instruction(s) could get eliminated if the
			 * result is not used.
			 */
			ctx->has_samp = true;
			regmask_set(&needs_sy, n->regs[0]);
		}

		/* both tex/sfu appear to not always immediately consume
		 * their src register(s):
		 */
		if (is_tex(n) || is_sfu(n)) {
			for (i = 1; i < n->regs_count; i++) {
				reg = n->regs[i];
				if (reg_gpr(reg))
					regmask_set(&needs_ss_war, reg);
			}
		}

		if (is_input(n))
			last_input = n;
	}

	if (last_input)
		last_input->regs[0]->flags |= IR3_REG_EI;

	if (last_rel)
		last_rel->flags |= IR3_INSTR_UL;

	shader->instrs[shader->instrs_count++] = end;

	shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}
| 722 | |
/* run register assignment over one block: first pin outputs (and, for
 * frag shaders, inputs) at the root block, then walk the instruction
 * list, then legalize.  Returns -1 on assignment conflict.
 */
static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;

	if (!block->parent) {
		unsigned i, j;
		int base, off = output_base(ctx);

		base = alloc_block(ctx, NULL, block->noutputs + off);

		if (ctx->half_precision)
			base |= REG_HALF;

		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] && !is_kill(block->outputs[i]))
				ra_assign(ctx, block->outputs[i], base + i + off);

		if (ctx->type == SHADER_FRAGMENT) {
			i = 0;
			if (ctx->frag_face) {
				/* if we have frag_face, it gets hr0.x */
				ra_assign(ctx, block->inputs[i], REG_HALF | 0);
				i += 4;
			}
			/* remaining frag inputs get full regs starting at base: */
			for (j = 0; i < block->ninputs; i++, j++)
				if (block->inputs[i])
					ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + j);
		} else {
			for (i = 0; i < block->ninputs; i++)
				if (block->inputs[i])
					ir3_instr_ra(ctx, block->inputs[i]);
		}
	}

	/* then loop over instruction list and assign registers:
	 */
	n = block->head;
	while (n) {
		ir3_instr_ra(ctx, n);
		if (ctx->error)
			return -1;
		n = n->next;
	}

	legalize(ctx, block);

	return 0;
}
| 771 | |
/* public entry point: run register assignment + legalization on a
 * block.  '*has_samp' is set to whether any tex instruction survived.
 * Returns 0 on success, -1 on (unexpected) assignment conflict.
 */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
		bool half_precision, bool frag_coord, bool frag_face,
		bool *has_samp)
{
	struct ir3_ra_ctx ctx = {
			.block = block,
			.type = type,
			.half_precision = half_precision,
			.frag_coord = frag_coord,
			.frag_face = frag_face,
	};
	int ret;

	/* clear visit-marks so ra_assign()/ir3_instr_ra() start fresh: */
	ir3_clear_mark(block->shader);
	ret = block_ra(&ctx, block);
	*has_samp = ctx.has_samp;

	return ret;
}