blob: b916dd513936f3b8a9736cd9eae909d3a775e1e7 [file] [log] [blame]
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
2
3/*
4 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 * Authors:
26 * Rob Clark <robclark@freedesktop.org>
27 */
28
29#include "pipe/p_shader_tokens.h"
30#include "util/u_math.h"
31
32#include "ir3.h"
33#include "ir3_visitor.h"
34
/*
 * Register Assignment:
 *
 * NOTE: currently only works on a single basic block.. need to think
 * about how multiple basic blocks are going to get scheduled.  But
 * I think I want to re-arrange how blocks work, ie. get rid of the
 * block nesting thing..
 *
 * NOTE: we could do register coalescing (eliminate moves) as part of
 * the RA step.. OTOH I think we need to do scheduling before register
 * assignment.  And removing a mov affects scheduling (unless we leave
 * a placeholder nop, which seems lame), so I'm not really sure how
 * practical it is to do both in a single stage.  But OTOH I'm not
 * really sure of a sane way for the CP stage to realize when it
 * cannot remove a mov due to multi-register constraints..
 *
 */
52
/* State shared by all stages of one register assignment run (one
 * instance is created per ir3_block_ra() call).
 */
struct ir3_ra_ctx {
	/* block whose instructions/inputs/outputs are being assigned */
	struct ir3_block *block;
	/* shader stage; SHADER_FRAGMENT gets special input/output placement */
	enum shader_t type;
	/* when set, the output base register is tagged with REG_HALF */
	bool half_precision;
	/* fragment shader only: shifts the output base (see output_base()) */
	bool frag_coord;
	/* fragment shader only: input 0 is pinned to hr0.x (see block_ra()) */
	bool frag_face;
	/* set by legalize() when at least one tex instruction is emitted */
	bool has_samp;
	/* not referenced by the code in this file section */
	int cnt;
	/* set by ra_assign() when an instruction would need two different
	 * registers; makes block_ra() bail out with -1
	 */
	bool error;
};
63
/* sorta ugly way to retrofit half-precision support.. rather than
 * passing an extra param around, just OR a high bit into the register
 * number.  All the low value arithmetic (ie. +/- offset within a
 * contiguous vec4, etc) will continue to work as long as you don't
 * underflow (and that would go badly anyways).
 *
 * ra_assign_reg() masks the bit off again before storing the register
 * number and sets IR3_REG_HALF on the register instead.
 */
#define REG_HALF      0x8000
71
/* Result of ra_calc(): describes the contiguous register range that the
 * instruction's dst has to live in.  ir3_instr_ra() allocates 'num'
 * registers and assigns the dst to (start of range + off).
 */
struct ir3_ra_assignment {
	int8_t  off;       /* offset of instruction dst within range */
	uint8_t num;       /* number of components for the range */
};
76
/* forward declarations for functions defined further down; ra_assign()
 * is called recursively from the assign visitor callbacks:
 */
static void ra_assign(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *assigner, int num);
static struct ir3_ra_assignment ra_calc(struct ir3_instruction *instr);
80
81/*
82 * Register Allocation:
83 */
84
/* Build a temporary ir3_register (as a compound literal) with register
 * number 'n', flags 'f' and write-mask TGSI_WRITEMASK_<wm>.  Used to
 * query/update regmasks for registers that are not attached to any
 * instruction.
 */
#define REG(n, wm, f) (struct ir3_register){ \
		.flags  = (f), \
		.num    = (n), \
		.wrmask = TGSI_WRITEMASK_ ## wm, \
	}
90
91/* check that the register exists, is a GPR and is not special (a0/p0) */
92static struct ir3_register * reg_check(struct ir3_instruction *instr, unsigned n)
93{
94 if ((n < instr->regs_count) && reg_gpr(instr->regs[n]))
95 return instr->regs[n];
96 return NULL;
97}
98
99static int output_base(struct ir3_ra_ctx *ctx)
100{
101 /* ugg, for fragment shader we need to have input at r0.x
102 * (or at least if there is a way to configure it, I can't
103 * see how because the blob driver always uses r0.x (ie.
104 * all zeros)
105 */
Rob Clark66404572014-02-25 08:51:30 -0500106 if (ctx->type == SHADER_FRAGMENT) {
107 if (ctx->half_precision)
Rob Clark83808a92014-03-29 14:32:38 -0400108 return ctx->frag_face ? 4 : 3;
109 return ctx->frag_coord ? 8 : 4;
Rob Clark66404572014-02-25 08:51:30 -0500110 }
Rob Clark554f1ac2014-01-29 17:18:49 -0500111 return 0;
112}
113
114/* live means read before written */
115static void compute_liveregs(struct ir3_ra_ctx *ctx,
116 struct ir3_instruction *instr, regmask_t *liveregs)
117{
118 struct ir3_block *block = instr->block;
119 regmask_t written;
120 unsigned i, j;
121
122 regmask_init(liveregs);
123 regmask_init(&written);
124
125 for (instr = instr->next; instr; instr = instr->next) {
126 struct ir3_register *r;
127
128 if (is_meta(instr))
129 continue;
130
131 /* check first src's read: */
132 for (j = 1; j < instr->regs_count; j++) {
133 r = reg_check(instr, j);
134 if (r)
135 regmask_set_if_not(liveregs, r, &written);
136 }
137
138 /* then dst written (if assigned already): */
139 if (instr->flags & IR3_INSTR_MARK) {
140 r = reg_check(instr, 0);
141 if (r)
142 regmask_set(&written, r);
143 }
144 }
145
146 /* be sure to account for output registers too: */
147 for (i = 0; i < block->noutputs; i++) {
Rob Clarka5ac36a2014-07-21 15:24:30 -0400148 struct ir3_register reg = REG(output_base(ctx) + i, X, 0);
Rob Clark554f1ac2014-01-29 17:18:49 -0500149 regmask_set_if_not(liveregs, &reg, &written);
150 }
151}
152
/* Calculate registers that are clobbered before the last use of
 * 'assigner', and add them to 'liveregs'.
 *
 * The instruction list is walked recursively starting at 'instr'.  The
 * marking happens while the recursion unwinds (ie. backwards): once a
 * use of 'assigner' has been seen further down the list, the dst of
 * every already-assigned (IR3_INSTR_MARK), non-meta instruction on the
 * way back up is set in 'liveregs'.
 *
 * Returns true if 'assigner' is used by 'instr' or by anything after it
 * (including being one of the block outputs).
 *
 * This needs to be done backwards, although it could possibly be
 * combined into compute_liveregs().  (Ie. compute_liveregs() could
 * reverse the list, then do this part backwards reversing the list
 * again back to original order.)  Otoh, probably I should try to
 * construct a proper interference graph instead.
 *
 * XXX this needs to follow the same recursion path that is used to
 * rename/assign registers (ie. ra_assign_src()).. this is a bit
 * ugly right now, maybe refactor into node iterator sort of things
 * that iterates nodes in the correct order?
 */
static bool compute_clobbers(struct ir3_ra_ctx *ctx,
		struct ir3_instruction *instr, struct ir3_instruction *assigner,
		regmask_t *liveregs)
{
	unsigned i;
	/* live:     'instr' itself reads 'assigner'
	 * was_live: something after 'instr' reads 'assigner'
	 */
	bool live = false, was_live = false;

	if (instr == NULL) {
		struct ir3_block *block = ctx->block;

		/* if at the end, check outputs: */
		for (i = 0; i < block->noutputs; i++)
			if (block->outputs[i] == assigner)
				return true;
		return false;
	}

	/* does any src of this instruction refer to 'assigner'? */
	for (i = 1; i < instr->regs_count; i++) {
		struct ir3_register *reg = instr->regs[i];
		if ((reg->flags & IR3_REG_SSA) && (reg->instr == assigner)) {
			if (is_meta(instr)) {
				switch (instr->opc) {
				case OPC_META_INPUT:
					// TODO
					assert(0);
					break;
				case OPC_META_FO:
				case OPC_META_FI:
					/* the value is passed on through the fanout/
					 * fanin, so uses of this meta instruction
					 * count as uses of 'assigner' too:
					 */
					was_live |= compute_clobbers(ctx, instr->next,
							instr, liveregs);
					break;
				default:
					break;
				}
			}
			live = true;
			break;
		}
	}

	/* recurse to the end of the list first.. */
	was_live |= compute_clobbers(ctx, instr->next, assigner, liveregs);

	/* ..and on the way back, anything already assigned that sits in
	 * front of a later use would clobber the value:
	 */
	if (was_live && (instr->regs_count > 0) &&
			(instr->flags & IR3_INSTR_MARK) &&
			!is_meta(instr))
		regmask_set(liveregs, instr->regs[0]);

	return live || was_live;
}
214
Rob Clarka5ac36a2014-07-21 15:24:30 -0400215static int find_available(regmask_t *liveregs, int size, bool half)
Rob Clark554f1ac2014-01-29 17:18:49 -0500216{
217 unsigned i;
Rob Clarka5ac36a2014-07-21 15:24:30 -0400218 unsigned f = half ? IR3_REG_HALF : 0;
Rob Clark554f1ac2014-01-29 17:18:49 -0500219 for (i = 0; i < MAX_REG - size; i++) {
Rob Clarka5ac36a2014-07-21 15:24:30 -0400220 if (!regmask_get(liveregs, &REG(i, X, f))) {
Rob Clark554f1ac2014-01-29 17:18:49 -0500221 unsigned start = i++;
222 for (; (i < MAX_REG) && ((i - start) < size); i++)
Rob Clarka5ac36a2014-07-21 15:24:30 -0400223 if (regmask_get(liveregs, &REG(i, X, f)))
Rob Clark554f1ac2014-01-29 17:18:49 -0500224 break;
225 if ((i - start) >= size)
226 return start;
227 }
228 }
229 assert(0);
230 return -1;
231}
232
233static int alloc_block(struct ir3_ra_ctx *ctx,
234 struct ir3_instruction *instr, int size)
235{
236 if (!instr) {
237 /* special case, allocating shader outputs. At this
238 * point, nothing is allocated, just start the shader
239 * outputs at r0.x and let compute_liveregs() take
240 * care of the rest from here:
241 */
242 return 0;
243 } else {
Rob Clarka5ac36a2014-07-21 15:24:30 -0400244 struct ir3_register *dst = instr->regs[0];
Rob Clark554f1ac2014-01-29 17:18:49 -0500245 regmask_t liveregs;
Rob Clarka5ac36a2014-07-21 15:24:30 -0400246
Rob Clark554f1ac2014-01-29 17:18:49 -0500247 compute_liveregs(ctx, instr, &liveregs);
248
249 // XXX XXX XXX XXX XXX XXX XXX XXX XXX
250 // XXX hack.. maybe ra_calc should give us a list of
251 // instrs to compute_clobbers() on?
252 if (is_meta(instr) && (instr->opc == OPC_META_INPUT) &&
253 (instr->regs_count == 1)) {
254 unsigned i, base = instr->regs[0]->num & ~0x3;
255 for (i = 0; i < 4; i++) {
256 struct ir3_instruction *in = ctx->block->inputs[base + i];
257 if (in)
258 compute_clobbers(ctx, in->next, in, &liveregs);
259 }
260 } else
261 // XXX XXX XXX XXX XXX XXX XXX XXX XXX
262 compute_clobbers(ctx, instr->next, instr, &liveregs);
Rob Clarka5ac36a2014-07-21 15:24:30 -0400263
264 return find_available(&liveregs, size,
265 !!(dst->flags & IR3_REG_HALF));
Rob Clark554f1ac2014-01-29 17:18:49 -0500266 }
267}
268
269/*
270 * Constraint Calculation:
271 */
272
/* Visitor used by ra_calc(); the callbacks accumulate the resulting
 * range constraint in 'a'.
 */
struct ra_calc_visitor {
	struct ir3_visitor base;     /* must stay first, see ra_calc_visitor() */
	struct ir3_ra_assignment a;  /* result being built up */
};
277
/* Downcast from the embedded base visitor back to the ra_calc_visitor
 * that contains it ('base' is the first member, so a plain cast works).
 */
static inline struct ra_calc_visitor *ra_calc_visitor(struct ir3_visitor *v)
{
	struct ra_calc_visitor *calc = (struct ra_calc_visitor *)v;
	return calc;
}
282
283/* calculate register assignment for the instruction. If the register
284 * written by this instruction is required to be part of a range, to
285 * handle other (input/output/sam/bary.f/etc) contiguous register range
286 * constraints, that is calculated handled here.
287 */
288static void ra_calc_dst(struct ir3_visitor *v,
289 struct ir3_instruction *instr, struct ir3_register *reg)
290{
291 struct ra_calc_visitor *c = ra_calc_visitor(v);
292 if (is_tex(instr)) {
293 c->a.off = 0;
294 c->a.num = 4;
295 } else {
296 c->a.off = 0;
297 c->a.num = 1;
298 }
299}
300
301static void
302ra_calc_dst_shader_input(struct ir3_visitor *v,
303 struct ir3_instruction *instr, struct ir3_register *reg)
304{
305 struct ra_calc_visitor *c = ra_calc_visitor(v);
306 struct ir3_block *block = instr->block;
307 struct ir3_register *dst = instr->regs[0];
308 unsigned base = dst->num & ~0x3;
309 unsigned i, num = 0;
310
311 assert(!(dst->flags & IR3_REG_IA));
312
313 /* check what input components we need: */
314 for (i = 0; i < 4; i++) {
315 unsigned idx = base + i;
316 if ((idx < block->ninputs) && block->inputs[idx])
317 num = i + 1;
318 }
319
320 c->a.off = dst->num - base;
321 c->a.num = num;
322}
323
324static void ra_calc_src_fanin(struct ir3_visitor *v,
325 struct ir3_instruction *instr, struct ir3_register *reg)
326{
327 struct ra_calc_visitor *c = ra_calc_visitor(v);
328 unsigned srcn = ir3_instr_regno(instr, reg) - 1;
Rob Clarke8cca572014-02-16 07:41:59 -0500329 c->a.off += srcn;
Rob Clark554f1ac2014-01-29 17:18:49 -0500330 c->a.num += srcn;
331 c->a.num = MAX2(c->a.num, instr->regs_count - 1);
332}
333
/* Callback table for the constraint calculation (see ra_calc()).  Only
 * shader inputs and fanin srcs have special handling; every other dst
 * goes through ra_calc_dst() and every other src through the generic
 * ir3_visit_reg().
 */
static const struct ir3_visitor_funcs calc_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_calc_dst_shader_input,
		.dst_fanout = ra_calc_dst,
		.dst_fanin = ra_calc_dst,
		.dst = ra_calc_dst,
		.src_fanout = ir3_visit_reg,
		.src_fanin = ra_calc_src_fanin,
		.src = ir3_visit_reg,
};
344
345static struct ir3_ra_assignment ra_calc(struct ir3_instruction *assigner)
346{
347 struct ra_calc_visitor v = {
348 .base.funcs = &calc_visitor_funcs,
349 };
350
351 ir3_visit_instr(&v.base, assigner);
352
353 return v.a;
354}
355
356/*
357 * Register Assignment:
358 */
359
/* Visitor used by ra_assign() to write one register number into an
 * instruction and everything that has to follow from it.
 */
struct ra_assign_visitor {
	struct ir3_visitor base;   /* must stay first, see ra_assign_visitor() */
	struct ir3_ra_ctx *ctx;    /* RA state, needed for recursive ra_assign() */
	int num;                   /* register to assign, optionally | REG_HALF */
};
365
/* Downcast from the embedded base visitor back to the ra_assign_visitor
 * that contains it ('base' is the first member, so a plain cast works).
 */
static inline struct ra_assign_visitor *ra_assign_visitor(struct ir3_visitor *v)
{
	struct ra_assign_visitor *assign = (struct ra_assign_visitor *)v;
	return assign;
}
370
Rob Clark3f7239c2014-02-22 09:46:39 -0500371static type_t half_type(type_t type)
372{
373 switch (type) {
374 case TYPE_F32: return TYPE_F16;
375 case TYPE_U32: return TYPE_U16;
376 case TYPE_S32: return TYPE_S16;
377 /* instructions may already be fixed up: */
378 case TYPE_F16:
379 case TYPE_U16:
380 case TYPE_S16:
381 return type;
382 default:
383 assert(0);
384 return ~0;
385 }
386}
387
388/* some instructions need fix-up if dst register is half precision: */
389static void fixup_half_instr_dst(struct ir3_instruction *instr)
390{
391 switch (instr->category) {
392 case 1: /* move instructions */
393 instr->cat1.dst_type = half_type(instr->cat1.dst_type);
394 break;
395 case 3:
396 switch (instr->opc) {
397 case OPC_MAD_F32:
398 instr->opc = OPC_MAD_F16;
399 break;
400 case OPC_SEL_B32:
401 instr->opc = OPC_SEL_B16;
402 break;
403 case OPC_SEL_S32:
404 instr->opc = OPC_SEL_S16;
405 break;
406 case OPC_SEL_F32:
407 instr->opc = OPC_SEL_F16;
408 break;
409 case OPC_SAD_S32:
410 instr->opc = OPC_SAD_S16;
411 break;
412 /* instructions may already be fixed up: */
413 case OPC_MAD_F16:
414 case OPC_SEL_B16:
415 case OPC_SEL_S16:
416 case OPC_SEL_F16:
417 case OPC_SAD_S16:
418 break;
419 default:
420 assert(0);
421 break;
422 }
423 break;
424 case 5:
425 instr->cat5.type = half_type(instr->cat5.type);
426 break;
427 }
428}
429/* some instructions need fix-up if src register is half precision: */
430static void fixup_half_instr_src(struct ir3_instruction *instr)
431{
432 switch (instr->category) {
433 case 1: /* move instructions */
434 instr->cat1.src_type = half_type(instr->cat1.src_type);
435 break;
436 }
437}
438
Rob Clark554f1ac2014-01-29 17:18:49 -0500439static void ra_assign_reg(struct ir3_visitor *v,
440 struct ir3_instruction *instr, struct ir3_register *reg)
441{
442 struct ra_assign_visitor *a = ra_assign_visitor(v);
Rob Clark66404572014-02-25 08:51:30 -0500443
444 if (is_flow(instr) && (instr->opc == OPC_KILL))
445 return;
446
Rob Clark554f1ac2014-01-29 17:18:49 -0500447 reg->flags &= ~IR3_REG_SSA;
Rob Clark3f7239c2014-02-22 09:46:39 -0500448 reg->num = a->num & ~REG_HALF;
Rob Clarkae5efaf2014-03-29 11:42:01 -0400449
450 assert(reg->num >= 0);
451
Rob Clark3f7239c2014-02-22 09:46:39 -0500452 if (a->num & REG_HALF) {
453 reg->flags |= IR3_REG_HALF;
454 /* if dst reg being assigned, patch up the instr: */
455 if (reg == instr->regs[0])
456 fixup_half_instr_dst(instr);
457 else
458 fixup_half_instr_src(instr);
459 }
Rob Clark554f1ac2014-01-29 17:18:49 -0500460}
461
462static void ra_assign_dst_shader_input(struct ir3_visitor *v,
463 struct ir3_instruction *instr, struct ir3_register *reg)
464{
465 struct ra_assign_visitor *a = ra_assign_visitor(v);
466 unsigned i, base = reg->num & ~0x3;
467 int off = base - reg->num;
468
469 ra_assign_reg(v, instr, reg);
470 reg->flags |= IR3_REG_IA;
471
472 /* trigger assignment of all our companion input components: */
473 for (i = 0; i < 4; i++) {
474 struct ir3_instruction *in = instr->block->inputs[i+base];
475 if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
476 ra_assign(a->ctx, in, a->num + off + i);
477 }
478}
479
480static void ra_assign_dst_fanout(struct ir3_visitor *v,
481 struct ir3_instruction *instr, struct ir3_register *reg)
482{
483 struct ra_assign_visitor *a = ra_assign_visitor(v);
484 struct ir3_register *src = instr->regs[1];
485 ra_assign_reg(v, instr, reg);
486 if (src->flags & IR3_REG_SSA)
487 ra_assign(a->ctx, src->instr, a->num - instr->fo.off);
488}
489
490static void ra_assign_src_fanout(struct ir3_visitor *v,
491 struct ir3_instruction *instr, struct ir3_register *reg)
492{
493 struct ra_assign_visitor *a = ra_assign_visitor(v);
494 ra_assign_reg(v, instr, reg);
495 ra_assign(a->ctx, instr, a->num + instr->fo.off);
496}
497
498
499static void ra_assign_src_fanin(struct ir3_visitor *v,
500 struct ir3_instruction *instr, struct ir3_register *reg)
501{
502 struct ra_assign_visitor *a = ra_assign_visitor(v);
503 unsigned j, srcn = ir3_instr_regno(instr, reg) - 1;
504 ra_assign_reg(v, instr, reg);
505 ra_assign(a->ctx, instr, a->num - srcn);
506 for (j = 1; j < instr->regs_count; j++) {
507 struct ir3_register *reg = instr->regs[j];
508 if (reg->flags & IR3_REG_SSA) /* could be renamed already */
509 ra_assign(a->ctx, reg->instr, a->num - srcn + j - 1);
510 }
511}
512
/* Callback table for the assignment pass (see ra_assign()).  Shader
 * inputs, fanout (dst and src) and fanin srcs propagate the assignment
 * to related instructions; everything else is a plain ra_assign_reg().
 */
static const struct ir3_visitor_funcs assign_visitor_funcs = {
		.instr = ir3_visit_instr,
		.dst_shader_input = ra_assign_dst_shader_input,
		.dst_fanout = ra_assign_dst_fanout,
		.dst_fanin = ra_assign_reg,
		.dst = ra_assign_reg,
		.src_fanout = ra_assign_src_fanout,
		.src_fanin = ra_assign_src_fanin,
		.src = ra_assign_reg,
};
523
524static void ra_assign(struct ir3_ra_ctx *ctx,
525 struct ir3_instruction *assigner, int num)
526{
527 struct ra_assign_visitor v = {
528 .base.funcs = &assign_visitor_funcs,
529 .ctx = ctx,
530 .num = num,
531 };
532
533 /* if we've already visited this instruction, bail now: */
534 if (ir3_instr_check_mark(assigner)) {
Rob Clark3f7239c2014-02-22 09:46:39 -0500535 debug_assert(assigner->regs[0]->num == (num & ~REG_HALF));
536 if (assigner->regs[0]->num != (num & ~REG_HALF)) {
Rob Clark554f1ac2014-01-29 17:18:49 -0500537 /* impossible situation, should have been resolved
538 * at an earlier stage by inserting extra mov's:
539 */
540 ctx->error = true;
541 }
542 return;
543 }
544
545 ir3_visit_instr(&v.base, assigner);
546}
547
548/*
549 *
550 */
551
552static void ir3_instr_ra(struct ir3_ra_ctx *ctx,
553 struct ir3_instruction *instr)
554{
Rob Clarka5ac36a2014-07-21 15:24:30 -0400555 struct ir3_register *dst;
Rob Clark554f1ac2014-01-29 17:18:49 -0500556 unsigned num;
557
558 /* skip over nop's */
559 if (instr->regs_count == 0)
560 return;
561
Rob Clarka5ac36a2014-07-21 15:24:30 -0400562 dst = instr->regs[0];
Rob Clark579473f2014-02-16 07:35:20 -0500563
Rob Clark554f1ac2014-01-29 17:18:49 -0500564 /* if we've already visited this instruction, bail now: */
565 if (instr->flags & IR3_INSTR_MARK)
566 return;
567
568 /* allocate register(s): */
Rob Clark9f391322014-07-25 09:49:41 -0400569 if (is_addr(instr)) {
Rob Clarka5ac36a2014-07-21 15:24:30 -0400570 num = instr->regs[2]->num;
571 } else if (reg_gpr(dst)) {
572 struct ir3_ra_assignment a;
573 a = ra_calc(instr);
574 num = alloc_block(ctx, instr, a.num) + a.off;
575 } else if (dst->flags & IR3_REG_ADDR) {
576 dst->flags &= ~IR3_REG_ADDR;
577 num = regid(REG_A0, 0) | REG_HALF;
578 } else {
Rob Clark2f181bc2014-07-23 15:08:40 -0400579 /* predicate register (p0).. etc */
580 return;
Rob Clarka5ac36a2014-07-21 15:24:30 -0400581 }
Rob Clark554f1ac2014-01-29 17:18:49 -0500582
583 ra_assign(ctx, instr, num);
584}
585
/* flatten into shader:
 *
 * Rebuilds shader->instrs[] from the block's instruction list, dropping
 * meta instructions, merging consecutive nops and appending an end
 * instruction.  While doing so it adds the flags that depend on final
 * register numbers: (ss)/(sy) sync flags, (ul) on the last user of a
 * relative src before a0 is written again, and (ei) on the last input
 * instruction.  Also records in ctx->has_samp whether any tex
 * instruction made it into the shader.
 */
// XXX this should probably be somewhere else:
static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
{
	struct ir3_instruction *n;
	struct ir3 *shader = block->shader;
	struct ir3_instruction *end =
			ir3_instr_create(block, 0, OPC_END);
	struct ir3_instruction *last_input = NULL;
	struct ir3_instruction *last_rel = NULL;
	regmask_t needs_ss_war;       /* write after read */
	regmask_t needs_ss;           /* dsts of sfu instructions */
	regmask_t needs_sy;           /* dsts of tex instructions */

	regmask_init(&needs_ss_war);
	regmask_init(&needs_ss);
	regmask_init(&needs_sy);

	/* start over with an empty instruction array, it gets refilled
	 * below:
	 */
	shader->instrs_count = 0;

	for (n = block->head; n; n = n->next) {
		struct ir3_register *reg;
		unsigned i;

		if (is_meta(n))
			continue;

		/* srcs: sync if reading something still in flight */
		for (i = 1; i < n->regs_count; i++) {
			reg = n->regs[i];

			if (reg_gpr(reg)) {

				/* TODO: we probably only need (ss) for alu
				 * instr consuming sfu result.. need to make
				 * some tests for both this and (sy)..
				 */
				if (regmask_get(&needs_ss, reg)) {
					n->flags |= IR3_INSTR_SS;
					regmask_init(&needs_ss);
				}

				if (regmask_get(&needs_sy, reg)) {
					n->flags |= IR3_INSTR_SY;
					regmask_init(&needs_sy);
				}
			}

			/* TODO: is it valid to have address reg loaded from a
			 * relative src (ie. mova a0, c<a0.x+4>)?  If so, the
			 * last_rel check below should be moved ahead of this:
			 */
			if (reg->flags & IR3_REG_RELATIV)
				last_rel = n;
		}

		/* dst: */
		if (n->regs_count > 0) {
			reg = n->regs[0];
			/* overwriting a register that an earlier tex/sfu may
			 * not have consumed yet:
			 */
			if (regmask_get(&needs_ss_war, reg)) {
				n->flags |= IR3_INSTR_SS;
				regmask_init(&needs_ss_war); // ??? I assume?
			}

			/* a0 is about to be overwritten, so the last instruction
			 * with a relative src gets the (ul) flag:
			 */
			if (last_rel && (reg->num == regid(REG_A0, 0))) {
				last_rel->flags |= IR3_INSTR_UL;
				last_rel = NULL;
			}
		}

		/* cat5+ does not have an (ss) bit, if needed we need to
		 * insert a nop to carry the sync flag.  Would be kinda
		 * clever if we were aware of this during scheduling, but
		 * this should be a pretty rare case:
		 *
		 * NOTE(review): the nop is never stored into shader->instrs[]
		 * here; presumably ir3_instr_create() appends it itself --
		 * confirm.
		 */
		if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
			struct ir3_instruction *nop;
			nop = ir3_instr_create(block, 0, OPC_NOP);
			nop->flags |= IR3_INSTR_SS;
			n->flags &= ~IR3_INSTR_SS;
		}

		/* need to be able to set (ss) on first instruction: */
		if ((shader->instrs_count == 0) && (n->category >= 5))
			ir3_instr_create(block, 0, OPC_NOP);

		/* fold a nop into a directly preceding nop by bumping that
		 * one's repeat count (as long as it is below 5):
		 */
		if (is_nop(n) && shader->instrs_count) {
			struct ir3_instruction *last =
					shader->instrs[shader->instrs_count-1];
			if (is_nop(last) && (last->repeat < 5)) {
				last->repeat++;
				last->flags |= n->flags;
				continue;
			}
		}

		shader->instrs[shader->instrs_count++] = n;

		if (is_sfu(n))
			regmask_set(&needs_ss, n->regs[0]);

		if (is_tex(n)) {
			/* only record whether there is any tex instruction at
			 * all.  We do this here, rather than when we encounter
			 * a SAMP decl, because (especially in binning pass
			 * shader) the samp instruction(s) could get eliminated
			 * if the result is not used.
			 */
			ctx->has_samp = true;
			regmask_set(&needs_sy, n->regs[0]);
		}

		/* both tex/sfu appear to not always immediately consume
		 * their src register(s):
		 */
		if (is_tex(n) || is_sfu(n)) {
			for (i = 1; i < n->regs_count; i++) {
				reg = n->regs[i];
				if (reg_gpr(reg))
					regmask_set(&needs_ss_war, reg);
			}
		}

		if (is_input(n))
			last_input = n;
	}

	if (last_input)
		last_input->regs[0]->flags |= IR3_REG_EI;

	/* relative src that was never followed by a write to a0: */
	if (last_rel)
		last_rel->flags |= IR3_INSTR_UL;

	shader->instrs[shader->instrs_count++] = end;

	/* the first instruction always syncs on everything: */
	shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
}
722
723static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
724{
725 struct ir3_instruction *n;
726
727 if (!block->parent) {
Rob Clark66404572014-02-25 08:51:30 -0500728 unsigned i, j;
Rob Clark554f1ac2014-01-29 17:18:49 -0500729 int base, off = output_base(ctx);
730
731 base = alloc_block(ctx, NULL, block->noutputs + off);
732
Rob Clark3f7239c2014-02-22 09:46:39 -0500733 if (ctx->half_precision)
734 base |= REG_HALF;
735
Rob Clark554f1ac2014-01-29 17:18:49 -0500736 for (i = 0; i < block->noutputs; i++)
Rob Clark66404572014-02-25 08:51:30 -0500737 if (block->outputs[i] && !is_kill(block->outputs[i]))
Rob Clark554f1ac2014-01-29 17:18:49 -0500738 ra_assign(ctx, block->outputs[i], base + i + off);
739
740 if (ctx->type == SHADER_FRAGMENT) {
Rob Clark66404572014-02-25 08:51:30 -0500741 i = 0;
742 if (ctx->frag_face) {
743 /* if we have frag_face, it gets hr0.x */
744 ra_assign(ctx, block->inputs[i], REG_HALF | 0);
745 i += 4;
746 }
747 for (j = 0; i < block->ninputs; i++, j++)
Rob Clark554f1ac2014-01-29 17:18:49 -0500748 if (block->inputs[i])
Rob Clark66404572014-02-25 08:51:30 -0500749 ra_assign(ctx, block->inputs[i], (base & ~REG_HALF) + j);
Rob Clark554f1ac2014-01-29 17:18:49 -0500750 } else {
751 for (i = 0; i < block->ninputs; i++)
752 if (block->inputs[i])
753 ir3_instr_ra(ctx, block->inputs[i]);
754 }
755 }
756
757 /* then loop over instruction list and assign registers:
758 */
759 n = block->head;
760 while (n) {
761 ir3_instr_ra(ctx, n);
762 if (ctx->error)
763 return -1;
764 n = n->next;
765 }
766
767 legalize(ctx, block);
768
769 return 0;
770}
771
Rob Clark3f7239c2014-02-22 09:46:39 -0500772int ir3_block_ra(struct ir3_block *block, enum shader_t type,
Rob Clarkee839cc2014-04-08 14:14:43 -0400773 bool half_precision, bool frag_coord, bool frag_face,
774 bool *has_samp)
Rob Clark554f1ac2014-01-29 17:18:49 -0500775{
776 struct ir3_ra_ctx ctx = {
777 .block = block,
778 .type = type,
Rob Clark3f7239c2014-02-22 09:46:39 -0500779 .half_precision = half_precision,
Rob Clark66404572014-02-25 08:51:30 -0500780 .frag_coord = frag_coord,
781 .frag_face = frag_face,
Rob Clark554f1ac2014-01-29 17:18:49 -0500782 };
Rob Clarkee839cc2014-04-08 14:14:43 -0400783 int ret;
784
Rob Clark7d7e6ae2014-07-25 10:56:23 -0400785 ir3_clear_mark(block->shader);
Rob Clarkee839cc2014-04-08 14:14:43 -0400786 ret = block_ra(&ctx, block);
787 *has_samp = ctx.has_samp;
788
789 return ret;
Rob Clark554f1ac2014-01-29 17:18:49 -0500790}