blob: d8cbb0b3edeb66a45326f339eb2840b91697ea92 [file] [log] [blame]
Boris Brezillona72bab12020-03-05 09:30:58 +01001/*
2 * Copyright (C) 2018 Alyssa Rosenzweig
3 * Copyright (C) 2020 Collabora Ltd.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
Boris Brezillon0d75eb02020-03-06 09:59:56 +010025#include "util/macros.h"
Boris Brezillon836686d2020-03-06 09:45:31 +010026#include "util/u_prim.h"
Boris Brezillon5d9995e2020-03-06 08:02:14 +010027#include "util/u_vbuf.h"
Alyssa Rosenzweigb17b6cc2020-08-26 11:22:47 -040028#include "util/u_helpers.h"
Boris Brezillon0d75eb02020-03-06 09:59:56 +010029
30#include "panfrost-quirks.h"
31
Alyssa Rosenzweigc8d848b2020-07-07 16:24:41 -040032#include "pan_pool.h"
Boris Brezillon0b735a22020-03-05 09:46:42 +010033#include "pan_bo.h"
Boris Brezillona72bab12020-03-05 09:30:58 +010034#include "pan_cmdstream.h"
35#include "pan_context.h"
36#include "pan_job.h"
37
Alyssa Rosenzweig02a638c2020-03-23 19:10:06 -040038/* If a BO is accessed for a particular shader stage, will it be in the primary
39 * batch (vertex/tiler) or the secondary batch (fragment)? Anything but
40 * fragment will be primary, e.g. compute jobs will be considered
41 * "vertex/tiler" by analogy */
42
43static inline uint32_t
44panfrost_bo_access_for_stage(enum pipe_shader_type stage)
45{
46 assert(stage == PIPE_SHADER_FRAGMENT ||
47 stage == PIPE_SHADER_VERTEX ||
48 stage == PIPE_SHADER_COMPUTE);
49
50 return stage == PIPE_SHADER_FRAGMENT ?
51 PAN_BO_ACCESS_FRAGMENT :
52 PAN_BO_ACCESS_VERTEX_TILER;
53}
54
Alyssa Rosenzweig136fd5c2020-08-25 12:52:45 -040055mali_ptr
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -040056panfrost_vt_emit_shared_memory(struct panfrost_batch *batch)
Tomeu Vizoso7b10d4e2020-04-08 10:55:28 +020057{
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -040058 struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
Tomeu Vizoso7b10d4e2020-04-08 10:55:28 +020059
Boris Brezillon3a06fc32020-09-03 09:18:09 +020060 struct panfrost_transfer t =
61 panfrost_pool_alloc_aligned(&batch->pool,
62 MALI_LOCAL_STORAGE_LENGTH,
63 64);
Alyssa Rosenzweigb41692c2020-08-17 12:30:49 -040064
Boris Brezillon3a06fc32020-09-03 09:18:09 +020065 pan_pack(t.cpu, LOCAL_STORAGE, ls) {
66 ls.wls_instances = MALI_LOCAL_STORAGE_NO_WORKGROUP_MEM;
67 if (batch->stack_size) {
68 struct panfrost_bo *stack =
69 panfrost_batch_get_scratchpad(batch, batch->stack_size,
70 dev->thread_tls_alloc,
71 dev->core_count);
Alyssa Rosenzweigb41692c2020-08-17 12:30:49 -040072
Boris Brezillon3a06fc32020-09-03 09:18:09 +020073 ls.tls_size = panfrost_get_stack_shift(batch->stack_size);
74 ls.tls_base_pointer = stack->gpu;
75 }
Alyssa Rosenzweigb41692c2020-08-17 12:30:49 -040076 }
77
Boris Brezillon3a06fc32020-09-03 09:18:09 +020078 return t.gpu;
Boris Brezillon0d75eb02020-03-06 09:59:56 +010079}
80
/* Gets a GPU address for the associated index buffer. Only guaranteed to be
 * good for the duration of the draw (transient), though it could last longer.
 * Also computes the bounds on the index buffer for the range accessed by the
 * draw. We do these operations together because there are natural
 * optimizations which require them to be together. */
Alyssa Rosenzweig3a4d9302020-08-25 13:25:29 -040087mali_ptr
Boris Brezillon5d9995e2020-03-06 08:02:14 +010088panfrost_get_index_buffer_bounded(struct panfrost_context *ctx,
89 const struct pipe_draw_info *info,
90 unsigned *min_index, unsigned *max_index)
91{
92 struct panfrost_resource *rsrc = pan_resource(info->index.resource);
93 struct panfrost_batch *batch = panfrost_get_batch_for_fbo(ctx);
94 off_t offset = info->start * info->index_size;
95 bool needs_indices = true;
96 mali_ptr out = 0;
97
98 if (info->max_index != ~0u) {
99 *min_index = info->min_index;
100 *max_index = info->max_index;
101 needs_indices = false;
102 }
103
104 if (!info->has_user_indices) {
105 /* Only resources can be directly mapped */
106 panfrost_batch_add_bo(batch, rsrc->bo,
107 PAN_BO_ACCESS_SHARED |
108 PAN_BO_ACCESS_READ |
109 PAN_BO_ACCESS_VERTEX_TILER);
110 out = rsrc->bo->gpu + offset;
111
112 /* Check the cache */
113 needs_indices = !panfrost_minmax_cache_get(rsrc->index_cache,
114 info->start,
115 info->count,
116 min_index,
117 max_index);
118 } else {
119 /* Otherwise, we need to upload to transient memory */
120 const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
Alyssa Rosenzweig373a2042020-08-17 14:27:57 -0400121 struct panfrost_transfer T =
122 panfrost_pool_alloc_aligned(&batch->pool,
123 info->count * info->index_size,
124 info->index_size);
125
126 memcpy(T.cpu, ibuf8 + offset, info->count * info->index_size);
127 out = T.gpu;
Boris Brezillon5d9995e2020-03-06 08:02:14 +0100128 }
129
130 if (needs_indices) {
131 /* Fallback */
132 u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
133
134 if (!info->has_user_indices)
135 panfrost_minmax_cache_add(rsrc->index_cache,
136 info->start, info->count,
137 *min_index, *max_index);
138 }
139
140 return out;
141}
142
Boris Brezillon2b946a12020-03-05 16:26:56 +0100143static unsigned
Boris Brezillon2b946a12020-03-05 16:26:56 +0100144translate_tex_wrap(enum pipe_tex_wrap w)
145{
146 switch (w) {
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400147 case PIPE_TEX_WRAP_REPEAT: return MALI_WRAP_MODE_REPEAT;
148 case PIPE_TEX_WRAP_CLAMP: return MALI_WRAP_MODE_CLAMP;
149 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE;
150 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER;
151 case PIPE_TEX_WRAP_MIRROR_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT;
152 case PIPE_TEX_WRAP_MIRROR_CLAMP: return MALI_WRAP_MODE_MIRRORED_CLAMP;
153 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE;
154 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_BORDER;
155 default: unreachable("Invalid wrap");
156 }
157}
Boris Brezillon2b946a12020-03-05 16:26:56 +0100158
/* The hardware compares in the wrong order, so we have to flip before
 * encoding. Yes, really. */
Boris Brezillon2b946a12020-03-05 16:26:56 +0100161
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400162static enum mali_func
163panfrost_sampler_compare_func(const struct pipe_sampler_state *cso)
164{
165 if (!cso->compare_mode)
166 return MALI_FUNC_NEVER;
Boris Brezillon2b946a12020-03-05 16:26:56 +0100167
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400168 enum mali_func f = panfrost_translate_compare_func(cso->compare_func);
169 return panfrost_flip_compare_func(f);
170}
Boris Brezillon2b946a12020-03-05 16:26:56 +0100171
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400172static enum mali_mipmap_mode
173pan_pipe_to_mipmode(enum pipe_tex_mipfilter f)
174{
175 switch (f) {
176 case PIPE_TEX_MIPFILTER_NEAREST: return MALI_MIPMAP_MODE_NEAREST;
177 case PIPE_TEX_MIPFILTER_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR;
178 case PIPE_TEX_MIPFILTER_NONE: return MALI_MIPMAP_MODE_NONE;
179 default: unreachable("Invalid");
Boris Brezillon2b946a12020-03-05 16:26:56 +0100180 }
181}
182
183void panfrost_sampler_desc_init(const struct pipe_sampler_state *cso,
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400184 struct mali_midgard_sampler_packed *hw)
Boris Brezillon2b946a12020-03-05 16:26:56 +0100185{
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400186 pan_pack(hw, MIDGARD_SAMPLER, cfg) {
187 cfg.magnify_nearest = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
188 cfg.minify_nearest = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
189 cfg.mipmap_mode = (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) ?
190 MALI_MIPMAP_MODE_TRILINEAR : MALI_MIPMAP_MODE_NEAREST;
191 cfg.normalized_coordinates = cso->normalized_coords;
Boris Brezillon2b946a12020-03-05 16:26:56 +0100192
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400193 cfg.lod_bias = FIXED_16(cso->lod_bias, true);
Boris Brezillon2b946a12020-03-05 16:26:56 +0100194
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400195 cfg.minimum_lod = FIXED_16(cso->min_lod, false);
Boris Brezillon2b946a12020-03-05 16:26:56 +0100196
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400197 /* If necessary, we disable mipmapping in the sampler descriptor by
198 * clamping the LOD as tight as possible (from 0 to epsilon,
199 * essentially -- remember these are fixed point numbers, so
200 * epsilon=1/256) */
201
202 cfg.maximum_lod = (cso->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) ?
203 cfg.minimum_lod + 1 :
204 FIXED_16(cso->max_lod, false);
205
206 cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s);
207 cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t);
208 cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r);
209
210 cfg.compare_function = panfrost_sampler_compare_func(cso);
211 cfg.seamless_cube_map = cso->seamless_cube_map;
212
213 cfg.border_color_r = cso->border_color.f[0];
Icecream958557b1a2020-08-13 19:35:00 +1200214 cfg.border_color_g = cso->border_color.f[1];
215 cfg.border_color_b = cso->border_color.f[2];
216 cfg.border_color_a = cso->border_color.f[3];
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400217 }
Boris Brezillon2b946a12020-03-05 16:26:56 +0100218}
219
Tomeu Vizosod3eb23a2020-04-17 14:23:39 +0200220void panfrost_sampler_desc_init_bifrost(const struct pipe_sampler_state *cso,
Alyssa Rosenzweigb10c3c82020-08-11 18:25:03 -0400221 struct mali_bifrost_sampler_packed *hw)
Tomeu Vizosod3eb23a2020-04-17 14:23:39 +0200222{
Alyssa Rosenzweigb10c3c82020-08-11 18:25:03 -0400223 pan_pack(hw, BIFROST_SAMPLER, cfg) {
Alyssa Rosenzweig3943bce2020-10-06 21:31:18 -0400224 cfg.point_sample_magnify = cso->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
225 cfg.point_sample_minify = cso->min_img_filter == PIPE_TEX_FILTER_NEAREST;
Alyssa Rosenzweigb10c3c82020-08-11 18:25:03 -0400226 cfg.mipmap_mode = pan_pipe_to_mipmode(cso->min_mip_filter);
227 cfg.normalized_coordinates = cso->normalized_coords;
Tomeu Vizosod3eb23a2020-04-17 14:23:39 +0200228
Alyssa Rosenzweigb10c3c82020-08-11 18:25:03 -0400229 cfg.lod_bias = FIXED_16(cso->lod_bias, true);
230 cfg.minimum_lod = FIXED_16(cso->min_lod, false);
231 cfg.maximum_lod = FIXED_16(cso->max_lod, false);
Tomeu Vizosod3eb23a2020-04-17 14:23:39 +0200232
Alyssa Rosenzweigb10c3c82020-08-11 18:25:03 -0400233 cfg.wrap_mode_s = translate_tex_wrap(cso->wrap_s);
234 cfg.wrap_mode_t = translate_tex_wrap(cso->wrap_t);
235 cfg.wrap_mode_r = translate_tex_wrap(cso->wrap_r);
236
237 cfg.compare_function = panfrost_sampler_compare_func(cso);
238 cfg.seamless_cube_map = cso->seamless_cube_map;
239 }
Tomeu Vizosod3eb23a2020-04-17 14:23:39 +0200240}
241
Alyssa Rosenzweig1085f742020-05-21 15:49:30 -0400242static bool
243panfrost_fs_required(
244 struct panfrost_shader_state *fs,
245 struct panfrost_blend_final *blend,
246 unsigned rt_count)
247{
248 /* If we generally have side effects */
249 if (fs->fs_sidefx)
250 return true;
251
252 /* If colour is written we need to execute */
253 for (unsigned i = 0; i < rt_count; ++i) {
254 if (!blend[i].no_colour)
255 return true;
256 }
257
258 /* If depth is written and not implied we need to execute.
259 * TODO: Predicate on Z/S writes being enabled */
260 return (fs->writes_depth || fs->writes_stencil);
261}
262
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100263static void
Boris Brezillon01121c72020-09-15 18:07:42 +0200264panfrost_emit_bifrost_blend(struct panfrost_batch *batch,
265 struct panfrost_blend_final *blend,
266 void *rts)
Alyssa Rosenzweigbbec4ff2020-08-18 16:50:38 -0400267{
Alyssa Rosenzweigbbec4ff2020-08-18 16:50:38 -0400268 unsigned rt_count = batch->key.nr_cbufs;
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100269
Alyssa Rosenzweig87e35102020-08-26 09:44:12 -0400270 if (rt_count == 0) {
Boris Brezillon01121c72020-09-15 18:07:42 +0200271 /* Disable blending for depth-only */
Boris Brezillon83899762020-09-16 13:31:37 +0200272 pan_pack(rts, BLEND, cfg) {
273 cfg.enable = false;
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200274 cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OFF;
Boris Brezillon83899762020-09-16 13:31:37 +0200275 }
Boris Brezillon01121c72020-09-15 18:07:42 +0200276 return;
Alyssa Rosenzweig87e35102020-08-26 09:44:12 -0400277 }
Alyssa Rosenzweig8249e2b2020-08-17 19:41:48 -0400278
Boris Brezillon01121c72020-09-15 18:07:42 +0200279 const struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
280 struct panfrost_shader_state *fs = panfrost_get_shader_state(batch->ctx, PIPE_SHADER_FRAGMENT);
281
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100282 for (unsigned i = 0; i < rt_count; ++i) {
Boris Brezillon83899762020-09-16 13:31:37 +0200283 pan_pack(rts + i * MALI_BLEND_LENGTH, BLEND, cfg) {
Alyssa Rosenzweig6beac112020-08-18 17:51:22 -0400284 if (blend[i].no_colour) {
285 cfg.enable = false;
Alyssa Rosenzweig8249e2b2020-08-17 19:41:48 -0400286 } else {
Boris Brezillon01121c72020-09-15 18:07:42 +0200287 cfg.srgb = util_format_is_srgb(batch->key.cbufs[i]->format);
288 cfg.load_destination = blend[i].load_dest;
289 cfg.round_to_fb_precision = !batch->ctx->blend->base.dither;
Tomeu Vizoso3c98c452020-04-24 08:40:51 +0200290 }
Boris Brezillon01121c72020-09-15 18:07:42 +0200291
Boris Brezillon83899762020-09-16 13:31:37 +0200292 if (blend[i].is_shader) {
293 /* The blend shader's address needs to be at
294 * the same top 32 bit as the fragment shader.
295 * TODO: Ensure that's always the case.
296 */
297 assert((blend[i].shader.gpu & (0xffffffffull << 32)) ==
298 (fs->bo->gpu & (0xffffffffull << 32)));
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200299 cfg.bifrost.internal.shader.pc = (u32)blend[i].shader.gpu;
Boris Brezillon91d9c552020-10-12 15:18:35 +0200300 assert(!(fs->blend_ret_addrs[i] & 0x7));
301 cfg.bifrost.internal.shader.return_value = fs->blend_ret_addrs[i];
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200302 cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_SHADER;
Boris Brezillon83899762020-09-16 13:31:37 +0200303 } else {
304 enum pipe_format format = batch->key.cbufs[i]->format;
305 const struct util_format_description *format_desc;
306 unsigned chan_size = 0;
Boris Brezillon01121c72020-09-15 18:07:42 +0200307
Boris Brezillon83899762020-09-16 13:31:37 +0200308 format_desc = util_format_description(format);
Boris Brezillon01121c72020-09-15 18:07:42 +0200309
Boris Brezillon83899762020-09-16 13:31:37 +0200310 for (unsigned i = 0; i < format_desc->nr_channels; i++)
311 chan_size = MAX2(format_desc->channel[0].size, chan_size);
312
313 cfg.bifrost.equation = blend[i].equation.equation;
314
315 /* Fixed point constant */
Boris Brezillond8326ce2020-10-09 14:00:28 +0200316 u16 constant = blend[i].equation.constant * ((1 << chan_size) - 1);
Boris Brezillon83899762020-09-16 13:31:37 +0200317 constant <<= 16 - chan_size;
318 cfg.bifrost.constant = constant;
319
320 if (blend[i].opaque)
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200321 cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_OPAQUE;
Boris Brezillon83899762020-09-16 13:31:37 +0200322 else
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200323 cfg.bifrost.internal.mode = MALI_BIFROST_BLEND_MODE_FIXED_FUNCTION;
Boris Brezillon83899762020-09-16 13:31:37 +0200324
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200325 cfg.bifrost.internal.fixed_function.num_comps = format_desc->nr_channels;
326 cfg.bifrost.internal.fixed_function.conversion.memory_format.format =
Boris Brezillon83899762020-09-16 13:31:37 +0200327 panfrost_format_to_bifrost_blend(format_desc);
328 if (dev->quirks & HAS_SWIZZLES) {
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200329 cfg.bifrost.internal.fixed_function.conversion.memory_format.swizzle =
Boris Brezillon83899762020-09-16 13:31:37 +0200330 panfrost_get_default_swizzle(4);
331 }
Boris Brezillon8d707cd2020-10-12 14:16:53 +0200332 cfg.bifrost.internal.fixed_function.conversion.register_format =
333 fs->blend_types[i];
Boris Brezillon713419e2020-09-16 10:26:06 +0200334 }
Boris Brezillon01121c72020-09-15 18:07:42 +0200335 }
336 }
337}
338
339static void
340panfrost_emit_midgard_blend(struct panfrost_batch *batch,
341 struct panfrost_blend_final *blend,
342 void *rts)
343{
344 unsigned rt_count = batch->key.nr_cbufs;
345
346 if (rt_count == 0) {
347 /* Disable blending for depth-only */
Boris Brezillon83899762020-09-16 13:31:37 +0200348 pan_pack(rts, BLEND, cfg) {
349 cfg.midgard.equation.color_mask = 0xf;
350 cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
351 cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
352 cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
353 cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
354 cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
355 cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
Boris Brezillon01121c72020-09-15 18:07:42 +0200356 }
357 return;
358 }
359
360 for (unsigned i = 0; i < rt_count; ++i) {
Boris Brezillon83899762020-09-16 13:31:37 +0200361 pan_pack(rts + i * MALI_BLEND_LENGTH, BLEND, cfg) {
Boris Brezillon01121c72020-09-15 18:07:42 +0200362 if (blend[i].no_colour) {
Boris Brezillon83899762020-09-16 13:31:37 +0200363 cfg.enable = false;
Boris Brezillon01121c72020-09-15 18:07:42 +0200364 continue;
365 }
366
Boris Brezillon83899762020-09-16 13:31:37 +0200367 cfg.srgb = util_format_is_srgb(batch->key.cbufs[i]->format);
368 cfg.load_destination = blend[i].load_dest;
369 cfg.round_to_fb_precision = !batch->ctx->blend->base.dither;
370 cfg.midgard.blend_shader = blend[i].is_shader;
Boris Brezillon01121c72020-09-15 18:07:42 +0200371 if (blend[i].is_shader) {
Boris Brezillon83899762020-09-16 13:31:37 +0200372 cfg.midgard.shader_pc = blend[i].shader.gpu | blend[i].shader.first_tag;
Boris Brezillon01121c72020-09-15 18:07:42 +0200373 } else {
Boris Brezillon83899762020-09-16 13:31:37 +0200374 cfg.midgard.equation = blend[i].equation.equation;
375 cfg.midgard.constant = blend[i].equation.constant;
Boris Brezillon01121c72020-09-15 18:07:42 +0200376 }
377 }
378 }
379}
380
381static void
382panfrost_emit_blend(struct panfrost_batch *batch, void *rts,
383 struct panfrost_blend_final *blend)
384{
385 const struct panfrost_device *dev = pan_device(batch->ctx->base.screen);
386
387 if (dev->quirks & IS_BIFROST)
388 panfrost_emit_bifrost_blend(batch, blend, rts);
389 else
390 panfrost_emit_midgard_blend(batch, blend, rts);
391
392 for (unsigned i = 0; i < batch->key.nr_cbufs; ++i) {
393 if (!blend[i].no_colour)
394 batch->draws |= (PIPE_CLEAR_COLOR0 << i);
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100395 }
396}
397
398static void
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200399panfrost_prepare_bifrost_fs_state(struct panfrost_context *ctx,
400 struct panfrost_blend_final *blend,
401 struct MALI_RENDERER_STATE *state)
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100402{
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400403 struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
Alyssa Rosenzweig96a91532020-08-20 15:52:32 -0400404 unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;
Alyssa Rosenzweig9a2df302020-08-19 10:25:32 -0400405
Alyssa Rosenzweige0a6af92020-08-21 13:22:11 -0400406 if (!panfrost_fs_required(fs, blend, rt_count)) {
Boris Brezillon519643b2020-10-13 18:32:14 +0200407 state->properties.uniform_buffer_count = 32;
408 state->properties.bifrost.shader_modifies_coverage = true;
409 state->properties.bifrost.allow_forward_pixel_to_kill = true;
410 state->properties.bifrost.allow_forward_pixel_to_be_killed = true;
411 state->properties.bifrost.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200412 } else {
Alyssa Rosenzweigacf77cb2020-08-20 16:41:41 -0400413 bool no_blend = true;
414
415 for (unsigned i = 0; i < rt_count; ++i)
416 no_blend &= (!blend[i].load_dest | blend[i].no_colour);
417
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200418 state->properties = fs->properties;
Boris Brezillon519643b2020-10-13 18:32:14 +0200419 state->properties.bifrost.allow_forward_pixel_to_kill =
420 !fs->can_discard && !fs->writes_depth && no_blend;
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200421 state->shader = fs->shader;
422 state->preload = fs->preload;
423 }
424}
Alyssa Rosenzweigacf77cb2020-08-20 16:41:41 -0400425
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200426static void
427panfrost_prepare_midgard_fs_state(struct panfrost_context *ctx,
428 struct panfrost_blend_final *blend,
429 struct MALI_RENDERER_STATE *state)
430{
431 const struct panfrost_device *dev = pan_device(ctx->base.screen);
432 struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
433 const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
434 unsigned rt_count = ctx->pipe_framebuffer.nr_cbufs;
435 bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
436
437 if (!panfrost_fs_required(fs, blend, rt_count)) {
438 state->shader.shader = 0x1;
Boris Brezillon519643b2020-10-13 18:32:14 +0200439 state->properties.midgard.work_register_count = 1;
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200440 state->properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
Boris Brezillon519643b2020-10-13 18:32:14 +0200441 state->properties.midgard.force_early_z = true;
Alyssa Rosenzweig9a2df302020-08-19 10:25:32 -0400442 } else {
Alyssa Rosenzweig19ded1e2020-08-20 08:06:39 -0400443 /* Reasons to disable early-Z from a shader perspective */
444 bool late_z = fs->can_discard || fs->writes_global ||
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200445 fs->writes_depth || fs->writes_stencil;
Alyssa Rosenzweig9a2df302020-08-19 10:25:32 -0400446
Alyssa Rosenzweig19ded1e2020-08-20 08:06:39 -0400447 /* If either depth or stencil is enabled, discard matters */
448 bool zs_enabled =
449 (zsa->base.depth.enabled && zsa->base.depth.func != PIPE_FUNC_ALWAYS) ||
450 zsa->base.stencil[0].enabled;
451
Alyssa Rosenzweig96a91532020-08-20 15:52:32 -0400452 bool has_blend_shader = false;
453
454 for (unsigned c = 0; c < rt_count; ++c)
455 has_blend_shader |= blend[c].is_shader;
456
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200457 /* TODO: Reduce this limit? */
458 state->properties = fs->properties;
459 if (has_blend_shader)
Boris Brezillon519643b2020-10-13 18:32:14 +0200460 state->properties.midgard.work_register_count = MAX2(fs->work_reg_count, 8);
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200461 else
Boris Brezillon519643b2020-10-13 18:32:14 +0200462 state->properties.midgard.work_register_count = fs->work_reg_count;
Alyssa Rosenzweig96a91532020-08-20 15:52:32 -0400463
Boris Brezillon519643b2020-10-13 18:32:14 +0200464 state->properties.midgard.force_early_z = !(late_z || alpha_to_coverage);
465
466 /* Workaround a hardware errata where early-z cannot be enabled
467 * when discarding even when the depth buffer is read-only, by
468 * lying to the hardware about the discard and setting the
469 * reads tilebuffer? flag to compensate */
470 state->properties.midgard.shader_reads_tilebuffer =
471 fs->outputs_read || (!zs_enabled && fs->can_discard);
472 state->properties.midgard.shader_contains_discard = zs_enabled && fs->can_discard;
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200473 state->shader = fs->shader;
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400474 }
475
476 if (dev->quirks & MIDGARD_SFBD) {
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200477 state->multisample_misc.sfbd_load_destination = blend[0].load_dest;
478 state->multisample_misc.sfbd_blend_shader = blend[0].is_shader;
479 state->stencil_mask_misc.sfbd_write_enable = !blend[0].no_colour;
480 state->stencil_mask_misc.sfbd_srgb = util_format_is_srgb(ctx->pipe_framebuffer.cbufs[0]->format);
481 state->stencil_mask_misc.sfbd_dither_disable = !ctx->blend->base.dither;
482
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400483 if (blend[0].is_shader) {
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200484 state->sfbd_blend_shader = blend[0].shader.gpu |
485 blend[0].shader.first_tag;
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400486 } else {
Boris Brezillon713419e2020-09-16 10:26:06 +0200487 state->sfbd_blend_equation = blend[0].equation.equation;
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200488 state->sfbd_blend_constant = blend[0].equation.constant;
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400489 }
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200490 } else {
Alyssa Rosenzweig58ae50f2020-08-19 15:50:25 -0400491 /* Bug where MRT-capable hw apparently reads the last blend
492 * shader from here instead of the usual location? */
493
494 for (signed rt = ((signed) rt_count - 1); rt >= 0; --rt) {
495 if (!blend[rt].is_shader)
496 continue;
497
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200498 state->sfbd_blend_shader = blend[rt].shader.gpu |
499 blend[rt].shader.first_tag;
Alyssa Rosenzweig58ae50f2020-08-19 15:50:25 -0400500 break;
501 }
Alyssa Rosenzweige5689a52020-08-19 09:27:42 -0400502 }
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200503}
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400504
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200505static void
506panfrost_prepare_fs_state(struct panfrost_context *ctx,
507 struct panfrost_blend_final *blend,
508 struct MALI_RENDERER_STATE *state)
509{
510 const struct panfrost_device *dev = pan_device(ctx->base.screen);
511 struct panfrost_shader_state *fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
512 struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
513 const struct panfrost_zsa_state *zsa = ctx->depth_stencil;
514 bool alpha_to_coverage = ctx->blend->base.alpha_to_coverage;
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400515
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200516 if (dev->quirks & IS_BIFROST)
517 panfrost_prepare_bifrost_fs_state(ctx, blend, state);
518 else
519 panfrost_prepare_midgard_fs_state(ctx, blend, state);
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400520
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200521 bool msaa = rast->multisample;
522 state->multisample_misc.multisample_enable = msaa;
523 state->multisample_misc.sample_mask = (msaa ? ctx->sample_mask : ~0) & 0xFFFF;
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400524
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200525 /* EXT_shader_framebuffer_fetch requires per-sample */
526 bool per_sample = ctx->min_samples > 1 || fs->outputs_read;
527 state->multisample_misc.evaluate_per_sample = msaa && per_sample;
528 state->multisample_misc.depth_function = zsa->base.depth.enabled ?
529 panfrost_translate_compare_func(zsa->base.depth.func) :
530 MALI_FUNC_ALWAYS;
531
532 state->multisample_misc.depth_write_mask = zsa->base.depth.writemask;
533 state->multisample_misc.fixed_function_near_discard = rast->depth_clip_near;
534 state->multisample_misc.fixed_function_far_discard = rast->depth_clip_far;
535 state->multisample_misc.unknown_2 = true;
536
537 state->stencil_mask_misc.stencil_mask_front = zsa->stencil_mask_front;
538 state->stencil_mask_misc.stencil_mask_back = zsa->stencil_mask_back;
539 state->stencil_mask_misc.stencil_enable = zsa->base.stencil[0].enabled;
540 state->stencil_mask_misc.alpha_to_coverage = alpha_to_coverage;
541 state->stencil_mask_misc.unknown_1 = 0x7;
542 state->stencil_mask_misc.depth_range_1 = rast->offset_tri;
543 state->stencil_mask_misc.depth_range_2 = rast->offset_tri;
544 state->stencil_mask_misc.single_sampled_lines = !rast->multisample;
545 state->depth_units = rast->offset_units * 2.0f;
546 state->depth_factor = rast->offset_scale;
547
548 bool back_enab = zsa->base.stencil[1].enabled;
549 state->stencil_front = zsa->stencil_front;
550 state->stencil_back = zsa->stencil_back;
551 state->stencil_front.reference_value = ctx->stencil_ref.ref_value[0];
552 state->stencil_back.reference_value = ctx->stencil_ref.ref_value[back_enab ? 1 : 0];
553}
554
555
556static void
557panfrost_emit_frag_shader(struct panfrost_context *ctx,
558 struct mali_renderer_state_packed *fragmeta,
559 struct panfrost_blend_final *blend)
560{
561 pan_pack(fragmeta, RENDERER_STATE, cfg) {
562 panfrost_prepare_fs_state(ctx, blend, &cfg);
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400563 }
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100564}
565
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400566mali_ptr
567panfrost_emit_compute_shader_meta(struct panfrost_batch *batch, enum pipe_shader_type stage)
568{
569 struct panfrost_shader_state *ss = panfrost_get_shader_state(batch->ctx, stage);
570
571 panfrost_batch_add_bo(batch, ss->bo,
572 PAN_BO_ACCESS_PRIVATE |
573 PAN_BO_ACCESS_READ |
574 PAN_BO_ACCESS_VERTEX_TILER);
575
576 panfrost_batch_add_bo(batch, pan_resource(ss->upload.rsrc)->bo,
577 PAN_BO_ACCESS_PRIVATE |
578 PAN_BO_ACCESS_READ |
579 PAN_BO_ACCESS_VERTEX_TILER);
580
581 return pan_resource(ss->upload.rsrc)->bo->gpu + ss->upload.offset;
582}
583
584mali_ptr
585panfrost_emit_frag_shader_meta(struct panfrost_batch *batch)
Boris Brezillon5d33d422020-03-05 11:02:56 +0100586{
587 struct panfrost_context *ctx = batch->ctx;
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400588 struct panfrost_shader_state *ss = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100589
Boris Brezillon5d33d422020-03-05 11:02:56 +0100590 /* Add the shader BO to the batch. */
591 panfrost_batch_add_bo(batch, ss->bo,
592 PAN_BO_ACCESS_PRIVATE |
593 PAN_BO_ACCESS_READ |
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400594 PAN_BO_ACCESS_FRAGMENT);
Boris Brezillon5d33d422020-03-05 11:02:56 +0100595
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400596 struct panfrost_device *dev = pan_device(ctx->base.screen);
597 unsigned rt_count = MAX2(ctx->pipe_framebuffer.nr_cbufs, 1);
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400598 struct panfrost_transfer xfer;
599 unsigned rt_size;
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100600
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400601 if (dev->quirks & MIDGARD_SFBD)
602 rt_size = 0;
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400603 else
Boris Brezillon83899762020-09-16 13:31:37 +0200604 rt_size = MALI_BLEND_LENGTH;
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100605
Boris Brezillonf734e672020-09-29 15:47:04 +0200606 unsigned desc_size = MALI_RENDERER_STATE_LENGTH + rt_size * rt_count;
607 xfer = panfrost_pool_alloc_aligned(&batch->pool, desc_size, MALI_RENDERER_STATE_LENGTH);
Tomeu Vizoso3c98c452020-04-24 08:40:51 +0200608
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400609 struct panfrost_blend_final blend[PIPE_MAX_COLOR_BUFS];
Icecream952aa58382020-09-19 20:33:14 +1200610 unsigned shader_offset = 0;
611 struct panfrost_bo *shader_bo = NULL;
Boris Brezillonb02f97c2020-03-05 16:20:18 +0100612
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400613 for (unsigned c = 0; c < ctx->pipe_framebuffer.nr_cbufs; ++c)
Icecream952aa58382020-09-19 20:33:14 +1200614 blend[c] = panfrost_get_blend_for_context(ctx, c, &shader_bo,
615 &shader_offset);
Boris Brezillonf734e672020-09-29 15:47:04 +0200616 panfrost_emit_frag_shader(ctx, (struct mali_renderer_state_packed *) xfer.cpu, blend);
Alyssa Rosenzweig1b377c22020-08-21 19:27:40 -0400617
Alyssa Rosenzweig45c59db2020-08-24 12:07:59 -0400618 if (!(dev->quirks & MIDGARD_SFBD))
Boris Brezillonf734e672020-09-29 15:47:04 +0200619 panfrost_emit_blend(batch, xfer.cpu + MALI_RENDERER_STATE_LENGTH, blend);
Alyssa Rosenzweig45c59db2020-08-24 12:07:59 -0400620 else
621 batch->draws |= PIPE_CLEAR_COLOR0;
Tomeu Vizoso3c98c452020-04-24 08:40:51 +0200622
Alyssa Rosenzweig80f1d612020-08-21 14:35:35 -0400623 return xfer.gpu;
Boris Brezillon5d33d422020-03-05 11:02:56 +0100624}
625
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -0400626mali_ptr
627panfrost_emit_viewport(struct panfrost_batch *batch)
Boris Brezillona72bab12020-03-05 09:30:58 +0100628{
629 struct panfrost_context *ctx = batch->ctx;
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400630 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
631 const struct pipe_scissor_state *ss = &ctx->scissor;
632 const struct pipe_rasterizer_state *rast = &ctx->rasterizer->base;
633 const struct pipe_framebuffer_state *fb = &ctx->pipe_framebuffer;
Boris Brezillona72bab12020-03-05 09:30:58 +0100634
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400635 /* Derive min/max from translate/scale. Note since |x| >= 0 by
636 * definition, we have that -|x| <= |x| hence translate - |scale| <=
637 * translate + |scale|, so the ordering is correct here. */
Icecream95e560028c82020-09-23 19:09:23 +1200638 float vp_minx = vp->translate[0] - fabsf(vp->scale[0]);
639 float vp_maxx = vp->translate[0] + fabsf(vp->scale[0]);
640 float vp_miny = vp->translate[1] - fabsf(vp->scale[1]);
641 float vp_maxy = vp->translate[1] + fabsf(vp->scale[1]);
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400642 float minz = (vp->translate[2] - fabsf(vp->scale[2]));
643 float maxz = (vp->translate[2] + fabsf(vp->scale[2]));
Boris Brezillona72bab12020-03-05 09:30:58 +0100644
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400645 /* Scissor to the intersection of viewport and to the scissor, clamped
646 * to the framebuffer */
Boris Brezillona72bab12020-03-05 09:30:58 +0100647
Icecream953d0ae7a2020-09-23 21:35:03 +1200648 unsigned minx = MIN2(fb->width, MAX2((int) vp_minx, 0));
649 unsigned maxx = MIN2(fb->width, MAX2((int) vp_maxx, 0));
650 unsigned miny = MIN2(fb->height, MAX2((int) vp_miny, 0));
651 unsigned maxy = MIN2(fb->height, MAX2((int) vp_maxy, 0));
Boris Brezillona72bab12020-03-05 09:30:58 +0100652
Alyssa Rosenzweigec351592020-08-14 17:50:44 -0400653 if (ss && rast->scissor) {
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400654 minx = MAX2(ss->minx, minx);
655 miny = MAX2(ss->miny, miny);
656 maxx = MIN2(ss->maxx, maxx);
657 maxy = MIN2(ss->maxy, maxy);
658 }
659
Icecream953d0ae7a2020-09-23 21:35:03 +1200660 /* Set the range to [1, 1) so max values don't wrap round */
661 if (maxx == 0 || maxy == 0)
662 maxx = maxy = minx = miny = 1;
663
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400664 struct panfrost_transfer T = panfrost_pool_alloc(&batch->pool, MALI_VIEWPORT_LENGTH);
665
666 pan_pack(T.cpu, VIEWPORT, cfg) {
Icecream953d0ae7a2020-09-23 21:35:03 +1200667 /* [minx, maxx) and [miny, maxy) are exclusive ranges, but
668 * these are inclusive */
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400669 cfg.scissor_minimum_x = minx;
670 cfg.scissor_minimum_y = miny;
671 cfg.scissor_maximum_x = maxx - 1;
672 cfg.scissor_maximum_y = maxy - 1;
673
674 cfg.minimum_z = rast->depth_clip_near ? minz : -INFINITY;
675 cfg.maximum_z = rast->depth_clip_far ? maxz : INFINITY;
676 }
677
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400678 panfrost_batch_union_scissor(batch, minx, miny, maxx, maxy);
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -0400679 return T.gpu;
Boris Brezillona72bab12020-03-05 09:30:58 +0100680}
Boris Brezillon0b735a22020-03-05 09:46:42 +0100681
682static mali_ptr
683panfrost_map_constant_buffer_gpu(struct panfrost_batch *batch,
684 enum pipe_shader_type st,
685 struct panfrost_constant_buffer *buf,
686 unsigned index)
687{
688 struct pipe_constant_buffer *cb = &buf->cb[index];
689 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
690
691 if (rsrc) {
692 panfrost_batch_add_bo(batch, rsrc->bo,
693 PAN_BO_ACCESS_SHARED |
694 PAN_BO_ACCESS_READ |
695 panfrost_bo_access_for_stage(st));
696
697 /* Alignment gauranteed by
698 * PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT */
699 return rsrc->bo->gpu + cb->buffer_offset;
700 } else if (cb->user_buffer) {
Alyssa Rosenzweig1cfbc5c2020-08-20 13:36:46 -0400701 return panfrost_pool_upload_aligned(&batch->pool,
Boris Brezillon0b735a22020-03-05 09:46:42 +0100702 cb->user_buffer +
703 cb->buffer_offset,
Alyssa Rosenzweig1cfbc5c2020-08-20 13:36:46 -0400704 cb->buffer_size, 16);
Boris Brezillon0b735a22020-03-05 09:46:42 +0100705 } else {
706 unreachable("No constant buffer");
707 }
708}
709
/* One 16-byte sysval uniform slot, viewed as four floats, four 32-bit
 * integers, or two 64-bit words depending on which sysval occupies it. */
struct sysval_uniform {
        union {
                float f[4];
                int32_t i[4];
                uint32_t u[4];
                uint64_t du[2];
        };
};
718
719static void
720panfrost_upload_viewport_scale_sysval(struct panfrost_batch *batch,
721 struct sysval_uniform *uniform)
722{
723 struct panfrost_context *ctx = batch->ctx;
724 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
725
726 uniform->f[0] = vp->scale[0];
727 uniform->f[1] = vp->scale[1];
728 uniform->f[2] = vp->scale[2];
729}
730
731static void
732panfrost_upload_viewport_offset_sysval(struct panfrost_batch *batch,
733 struct sysval_uniform *uniform)
734{
735 struct panfrost_context *ctx = batch->ctx;
736 const struct pipe_viewport_state *vp = &ctx->pipe_viewport;
737
738 uniform->f[0] = vp->translate[0];
739 uniform->f[1] = vp->translate[1];
740 uniform->f[2] = vp->translate[2];
741}
742
/* Upload a PAN_SYSVAL_TEXTURE_SIZE sysval: the first `dim` components hold
 * the width/height/depth of the view's first mip level, and for array
 * textures the layer count goes in the component after the last dimension.
 * The texture index, dimensionality and arrayness are packed into the
 * sysval ID by the compiler. */
static void panfrost_upload_txs_sysval(struct panfrost_batch *batch,
                                       enum pipe_shader_type st,
                                       unsigned int sysvalid,
                                       struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;
        unsigned texidx = PAN_SYSVAL_ID_TO_TXS_TEX_IDX(sysvalid);
        unsigned dim = PAN_SYSVAL_ID_TO_TXS_DIM(sysvalid);
        bool is_array = PAN_SYSVAL_ID_TO_TXS_IS_ARRAY(sysvalid);
        struct pipe_sampler_view *tex = &ctx->sampler_views[st][texidx]->base;

        /* Every texture has at least one dimension */
        assert(dim);
        uniform->i[0] = u_minify(tex->texture->width0, tex->u.tex.first_level);

        if (dim > 1)
                uniform->i[1] = u_minify(tex->texture->height0,
                                         tex->u.tex.first_level);

        if (dim > 2)
                uniform->i[2] = u_minify(tex->texture->depth0,
                                         tex->u.tex.first_level);

        if (is_array)
                uniform->i[dim] = tex->texture->array_size;
}
768
/* Upload an SSBO descriptor as a sysval: 64-bit GPU address in du[0] and
 * size in bytes in u[2]. The backing BO is added to the batch with
 * read/write access for the given stage so it stays alive and coherent. */
static void
panfrost_upload_ssbo_sysval(struct panfrost_batch *batch,
                            enum pipe_shader_type st,
                            unsigned ssbo_id,
                            struct sysval_uniform *uniform)
{
        struct panfrost_context *ctx = batch->ctx;

        /* The compiler only requests sysvals for bound SSBOs */
        assert(ctx->ssbo_mask[st] & (1 << ssbo_id));
        struct pipe_shader_buffer sb = ctx->ssbo[st][ssbo_id];

        /* Compute address */
        struct panfrost_bo *bo = pan_resource(sb.buffer)->bo;

        panfrost_batch_add_bo(batch, bo,
                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_RW |
                              panfrost_bo_access_for_stage(st));

        /* Upload address and size as sysval */
        uniform->du[0] = bo->gpu + sb.buffer_offset;
        uniform->u[2] = sb.buffer_size;
}
791
792static void
793panfrost_upload_sampler_sysval(struct panfrost_batch *batch,
794 enum pipe_shader_type st,
795 unsigned samp_idx,
796 struct sysval_uniform *uniform)
797{
798 struct panfrost_context *ctx = batch->ctx;
799 struct pipe_sampler_state *sampl = &ctx->samplers[st][samp_idx]->base;
800
801 uniform->f[0] = sampl->min_lod;
802 uniform->f[1] = sampl->max_lod;
803 uniform->f[2] = sampl->lod_bias;
804
805 /* Even without any errata, Midgard represents "no mipmapping" as
806 * fixing the LOD with the clamps; keep behaviour consistent. c.f.
807 * panfrost_create_sampler_state which also explains our choice of
808 * epsilon value (again to keep behaviour consistent) */
809
810 if (sampl->min_mip_filter == PIPE_TEX_MIPFILTER_NONE)
811 uniform->f[1] = uniform->f[0] + (1.0/256.0);
812}
813
814static void
815panfrost_upload_num_work_groups_sysval(struct panfrost_batch *batch,
816 struct sysval_uniform *uniform)
817{
818 struct panfrost_context *ctx = batch->ctx;
819
820 uniform->u[0] = ctx->compute_grid->grid[0];
821 uniform->u[1] = ctx->compute_grid->grid[1];
822 uniform->u[2] = ctx->compute_grid->grid[2];
823}
824
825static void
826panfrost_upload_sysvals(struct panfrost_batch *batch, void *buf,
827 struct panfrost_shader_state *ss,
828 enum pipe_shader_type st)
829{
830 struct sysval_uniform *uniforms = (void *)buf;
831
832 for (unsigned i = 0; i < ss->sysval_count; ++i) {
833 int sysval = ss->sysval[i];
834
835 switch (PAN_SYSVAL_TYPE(sysval)) {
836 case PAN_SYSVAL_VIEWPORT_SCALE:
837 panfrost_upload_viewport_scale_sysval(batch,
838 &uniforms[i]);
839 break;
840 case PAN_SYSVAL_VIEWPORT_OFFSET:
841 panfrost_upload_viewport_offset_sysval(batch,
842 &uniforms[i]);
843 break;
844 case PAN_SYSVAL_TEXTURE_SIZE:
845 panfrost_upload_txs_sysval(batch, st,
846 PAN_SYSVAL_ID(sysval),
847 &uniforms[i]);
848 break;
849 case PAN_SYSVAL_SSBO:
850 panfrost_upload_ssbo_sysval(batch, st,
851 PAN_SYSVAL_ID(sysval),
852 &uniforms[i]);
853 break;
854 case PAN_SYSVAL_NUM_WORK_GROUPS:
855 panfrost_upload_num_work_groups_sysval(batch,
856 &uniforms[i]);
857 break;
858 case PAN_SYSVAL_SAMPLER:
859 panfrost_upload_sampler_sysval(batch, st,
860 PAN_SYSVAL_ID(sysval),
861 &uniforms[i]);
862 break;
863 default:
864 assert(0);
865 }
866 }
867}
868
869static const void *
870panfrost_map_constant_buffer_cpu(struct panfrost_constant_buffer *buf,
871 unsigned index)
872{
873 struct pipe_constant_buffer *cb = &buf->cb[index];
874 struct panfrost_resource *rsrc = pan_resource(cb->buffer);
875
876 if (rsrc)
877 return rsrc->bo->cpu;
878 else if (cb->user_buffer)
879 return cb->user_buffer;
880 else
881 unreachable("No constant buffer");
882}
883
/* Emit the uniform-buffer descriptor array for a stage and return its GPU
 * address (0 if the stage has no shader bound). Sysvals plus the app's
 * UBO #0 contents are packed into one allocation that doubles as the
 * "push constant" buffer, whose address is written to *push_constants.
 * The remaining UBO slots point at their buffers directly. */
mali_ptr
panfrost_emit_const_buf(struct panfrost_batch *batch,
                        enum pipe_shader_type stage,
                        mali_ptr *push_constants)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_shader_variants *all = ctx->shader[stage];

        if (!all)
                return 0;

        struct panfrost_constant_buffer *buf = &ctx->constant_buffer[stage];

        struct panfrost_shader_state *ss = &all->variants[all->active_variant];

        /* Uniforms are implicitly UBO #0 */
        bool has_uniforms = buf->enabled_mask & (1 << 0);

        /* Allocate room for the sysval and the uniforms */
        size_t sys_size = sizeof(float) * 4 * ss->sysval_count;
        size_t uniform_size = has_uniforms ? (buf->cb[0].buffer_size) : 0;
        size_t size = sys_size + uniform_size;
        struct panfrost_transfer transfer =
                panfrost_pool_alloc_aligned(&batch->pool, size, 16);

        /* Upload sysvals requested by the shader */
        panfrost_upload_sysvals(batch, transfer.cpu, ss, stage);

        /* Upload uniforms, placed immediately after the sysvals */
        if (has_uniforms && uniform_size) {
                const void *cpu = panfrost_map_constant_buffer_cpu(buf, 0);
                memcpy(transfer.cpu + sys_size, cpu, uniform_size);
        }

        /* Next up, attach UBOs. UBO #0 is the uniforms we just
         * uploaded, so it's always included. The count is the highest UBO
         * addressable -- gaps are included. */

        unsigned ubo_count = 32 - __builtin_clz(buf->enabled_mask | 1);

        size_t sz = MALI_UNIFORM_BUFFER_LENGTH * ubo_count;
        struct panfrost_transfer ubos =
                panfrost_pool_alloc_aligned(&batch->pool, sz,
                                            MALI_UNIFORM_BUFFER_LENGTH);

        uint64_t *ubo_ptr = (uint64_t *) ubos.cpu;

        /* Upload uniforms as a UBO */

        if (size) {
                pan_pack(ubo_ptr, UNIFORM_BUFFER, cfg) {
                        /* entries are 16-byte vec4 slots */
                        cfg.entries = DIV_ROUND_UP(size, 16);
                        cfg.pointer = transfer.gpu;
                }
        } else {
                /* Zero descriptor marks slot 0 as absent */
                *ubo_ptr = 0;
        }

        /* The rest are honest-to-goodness UBOs */

        for (unsigned ubo = 1; ubo < ubo_count; ++ubo) {
                size_t usz = buf->cb[ubo].buffer_size;
                bool enabled = buf->enabled_mask & (1 << ubo);
                bool empty = usz == 0;

                if (!enabled || empty) {
                        /* Disabled/empty slots get a zero descriptor */
                        ubo_ptr[ubo] = 0;
                        continue;
                }

                /* Issue (57) for the ARB_uniform_buffer_object spec says that
                 * the buffer can be larger than the uniform data inside it,
                 * so clamp ubo size to what hardware supports. */

                pan_pack(ubo_ptr + ubo, UNIFORM_BUFFER, cfg) {
                        cfg.entries = MIN2(DIV_ROUND_UP(usz, 16), 1 << 12);
                        cfg.pointer = panfrost_map_constant_buffer_gpu(batch,
                                        stage, buf, ubo);
                }
        }

        *push_constants = transfer.gpu;

        buf->dirty_mask = 0;
        return ubos.gpu;
}
Boris Brezillon36725be2020-03-05 09:57:44 +0100970
/* Emit the local-storage descriptor for a compute dispatch and return its
 * GPU address. Workgroup-local storage (WLS) is sized as a power of two
 * per instance, replicated for every concurrently-running instance across
 * all shader cores, and backed by a shared BO cached on the batch. */
mali_ptr
panfrost_emit_shared_memory(struct panfrost_batch *batch,
                            const struct pipe_grid_info *info)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);
        struct panfrost_shader_variants *all = ctx->shader[PIPE_SHADER_COMPUTE];
        struct panfrost_shader_state *ss = &all->variants[all->active_variant];
        /* Per-instance allocation, power-of-two and at least 128 bytes */
        unsigned single_size = util_next_power_of_two(MAX2(ss->shared_size,
                                                           128));

        /* Upper bound on concurrent workgroups: product of the grid
         * dimensions, each rounded up to a power of two */
        unsigned log2_instances =
                util_logbase2_ceil(info->grid[0]) +
                util_logbase2_ceil(info->grid[1]) +
                util_logbase2_ceil(info->grid[2]);

        unsigned shared_size = single_size * (1 << log2_instances) * dev->core_count;
        struct panfrost_bo *bo = panfrost_batch_get_shared_memory(batch,
                                                                  shared_size,
                                                                  1);
        struct panfrost_transfer t =
                panfrost_pool_alloc_aligned(&batch->pool,
                                            MALI_LOCAL_STORAGE_LENGTH,
                                            64);

        pan_pack(t.cpu, LOCAL_STORAGE, ls) {
                ls.wls_base_pointer = bo->gpu;
                /* Instance count and per-instance size are encoded as logs */
                ls.wls_instances = log2_instances;
                ls.wls_size_scale = util_logbase2(single_size) + 1;
        };

        return t.gpu;
}
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001004
1005static mali_ptr
1006panfrost_get_tex_desc(struct panfrost_batch *batch,
1007 enum pipe_shader_type st,
1008 struct panfrost_sampler_view *view)
1009{
1010 if (!view)
1011 return (mali_ptr) 0;
1012
1013 struct pipe_sampler_view *pview = &view->base;
1014 struct panfrost_resource *rsrc = pan_resource(pview->texture);
1015
1016 /* Add the BO to the job so it's retained until the job is done. */
1017
1018 panfrost_batch_add_bo(batch, rsrc->bo,
1019 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1020 panfrost_bo_access_for_stage(st));
1021
Alyssa Rosenzweig32b171d2020-06-15 09:20:39 -04001022 panfrost_batch_add_bo(batch, view->bo,
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001023 PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
1024 panfrost_bo_access_for_stage(st));
1025
Alyssa Rosenzweig32b171d2020-06-15 09:20:39 -04001026 return view->bo->gpu;
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001027}
1028
Icecream95fafc3052020-06-12 20:14:02 +12001029static void
1030panfrost_update_sampler_view(struct panfrost_sampler_view *view,
1031 struct pipe_context *pctx)
1032{
1033 struct panfrost_resource *rsrc = pan_resource(view->base.texture);
Icecream9565b3b082020-06-20 19:09:03 +12001034 if (view->texture_bo != rsrc->bo->gpu ||
Alyssa Rosenzweig965537df2020-07-22 10:23:50 -04001035 view->modifier != rsrc->modifier) {
Alyssa Rosenzweig32b171d2020-06-15 09:20:39 -04001036 panfrost_bo_unreference(view->bo);
Icecream95fafc3052020-06-12 20:14:02 +12001037 panfrost_create_sampler_view_bo(view, pctx, &rsrc->base);
1038 }
1039}
1040
/* Emit the texture descriptor table for a stage and return its GPU address
 * (0 if no sampler views are bound). On Bifrost, descriptors are copied
 * inline into a pool allocation; on Midgard, the table holds 64-bit
 * pointers to per-view descriptor BOs. Either way, all referenced BOs are
 * pinned to the batch. */
mali_ptr
panfrost_emit_texture_descriptors(struct panfrost_batch *batch,
                                  enum pipe_shader_type stage)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *device = pan_device(ctx->base.screen);

        if (!ctx->sampler_view_count[stage])
                return 0;

        if (device->quirks & IS_BIFROST) {
                struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
                                MALI_BIFROST_TEXTURE_LENGTH *
                                ctx->sampler_view_count[stage],
                                MALI_BIFROST_TEXTURE_LENGTH);

                struct mali_bifrost_texture_packed *out =
                        (struct mali_bifrost_texture_packed *) T.cpu;

                for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
                        struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];
                        struct pipe_sampler_view *pview = &view->base;
                        struct panfrost_resource *rsrc = pan_resource(pview->texture);

                        /* Refresh the descriptor before copying, in case the
                         * resource moved or changed layout */
                        panfrost_update_sampler_view(view, &ctx->base);
                        out[i] = view->bifrost_descriptor;

                        /* Add the BOs to the job so they are retained until the job is done. */

                        panfrost_batch_add_bo(batch, rsrc->bo,
                                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
                                              panfrost_bo_access_for_stage(stage));

                        panfrost_batch_add_bo(batch, view->bo,
                                              PAN_BO_ACCESS_SHARED | PAN_BO_ACCESS_READ |
                                              panfrost_bo_access_for_stage(stage));
                }

                return T.gpu;
        } else {
                /* Midgard: a flat array of descriptor pointers */
                uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS];

                for (int i = 0; i < ctx->sampler_view_count[stage]; ++i) {
                        struct panfrost_sampler_view *view = ctx->sampler_views[stage][i];

                        panfrost_update_sampler_view(view, &ctx->base);

                        /* panfrost_get_tex_desc also pins the BOs */
                        trampolines[i] = panfrost_get_tex_desc(batch, stage, view);
                }

                return panfrost_pool_upload_aligned(&batch->pool, trampolines,
                                sizeof(uint64_t) *
                                ctx->sampler_view_count[stage],
                                sizeof(uint64_t));
        }
}
1097
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -04001098mali_ptr
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001099panfrost_emit_sampler_descriptors(struct panfrost_batch *batch,
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -04001100 enum pipe_shader_type stage)
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001101{
1102 struct panfrost_context *ctx = batch->ctx;
1103
1104 if (!ctx->sampler_count[stage])
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -04001105 return 0;
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001106
Alyssa Rosenzweigb10c3c82020-08-11 18:25:03 -04001107 size_t desc_size = MALI_BIFROST_SAMPLER_LENGTH;
1108 assert(MALI_BIFROST_SAMPLER_LENGTH == MALI_MIDGARD_SAMPLER_LENGTH);
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001109
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -04001110 size_t sz = desc_size * ctx->sampler_count[stage];
Alyssa Rosenzweig373a2042020-08-17 14:27:57 -04001111 struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool, sz, desc_size);
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -04001112 struct mali_midgard_sampler_packed *out = (struct mali_midgard_sampler_packed *) T.cpu;
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001113
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -04001114 for (unsigned i = 0; i < ctx->sampler_count[stage]; ++i)
1115 out[i] = ctx->samplers[stage][i]->hw;
Tomeu Vizosod3eb23a2020-04-17 14:23:39 +02001116
Alyssa Rosenzweigb7169362020-08-24 13:54:20 -04001117 return T.gpu;
Boris Brezillon8e0a08b2020-03-05 18:43:13 +01001118}
Boris Brezillon528384c2020-03-05 18:53:08 +01001119
/* Emit the attribute buffer records and attribute descriptors for the
 * current vertex state. Returns the GPU address of the attribute
 * descriptor array and writes the buffer-record array's address to
 * *buffers. Handles instancing (modulus, power-of-two and NPOT magic
 * divisors — the NPOT case consumes a second, continuation record),
 * gl_VertexID/gl_InstanceID synthesis, and the 64-byte base-address
 * alignment fixup. */
mali_ptr
panfrost_emit_vertex_data(struct panfrost_batch *batch,
                          mali_ptr *buffers)
{
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_vertex_state *so = ctx->vertex;
        struct panfrost_shader_state *vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);

        /* Worst case: everything is NPOT, which is only possible if instancing
         * is enabled. Otherwise single record is guaranteed */
        struct panfrost_transfer S = panfrost_pool_alloc_aligned(&batch->pool,
                        MALI_ATTRIBUTE_BUFFER_LENGTH * vs->attribute_count *
                        (ctx->instance_count > 1 ? 2 : 1),
                        MALI_ATTRIBUTE_BUFFER_LENGTH * 2);

        struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
                        MALI_ATTRIBUTE_LENGTH * vs->attribute_count,
                        MALI_ATTRIBUTE_LENGTH);

        struct mali_attribute_buffer_packed *bufs =
                (struct mali_attribute_buffer_packed *) S.cpu;

        struct mali_attribute_packed *out =
                (struct mali_attribute_packed *) T.cpu;

        /* Maps attribute index -> its buffer record index, for the
         * descriptor pass below */
        unsigned attrib_to_buffer[PIPE_MAX_ATTRIBS] = { 0 };
        unsigned k = 0;

        for (unsigned i = 0; i < so->num_elements; ++i) {
                /* We map buffers 1:1 with the attributes, which
                 * means duplicating some vertex buffers (who cares? aside from
                 * maybe some caching implications but I somehow doubt that
                 * matters) */

                struct pipe_vertex_element *elem = &so->pipe[i];
                unsigned vbi = elem->vertex_buffer_index;
                attrib_to_buffer[i] = k;

                if (!(ctx->vb_mask & (1 << vbi)))
                        continue;

                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];
                struct panfrost_resource *rsrc;

                rsrc = pan_resource(buf->buffer.resource);
                if (!rsrc)
                        continue;

                /* Add a dependency of the batch on the vertex buffer */
                panfrost_batch_add_bo(batch, rsrc->bo,
                                      PAN_BO_ACCESS_SHARED |
                                      PAN_BO_ACCESS_READ |
                                      PAN_BO_ACCESS_VERTEX_TILER);

                /* Mask off lower bits, see offset fixup below */
                mali_ptr raw_addr = rsrc->bo->gpu + buf->buffer_offset;
                mali_ptr addr = raw_addr & ~63;

                /* Since we advanced the base pointer, we shrink the buffer
                 * size, but add the offset we subtracted */
                unsigned size = rsrc->base.width0 + (raw_addr - addr)
                        - buf->buffer_offset;

                /* When there is a divisor, the hardware-level divisor is
                 * the product of the instance divisor and the padded count */
                unsigned divisor = elem->instance_divisor;
                unsigned hw_divisor = ctx->padded_count * divisor;
                unsigned stride = buf->stride;

                /* If there's a divisor(=1) but no instancing, we want every
                 * attribute to be the same */

                if (divisor && ctx->instance_count == 1)
                        stride = 0;

                if (!divisor || ctx->instance_count <= 1) {
                        /* Per-vertex data (or degenerate instancing) */
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                if (ctx->instance_count > 1) {
                                        cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS;
                                        cfg.divisor = ctx->padded_count;
                                }

                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;
                        }
                } else if (util_is_power_of_two_or_zero(hw_divisor)) {
                        /* Power-of-two divisor: encoded as a shift */
                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR;
                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;
                                cfg.divisor_r = __builtin_ctz(hw_divisor);
                        }

                } else {
                        /* NPOT divisor: multiply-shift "magic" division,
                         * needing a second (continuation) buffer record */
                        unsigned shift = 0, extra_flags = 0;

                        unsigned magic_divisor =
                                panfrost_compute_magic_divisor(hw_divisor, &shift, &extra_flags);

                        pan_pack(bufs + k, ATTRIBUTE_BUFFER, cfg) {
                                cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR;
                                cfg.pointer = addr;
                                cfg.stride = stride;
                                cfg.size = size;

                                cfg.divisor_r = shift;
                                cfg.divisor_e = extra_flags;
                        }

                        pan_pack(bufs + k + 1, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) {
                                cfg.divisor_numerator = magic_divisor;
                                cfg.divisor = divisor;
                        }

                        /* Continuation record occupies the next slot */
                        ++k;
                }

                ++k;
        }

        /* Add special gl_VertexID/gl_InstanceID buffers */

        if (unlikely(vs->attribute_count >= PAN_VERTEX_ID)) {
                panfrost_vertex_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);

                pan_pack(out + PAN_VERTEX_ID, ATTRIBUTE, cfg) {
                        cfg.buffer_index = k++;
                        cfg.format = so->formats[PAN_VERTEX_ID];
                }

                panfrost_instance_id(ctx->padded_count, &bufs[k], ctx->instance_count > 1);

                pan_pack(out + PAN_INSTANCE_ID, ATTRIBUTE, cfg) {
                        cfg.buffer_index = k++;
                        cfg.format = so->formats[PAN_INSTANCE_ID];
                }
        }

        /* Attribute addresses require 64-byte alignment, so let:
         *
         * base' = base & ~63 = base - (base & 63)
         * offset' = offset + (base & 63)
         *
         * Since base' + offset' = base + offset, these are equivalent
         * addressing modes and now base is 64 aligned.
         */

        for (unsigned i = 0; i < so->num_elements; ++i) {
                unsigned vbi = so->pipe[i].vertex_buffer_index;
                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[vbi];

                /* Adjust by the masked off bits of the offset. Make sure we
                 * read src_offset from so->hw (which is not GPU visible)
                 * rather than target (which is) due to caching effects */

                unsigned src_offset = so->pipe[i].src_offset;

                /* BOs aligned to 4k so guaranteed aligned to 64 */
                src_offset += (buf->buffer_offset & 63);

                /* Also, somewhat obscurely per-instance data needs to be
                 * offset in response to a delayed start in an indexed draw */

                if (so->pipe[i].instance_divisor && ctx->instance_count > 1)
                        src_offset -= buf->stride * ctx->offset_start;

                pan_pack(out + i, ATTRIBUTE, cfg) {
                        cfg.buffer_index = attrib_to_buffer[i];
                        cfg.format = so->formats[i];
                        cfg.offset = src_offset;
                }
        }

        *buffers = S.gpu;
        return T.gpu;
}
1298
Boris Brezillon836686d2020-03-06 09:45:31 +01001299static mali_ptr
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001300panfrost_emit_varyings(struct panfrost_batch *batch,
1301 struct mali_attribute_buffer_packed *slot,
1302 unsigned stride, unsigned count)
Boris Brezillon836686d2020-03-06 09:45:31 +01001303{
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001304 unsigned size = stride * count;
Alyssa Rosenzweig373a2042020-08-17 14:27:57 -04001305 mali_ptr ptr = panfrost_pool_alloc_aligned(&batch->invisible_pool, size, 64).gpu;
Boris Brezillon836686d2020-03-06 09:45:31 +01001306
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001307 pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
1308 cfg.stride = stride;
1309 cfg.size = size;
1310 cfg.pointer = ptr;
1311 }
Boris Brezillon836686d2020-03-06 09:45:31 +01001312
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001313 return ptr;
Boris Brezillon836686d2020-03-06 09:45:31 +01001314}
1315
Alyssa Rosenzweige26ac2e2020-06-10 19:28:28 -04001316static unsigned
Ilia Mirkin4c050f22020-08-09 00:13:14 -04001317panfrost_streamout_offset(unsigned stride,
Alyssa Rosenzweige26ac2e2020-06-10 19:28:28 -04001318 struct pipe_stream_output_target *target)
1319{
Ilia Mirkin4c050f22020-08-09 00:13:14 -04001320 return (target->buffer_offset + (pan_so_target(target)->offset * stride * 4)) & 63;
Alyssa Rosenzweige26ac2e2020-06-10 19:28:28 -04001321}
1322
Boris Brezillon836686d2020-03-06 09:45:31 +01001323static void
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001324panfrost_emit_streamout(struct panfrost_batch *batch,
1325 struct mali_attribute_buffer_packed *slot,
Ilia Mirkin4c050f22020-08-09 00:13:14 -04001326 unsigned stride_words, unsigned count,
Boris Brezillon836686d2020-03-06 09:45:31 +01001327 struct pipe_stream_output_target *target)
1328{
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001329 unsigned stride = stride_words * 4;
Boris Brezillon836686d2020-03-06 09:45:31 +01001330 unsigned max_size = target->buffer_size;
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001331 unsigned expected_size = stride * count;
Boris Brezillon836686d2020-03-06 09:45:31 +01001332
Boris Brezillon836686d2020-03-06 09:45:31 +01001333 /* Grab the BO and bind it to the batch */
1334 struct panfrost_bo *bo = pan_resource(target->buffer)->bo;
1335
1336 /* Varyings are WRITE from the perspective of the VERTEX but READ from
1337 * the perspective of the TILER and FRAGMENT.
1338 */
1339 panfrost_batch_add_bo(batch, bo,
1340 PAN_BO_ACCESS_SHARED |
1341 PAN_BO_ACCESS_RW |
1342 PAN_BO_ACCESS_VERTEX_TILER |
1343 PAN_BO_ACCESS_FRAGMENT);
1344
Alyssa Rosenzweig79e349a2020-06-04 15:45:34 -04001345 /* We will have an offset applied to get alignment */
Ilia Mirkin4c050f22020-08-09 00:13:14 -04001346 mali_ptr addr = bo->gpu + target->buffer_offset + (pan_so_target(target)->offset * stride);
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001347
1348 pan_pack(slot, ATTRIBUTE_BUFFER, cfg) {
1349 cfg.pointer = (addr & ~63);
1350 cfg.stride = stride;
1351 cfg.size = MIN2(max_size, expected_size) + (addr & 63);
1352 }
Boris Brezillon836686d2020-03-06 09:45:31 +01001353}
1354
Boris Brezillon836686d2020-03-06 09:45:31 +01001355/* Helpers for manipulating stream out information so we can pack varyings
1356 * accordingly. Compute the src_offset for a given captured varying */
1357
1358static struct pipe_stream_output *
1359pan_get_so(struct pipe_stream_output_info *info, gl_varying_slot loc)
1360{
1361 for (unsigned i = 0; i < info->num_outputs; ++i) {
1362 if (info->output[i].register_index == loc)
1363 return &info->output[i];
1364 }
1365
1366 unreachable("Varying not captured");
1367}
1368
/* Size in bytes of one vertex's worth of the given Mali format:
 * bytes-per-channel times channel count. */

static unsigned
pan_varying_size(enum mali_format fmt)
{
        unsigned type = MALI_EXTRACT_TYPE(fmt);
        unsigned chan = MALI_EXTRACT_CHANNELS(fmt);
        unsigned bits = MALI_EXTRACT_BITS(fmt);
        unsigned bpc = 0;

        if (bits == MALI_CHANNEL_FLOAT) {
                /* No doubles */
                /* NOTE(review): this assumes the Mali format encoding reuses
                 * the SINT type bits for fp16 and the UNORM type bits for
                 * fp32 -- confirm against the mali_format enum definitions */
                bool fp16 = (type == MALI_FORMAT_SINT);
                assert(fp16 || (type == MALI_FORMAT_UNORM));

                bpc = fp16 ? 2 : 4;
        } else {
                assert(type >= MALI_FORMAT_SNORM && type <= MALI_FORMAT_SINT);

                /* See the enums */
                /* The bits field is a log2 code; 1 << bits yields bits per
                 * channel, which must be at least a byte */
                bits = 1 << bits;
                assert(bits >= 8);
                bpc = bits / 8;
        }

        return bpc * chan;
}
1394
Alyssa Rosenzweig258b80b2020-06-08 12:56:33 -04001395/* Indices for named (non-XFB) varyings that are present. These are packed
1396 * tightly so they correspond to a bitfield present (P) indexed by (1 <<
1397 * PAN_VARY_*). This has the nice property that you can lookup the buffer index
1398 * of a given special field given a shift S by:
1399 *
1400 * idx = popcount(P & ((1 << S) - 1))
1401 *
 * That is... look at all of the varyings that come earlier and count them;
 * that count is the index of the new entry. Likewise, the total number of
 * special buffers required is simply popcount(P)
1405 */
1406
enum pan_special_varying {
        PAN_VARY_GENERAL = 0,   /* Linked, general-purpose varyings */
        PAN_VARY_POSITION = 1,  /* gl_Position buffer (fp32 vec4 per vertex) */
        PAN_VARY_PSIZ = 2,      /* gl_PointSize buffer (R16F) */
        PAN_VARY_PNTCOORD = 3,  /* Point coordinate special input (R16F) */
        PAN_VARY_FACE = 4,      /* Front-facing special input (R32I) */
        PAN_VARY_FRAGCOORD = 5, /* gl_FragCoord (RGBA32F, non-Bifrost only) */

        /* Keep last */
        PAN_VARY_MAX,
};
1418
/* Given a varying, figure out which index it corresponds to */
1420
1421static inline unsigned
1422pan_varying_index(unsigned present, enum pan_special_varying v)
1423{
1424 unsigned mask = (1 << v) - 1;
1425 return util_bitcount(present & mask);
1426}
1427
1428/* Get the base offset for XFB buffers, which by convention come after
1429 * everything else. Wrapper function for semantic reasons; by construction this
1430 * is just popcount. */
1431
static inline unsigned
pan_xfb_base(unsigned present)
{
        /* Every present special buffer occupies one slot before the XFB
         * buffers, so the XFB base index is just the popcount */
        return util_bitcount(present);
}
1437
Alyssa Rosenzweig3d04ebf2020-06-08 13:32:38 -04001438/* Computes the present mask for varyings so we can start emitting varying records */
1439
1440static inline unsigned
1441pan_varying_present(
1442 struct panfrost_shader_state *vs,
1443 struct panfrost_shader_state *fs,
Alyssa Rosenzweigb17b6cc2020-08-26 11:22:47 -04001444 unsigned quirks,
1445 uint16_t point_coord_mask)
Alyssa Rosenzweig3d04ebf2020-06-08 13:32:38 -04001446{
1447 /* At the moment we always emit general and position buffers. Not
1448 * strictly necessary but usually harmless */
1449
1450 unsigned present = (1 << PAN_VARY_GENERAL) | (1 << PAN_VARY_POSITION);
1451
1452 /* Enable special buffers by the shader info */
1453
1454 if (vs->writes_point_size)
1455 present |= (1 << PAN_VARY_PSIZ);
1456
1457 if (fs->reads_point_coord)
1458 present |= (1 << PAN_VARY_PNTCOORD);
1459
1460 if (fs->reads_face)
1461 present |= (1 << PAN_VARY_FACE);
1462
1463 if (fs->reads_frag_coord && !(quirks & IS_BIFROST))
1464 present |= (1 << PAN_VARY_FRAGCOORD);
1465
1466 /* Also, if we have a point sprite, we need a point coord buffer */
1467
1468 for (unsigned i = 0; i < fs->varying_count; i++) {
1469 gl_varying_slot loc = fs->varyings_loc[i];
1470
Alyssa Rosenzweigb17b6cc2020-08-26 11:22:47 -04001471 if (util_varying_is_point_coord(loc, point_coord_mask))
Alyssa Rosenzweig3d04ebf2020-06-08 13:32:38 -04001472 present |= (1 << PAN_VARY_PNTCOORD);
1473 }
1474
1475 return present;
1476}
1477
Alyssa Rosenzweig0c0217d2020-06-08 13:45:17 -04001478/* Emitters for varying records */
1479
Alyssa Rosenzweigb805cf92020-08-14 15:12:39 -04001480static void
1481pan_emit_vary(struct mali_attribute_packed *out,
1482 unsigned present, enum pan_special_varying buf,
Alyssa Rosenzweig0c0217d2020-06-08 13:45:17 -04001483 unsigned quirks, enum mali_format format,
1484 unsigned offset)
1485{
1486 unsigned nr_channels = MALI_EXTRACT_CHANNELS(format);
Alyssa Rosenzweig668ec242020-08-11 22:26:03 -04001487 unsigned swizzle = quirks & HAS_SWIZZLES ?
1488 panfrost_get_default_swizzle(nr_channels) :
1489 panfrost_bifrost_swizzle(nr_channels);
Alyssa Rosenzweig0c0217d2020-06-08 13:45:17 -04001490
Alyssa Rosenzweig59fa2692020-08-14 15:23:10 -04001491 pan_pack(out, ATTRIBUTE, cfg) {
1492 cfg.buffer_index = pan_varying_index(present, buf);
1493 cfg.unknown = quirks & IS_BIFROST ? 0x0 : 0x1;
1494 cfg.format = (format << 12) | swizzle;
1495 cfg.offset = offset;
1496 }
Alyssa Rosenzweig0c0217d2020-06-08 13:45:17 -04001497}
1498
1499/* General varying that is unused */
1500
Alyssa Rosenzweigb805cf92020-08-14 15:12:39 -04001501static void
1502pan_emit_vary_only(struct mali_attribute_packed *out,
1503 unsigned present, unsigned quirks)
Alyssa Rosenzweig0c0217d2020-06-08 13:45:17 -04001504{
Alyssa Rosenzweigb805cf92020-08-14 15:12:39 -04001505 pan_emit_vary(out, present, 0, quirks, MALI_VARYING_DISCARD, 0);
Alyssa Rosenzweig0c0217d2020-06-08 13:45:17 -04001506}
1507
Alyssa Rosenzweigdf242092020-06-08 13:52:38 -04001508/* Special records */
1509
/* Canonical hardware format for each special varying. PAN_VARY_GENERAL is
 * intentionally absent (zero-initialized): general varyings carry their own
 * per-varying format and never go through pan_emit_vary_special */
static const enum mali_format pan_varying_formats[PAN_VARY_MAX] = {
        [PAN_VARY_POSITION] = MALI_VARYING_POS,
        [PAN_VARY_PSIZ] = MALI_R16F,
        [PAN_VARY_PNTCOORD] = MALI_R16F,
        [PAN_VARY_FACE] = MALI_R32I,
        [PAN_VARY_FRAGCOORD] = MALI_RGBA32F
};
1517
static void
pan_emit_vary_special(struct mali_attribute_packed *out,
                unsigned present, enum pan_special_varying buf,
                unsigned quirks)
{
        /* Special records always use the canonical format for the slot and
         * start at offset 0 within their dedicated buffer */
        assert(buf < PAN_VARY_MAX);
        pan_emit_vary(out, present, buf, quirks, pan_varying_formats[buf], 0);
}
1526
Alyssa Rosenzweigc31af6f2020-06-08 14:08:45 -04001527static enum mali_format
1528pan_xfb_format(enum mali_format format, unsigned nr)
1529{
1530 if (MALI_EXTRACT_BITS(format) == MALI_CHANNEL_FLOAT)
1531 return MALI_R32F | MALI_NR_CHANNELS(nr);
1532 else
1533 return MALI_EXTRACT_TYPE(format) | MALI_NR_CHANNELS(nr) | MALI_CHANNEL_32;
1534}
1535
1536/* Transform feedback records. Note struct pipe_stream_output is (if packed as
1537 * a bitfield) 32-bit, smaller than a 64-bit pointer, so may as well pass by
1538 * value. */
1539
static void
pan_emit_vary_xfb(struct mali_attribute_packed *out,
                unsigned present,
                unsigned max_xfb,
                unsigned *streamout_offsets,
                unsigned quirks,
                enum mali_format format,
                struct pipe_stream_output o)
{
        /* Emit a varying record that lands in a transform feedback buffer.
         * Note: max_xfb is unused here -- capture eligibility was already
         * checked by the caller via panfrost_xfb_captured */
        unsigned swizzle = quirks & HAS_SWIZZLES ?
                        panfrost_get_default_swizzle(o.num_components) :
                        panfrost_bifrost_swizzle(o.num_components);

        pan_pack(out, ATTRIBUTE, cfg) {
                /* XFB buffers come after everything else */
                cfg.buffer_index = pan_xfb_base(present) + o.output_buffer;
                cfg.unknown = quirks & IS_BIFROST ? 0x0 : 0x1;

                /* Override number of channels and precision to highp */
                cfg.format = (pan_xfb_format(format, o.num_components) << 12) | swizzle;

                /* Apply given offsets together: the captured output's dword
                 * offset plus the buffer's sub-64-byte misalignment computed
                 * by panfrost_streamout_offset */
                cfg.offset = (o.dst_offset * 4) /* dwords */
                        + streamout_offsets[o.output_buffer];
        }
}
1566
Alyssa Rosenzweige9e9b2b2020-06-10 15:13:12 -04001567/* Determine if we should capture a varying for XFB. This requires actually
1568 * having a buffer for it. If we don't capture it, we'll fallback to a general
1569 * varying path (linked or unlinked, possibly discarding the write) */
1570
1571static bool
1572panfrost_xfb_captured(struct panfrost_shader_state *xfb,
1573 unsigned loc, unsigned max_xfb)
1574{
1575 if (!(xfb->so_mask & (1ll << loc)))
1576 return false;
1577
1578 struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
1579 return o->output_buffer < max_xfb;
1580}
1581
/* Emit a general-purpose varying, linking it against the other shader stage.
 * If the other stage does not consume it, the write is discarded. On the
 * allocating (vertex) pass, space is reserved in the general buffer via a
 * watermark allocation bumping *gen_stride; on the consuming pass, the
 * previously-recorded offsets/formats are reused. */

static void
pan_emit_general_varying(struct mali_attribute_packed *out,
                struct panfrost_shader_state *other,
                struct panfrost_shader_state *xfb,
                gl_varying_slot loc,
                enum mali_format format,
                unsigned present,
                unsigned quirks,
                unsigned *gen_offsets,
                enum mali_format *gen_formats,
                unsigned *gen_stride,
                unsigned idx,
                bool should_alloc)
{
        /* Check if we're linked */
        signed other_idx = -1;

        for (unsigned j = 0; j < other->varying_count; ++j) {
                if (other->varyings_loc[j] == loc) {
                        other_idx = j;
                        break;
                }
        }

        /* Unlinked: the other stage never reads this, so discard the write */
        if (other_idx < 0) {
                pan_emit_vary_only(out, present, quirks);
                return;
        }

        unsigned offset = gen_offsets[other_idx];

        if (should_alloc) {
                /* We're linked, so allocate a space via a watermark allocation */
                enum mali_format alt = other->varyings[other_idx];

                /* Do interpolation at minimum precision */
                unsigned size_main = pan_varying_size(format);
                unsigned size_alt = pan_varying_size(alt);
                unsigned size = MIN2(size_main, size_alt);

                /* If a varying is marked for XFB but not actually captured, we
                 * should match the format to the format that would otherwise
                 * be used for XFB, since dEQP checks for invariance here. It's
                 * unclear if this is required by the spec. */

                if (xfb->so_mask & (1ull << loc)) {
                        struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
                        format = pan_xfb_format(format, o->num_components);
                        size = pan_varying_size(format);
                } else if (size == size_alt) {
                        format = alt;
                }

                /* Record the allocation for the consuming pass: offset under
                 * this stage's index, resolved format under the other's */
                gen_offsets[idx] = *gen_stride;
                gen_formats[other_idx] = format;
                offset = *gen_stride;
                *gen_stride += size;
        }

        pan_emit_vary(out, present, PAN_VARY_GENERAL, quirks, format, offset);
}
1643
Alyssa Rosenzweig40b4ee92020-08-14 15:19:25 -04001644/* Higher-level wrapper around all of the above, classifying a varying into one
1645 * of the above types */
1646
static void
panfrost_emit_varying(
                struct mali_attribute_packed *out,
                struct panfrost_shader_state *stage,
                struct panfrost_shader_state *other,
                struct panfrost_shader_state *xfb,
                unsigned present,
                uint16_t point_sprite_mask,
                unsigned max_xfb,
                unsigned *streamout_offsets,
                unsigned quirks,
                unsigned *gen_offsets,
                enum mali_format *gen_formats,
                unsigned *gen_stride,
                unsigned idx,
                bool should_alloc,
                bool is_fragment)
{
        /* Classify varying idx of `stage` and emit the matching record.
         * The branch order matters: point-sprite replacement wins over
         * everything, then XFB capture, then the named special slots, and
         * only then the general linked/unlinked path. */
        gl_varying_slot loc = stage->varyings_loc[idx];
        enum mali_format format = stage->varyings[idx];

        /* Override format to match linkage */
        if (!should_alloc && gen_formats[idx])
                format = gen_formats[idx];

        if (util_varying_is_point_coord(loc, point_sprite_mask)) {
                pan_emit_vary_special(out, present, PAN_VARY_PNTCOORD, quirks);
        } else if (panfrost_xfb_captured(xfb, loc, max_xfb)) {
                struct pipe_stream_output *o = pan_get_so(&xfb->stream_output, loc);
                pan_emit_vary_xfb(out, present, max_xfb, streamout_offsets, quirks, format, *o);
        } else if (loc == VARYING_SLOT_POS) {
                /* gl_Position: the fragment stage reads it as gl_FragCoord */
                if (is_fragment)
                        pan_emit_vary_special(out, present, PAN_VARY_FRAGCOORD, quirks);
                else
                        pan_emit_vary_special(out, present, PAN_VARY_POSITION, quirks);
        } else if (loc == VARYING_SLOT_PSIZ) {
                pan_emit_vary_special(out, present, PAN_VARY_PSIZ, quirks);
        } else if (loc == VARYING_SLOT_PNTC) {
                pan_emit_vary_special(out, present, PAN_VARY_PNTCOORD, quirks);
        } else if (loc == VARYING_SLOT_FACE) {
                pan_emit_vary_special(out, present, PAN_VARY_FACE, quirks);
        } else {
                pan_emit_general_varying(out, other, xfb, loc, format, present,
                                quirks, gen_offsets, gen_formats, gen_stride,
                                idx, should_alloc);
        }
}
1694
Alyssa Rosenzweig6ab87c52020-06-08 15:29:05 -04001695static void
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001696pan_emit_special_input(struct mali_attribute_buffer_packed *out,
Alyssa Rosenzweig6ab87c52020-06-08 15:29:05 -04001697 unsigned present,
1698 enum pan_special_varying v,
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001699 unsigned special)
Alyssa Rosenzweig6ab87c52020-06-08 15:29:05 -04001700{
1701 if (present & (1 << v)) {
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001702 unsigned idx = pan_varying_index(present, v);
Alyssa Rosenzweig6ab87c52020-06-08 15:29:05 -04001703
Alyssa Rosenzweigec58cda2020-08-14 15:50:13 -04001704 pan_pack(out + idx, ATTRIBUTE_BUFFER, cfg) {
1705 cfg.special = special;
1706 cfg.type = 0;
1707 }
Alyssa Rosenzweig6ab87c52020-06-08 15:29:05 -04001708 }
1709}
1710
/* Emit the full varying setup for a draw: per-varying attribute records for
 * the VS and FS, the attribute buffer array they index (general, position,
 * specials, then XFB buffers), and the GPU pointers the caller needs.
 * Outputs: *vs_attribs / *fs_attribs (attribute record arrays), *buffers
 * (attribute buffer array), *position (gl_Position storage), *psiz
 * (gl_PointSize storage, only written when the VS writes point size). */

void
panfrost_emit_varying_descriptor(struct panfrost_batch *batch,
                                 unsigned vertex_count,
                                 mali_ptr *vs_attribs,
                                 mali_ptr *fs_attribs,
                                 mali_ptr *buffers,
                                 mali_ptr *position,
                                 mali_ptr *psiz)
{
        /* Load the shaders */
        struct panfrost_context *ctx = batch->ctx;
        struct panfrost_device *dev = pan_device(ctx->base.screen);
        struct panfrost_shader_state *vs, *fs;
        size_t vs_size, fs_size;

        /* Allocate the varying descriptor: VS records followed by FS records
         * in one contiguous transfer */

        vs = panfrost_get_shader_state(ctx, PIPE_SHADER_VERTEX);
        fs = panfrost_get_shader_state(ctx, PIPE_SHADER_FRAGMENT);
        vs_size = MALI_ATTRIBUTE_LENGTH * vs->varying_count;
        fs_size = MALI_ATTRIBUTE_LENGTH * fs->varying_count;

        struct panfrost_transfer trans = panfrost_pool_alloc_aligned(
                        &batch->pool, vs_size + fs_size, MALI_ATTRIBUTE_LENGTH);

        struct pipe_stream_output_info *so = &vs->stream_output;
        uint16_t point_coord_mask = ctx->rasterizer->base.sprite_coord_enable;
        unsigned present = pan_varying_present(vs, fs, dev->quirks, point_coord_mask);

        /* Check if this varying is linked by us. This is the case for
         * general-purpose, non-captured varyings. If it is, link it. If it's
         * not, use the provided stream out information to determine the
         * offset, since it was already linked for us. */

        unsigned gen_offsets[32];
        enum mali_format gen_formats[32];
        memset(gen_offsets, 0, sizeof(gen_offsets));
        memset(gen_formats, 0, sizeof(gen_formats));

        unsigned gen_stride = 0;
        assert(vs->varying_count < ARRAY_SIZE(gen_offsets));
        assert(fs->varying_count < ARRAY_SIZE(gen_offsets));

        /* Sub-64-byte offsets into each bound streamout buffer */
        unsigned streamout_offsets[32];

        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
                streamout_offsets[i] = panfrost_streamout_offset(
                                        so->stride[i],
                                        ctx->streamout.targets[i]);
        }

        struct mali_attribute_packed *ovs = (struct mali_attribute_packed *)trans.cpu;
        struct mali_attribute_packed *ofs = ovs + vs->varying_count;

        /* First pass (VS, should_alloc=true) performs the watermark
         * allocation that fixes gen_offsets/gen_formats/gen_stride; the
         * second pass (FS) consumes them. Point sprites only apply to the
         * FS pass, hence mask 0 here. */
        for (unsigned i = 0; i < vs->varying_count; i++) {
                panfrost_emit_varying(ovs + i, vs, fs, vs, present, 0,
                                ctx->streamout.num_targets, streamout_offsets,
                                dev->quirks,
                                gen_offsets, gen_formats, &gen_stride, i, true, false);
        }

        for (unsigned i = 0; i < fs->varying_count; i++) {
                panfrost_emit_varying(ofs + i, fs, vs, vs, present, point_coord_mask,
                                ctx->streamout.num_targets, streamout_offsets,
                                dev->quirks,
                                gen_offsets, gen_formats, &gen_stride, i, false, true);
        }

        /* Attribute buffer array: special buffers first (popcount(present)
         * of them), XFB buffers after */
        unsigned xfb_base = pan_xfb_base(present);
        struct panfrost_transfer T = panfrost_pool_alloc_aligned(&batch->pool,
                        MALI_ATTRIBUTE_BUFFER_LENGTH * (xfb_base + ctx->streamout.num_targets),
                        MALI_ATTRIBUTE_BUFFER_LENGTH * 2);
        struct mali_attribute_buffer_packed *varyings =
                (struct mali_attribute_buffer_packed *) T.cpu;

        /* Emit the stream out buffers */

        unsigned out_count = u_stream_outputs_for_vertices(ctx->active_prim,
                                                           ctx->vertex_count);

        for (unsigned i = 0; i < ctx->streamout.num_targets; ++i) {
                panfrost_emit_streamout(batch, &varyings[xfb_base + i],
                                        so->stride[i],
                                        out_count,
                                        ctx->streamout.targets[i]);
        }

        /* Backing storage for the general (linked) varyings */
        panfrost_emit_varyings(batch,
                        &varyings[pan_varying_index(present, PAN_VARY_GENERAL)],
                        gen_stride, vertex_count);

        /* fp32 vec4 gl_Position */
        *position = panfrost_emit_varyings(batch,
                        &varyings[pan_varying_index(present, PAN_VARY_POSITION)],
                        sizeof(float) * 4, vertex_count);

        /* fp16 gl_PointSize, only when the VS writes it */
        if (present & (1 << PAN_VARY_PSIZ)) {
                *psiz = panfrost_emit_varyings(batch,
                                &varyings[pan_varying_index(present, PAN_VARY_PSIZ)],
                                2, vertex_count);
        }

        pan_emit_special_input(varyings, present, PAN_VARY_PNTCOORD, MALI_ATTRIBUTE_SPECIAL_POINT_COORD);
        pan_emit_special_input(varyings, present, PAN_VARY_FACE, MALI_ATTRIBUTE_SPECIAL_FRONT_FACING);
        pan_emit_special_input(varyings, present, PAN_VARY_FRAGCOORD, MALI_ATTRIBUTE_SPECIAL_FRAG_COORD);

        *buffers = T.gpu;
        *vs_attribs = trans.gpu;
        *fs_attribs = trans.gpu + vs_size;
}
1821
/* Queue the vertex and (unless rasterizer discard) tiler jobs for a draw on
 * the batch's scoreboard, with the tiler job depending on the vertex job */

void
panfrost_emit_vertex_tiler_jobs(struct panfrost_batch *batch,
                                const struct panfrost_transfer *vertex_job,
                                const struct panfrost_transfer *tiler_job)
{
        struct panfrost_context *ctx = batch->ctx;
        bool wallpapering = ctx->wallpaper_batch && batch->scoreboard.tiler_dep;

        if (wallpapering) {
                /* Inject in reverse order, with "predicted" job indices:
                 * the tiler job is added first with the index the vertex job
                 * will receive (+2). THIS IS A HACK XXX */

                panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false,
                                 batch->scoreboard.job_index + 2, tiler_job, true);
                panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
                                 vertex_job, true);
                return;
        }

        /* If rasterizer discard is enabled, only submit the vertex job */

        unsigned vertex = panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, 0,
                                           vertex_job, false);

        if (ctx->rasterizer->base.rasterizer_discard)
                return;

        /* Tiler job depends on the vertex job having run */
        panfrost_add_job(&batch->pool, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, vertex, tiler_job, false);
}
Alyssa Rosenzweig0a9fa4b2020-04-06 19:44:58 -04001851
/* Upload the (currently hardcoded) sample-location table and return its GPU
 * address. TODO: stop hardcoding this */
mali_ptr
panfrost_emit_sample_locations(struct panfrost_batch *batch)
{
        /* NOTE(review): the array below holds 98 uint16 values (49 pairs)
         * but only 96 are uploaded -- confirm the intended table size
         * against the hardware descriptor layout */
        uint16_t locations[] = {
            128, 128,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            0, 256,
            128, 128,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
            0, 0,
        };

        return panfrost_pool_upload_aligned(&batch->pool, locations, 96 * sizeof(uint16_t), 64);
}
Alyssa Rosenzweig0a9fa4b2020-04-06 19:44:58 -04001908}