blob: 4d74dd4345ebf049b87ae28b3065bedb6cec752c [file] [log] [blame]
/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */
26
27#include <math.h>
28#include <stdio.h>
29#include "pan_encoder.h"
30#include "pan_pool.h"
31#include "pan_scoreboard.h"
32#include "pan_texture.h"
33#include "panfrost-quirks.h"
34#include "../midgard/midgard_compile.h"
35#include "compiler/nir/nir_builder.h"
36#include "util/u_math.h"
37
/* On Midgard, the native blit infrastructure (via MFBD preloads) is broken or
 * missing in many cases. We instead use software paths as fallbacks to
 * implement blits, which are done as TILER jobs. No vertex shader is
 * necessary since we can supply screen-space coordinates directly.
 *
 * This is primarily designed as a fallback for preloads but could be extended
 * for other clears/blits if needed in the future. */
45
46static void
47panfrost_build_blit_shader(panfrost_program *program, unsigned gpu_id, gl_frag_result loc, nir_alu_type T, bool ms)
48{
49 bool is_colour = loc >= FRAG_RESULT_DATA0;
50
Alyssa Rosenzweigc92be292020-08-26 16:48:13 -040051 nir_builder _b;
52 nir_builder_init_simple_shader(&_b, NULL, MESA_SHADER_FRAGMENT, &midgard_nir_options);
53 nir_builder *b = &_b;
54 nir_shader *shader = b->shader;
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -040055
Icecream9590eaaad2020-09-26 12:16:02 +120056 shader->info.internal = true;
57
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -040058 nir_variable *c_src = nir_variable_create(shader, nir_var_shader_in, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "coord");
59 nir_variable *c_out = nir_variable_create(shader, nir_var_shader_out, glsl_vector_type(
60 GLSL_TYPE_FLOAT, is_colour ? 4 : 1), "out");
61
62 c_src->data.location = VARYING_SLOT_TEX0;
63 c_out->data.location = loc;
64
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -040065 nir_ssa_def *coord = nir_load_var(b, c_src);
66
67 nir_tex_instr *tex = nir_tex_instr_create(shader, ms ? 3 : 1);
68
69 tex->dest_type = T;
70
71 if (ms) {
72 tex->src[0].src_type = nir_tex_src_coord;
73 tex->src[0].src = nir_src_for_ssa(nir_f2i32(b, coord));
74 tex->coord_components = 2;
75
76 tex->src[1].src_type = nir_tex_src_ms_index;
77 tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
78
79 tex->src[2].src_type = nir_tex_src_lod;
80 tex->src[2].src = nir_src_for_ssa(nir_imm_int(b, 0));
81 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
82 tex->op = nir_texop_txf_ms;
83 } else {
84 tex->op = nir_texop_tex;
85
86 tex->src[0].src_type = nir_tex_src_coord;
87 tex->src[0].src = nir_src_for_ssa(coord);
88 tex->coord_components = 2;
89
90 tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
91 }
92
93 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
94 nir_builder_instr_insert(b, &tex->instr);
95
96 if (is_colour)
97 nir_store_var(b, c_out, &tex->dest.ssa, 0xFF);
98 else
99 nir_store_var(b, c_out, nir_channel(b, &tex->dest.ssa, 0), 0xFF);
100
Boris Brezillon0a74a042020-10-08 10:09:56 +0200101 struct panfrost_compile_inputs inputs = {
102 .gpu_id = gpu_id,
103 };
104
105 midgard_compile_shader_nir(shader, program, &inputs);
Alyssa Rosenzweigda6d0e32020-08-18 08:10:25 -0400106 ralloc_free(shader);
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400107}
108
109/* Compile and upload all possible blit shaders ahead-of-time to reduce draw
110 * time overhead. There's only ~30 of them at the moment, so this is fine */
111
112void
113panfrost_init_blit_shaders(struct panfrost_device *dev)
114{
115 static const struct {
116 gl_frag_result loc;
117 unsigned types;
118 } shader_descs[] = {
119 { FRAG_RESULT_DEPTH, 1 << PAN_BLIT_FLOAT },
120 { FRAG_RESULT_STENCIL, 1 << PAN_BLIT_UINT },
121 { FRAG_RESULT_DATA0, ~0 },
122 { FRAG_RESULT_DATA1, ~0 },
123 { FRAG_RESULT_DATA2, ~0 },
124 { FRAG_RESULT_DATA3, ~0 },
125 { FRAG_RESULT_DATA4, ~0 },
126 { FRAG_RESULT_DATA5, ~0 },
127 { FRAG_RESULT_DATA6, ~0 },
128 { FRAG_RESULT_DATA7, ~0 }
129 };
130
131 nir_alu_type nir_types[PAN_BLIT_NUM_TYPES] = {
132 nir_type_float,
133 nir_type_uint,
134 nir_type_int
135 };
136
137 /* Total size = # of shaders * bytes per shader. There are
138 * shaders for each RT (so up to DATA7 -- overestimate is
139 * okay) and up to NUM_TYPES variants of each, * 2 for multisampling
140 * variants. These shaders are simple enough that they should be less
141 * than 8 quadwords each (again, overestimate is fine). */
142
143 unsigned offset = 0;
144 unsigned total_size = (FRAG_RESULT_DATA7 * PAN_BLIT_NUM_TYPES)
145 * (8 * 16) * 2;
146
147 dev->blit_shaders.bo = panfrost_bo_create(dev, total_size, PAN_BO_EXECUTE);
148
149 /* Don't bother generating multisampling variants if we don't actually
150 * support multisampling */
151 bool has_ms = !(dev->quirks & MIDGARD_SFBD);
152
153 for (unsigned ms = 0; ms <= has_ms; ++ms) {
154 for (unsigned i = 0; i < ARRAY_SIZE(shader_descs); ++i) {
155 unsigned loc = shader_descs[i].loc;
156
157 for (enum pan_blit_type T = 0; T < PAN_BLIT_NUM_TYPES; ++T) {
158 if (!(shader_descs[i].types & (1 << T)))
159 continue;
160
161 panfrost_program program;
162 panfrost_build_blit_shader(&program, dev->gpu_id, loc,
163 nir_types[T], ms);
164
165 assert(offset + program.compiled.size < total_size);
166 memcpy(dev->blit_shaders.bo->cpu + offset, program.compiled.data, program.compiled.size);
167
168 dev->blit_shaders.loads[loc][T][ms] = (dev->blit_shaders.bo->gpu + offset) | program.first_tag;
169 offset += ALIGN_POT(program.compiled.size, 64);
170 util_dynarray_fini(&program.compiled);
171 }
172 }
173 }
174}
175
176/* Add a shader-based load on Midgard (draw-time for GL). Shaders are
177 * precached */
178
179void
180panfrost_load_midg(
181 struct pan_pool *pool,
182 struct pan_scoreboard *scoreboard,
183 mali_ptr blend_shader,
184 mali_ptr fbd,
185 mali_ptr coordinates, unsigned vertex_count,
186 struct pan_image *image,
187 unsigned loc)
188{
Alyssa Rosenzweig5b3b2a62020-08-21 16:22:10 -0400189 bool srgb = util_format_is_srgb(image->format);
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400190 unsigned width = u_minify(image->width0, image->first_level);
191 unsigned height = u_minify(image->height0, image->first_level);
192
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400193 struct panfrost_transfer viewport = panfrost_pool_alloc(pool, MALI_VIEWPORT_LENGTH);
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400194 struct panfrost_transfer sampler = panfrost_pool_alloc(pool, MALI_MIDGARD_SAMPLER_LENGTH);
Alyssa Rosenzweig4fc90f72020-08-13 16:06:12 -0400195 struct panfrost_transfer varying = panfrost_pool_alloc(pool, MALI_ATTRIBUTE_LENGTH);
196 struct panfrost_transfer varying_buffer = panfrost_pool_alloc(pool, MALI_ATTRIBUTE_BUFFER_LENGTH);
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400197
Alyssa Rosenzweig7f487e02020-08-05 19:33:20 -0400198 pan_pack(viewport.cpu, VIEWPORT, cfg) {
199 cfg.scissor_maximum_x = width - 1; /* Inclusive */
200 cfg.scissor_maximum_y = height - 1;
201 }
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400202
Alyssa Rosenzweig4fc90f72020-08-13 16:06:12 -0400203 pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
204 cfg.pointer = coordinates;
205 cfg.stride = 4 * sizeof(float);
206 cfg.size = cfg.stride * vertex_count;
207 }
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400208
Alyssa Rosenzweig4fc90f72020-08-13 16:06:12 -0400209 pan_pack(varying.cpu, ATTRIBUTE, cfg) {
Alyssa Rosenzweig2c8a7222020-08-13 13:27:16 -0400210 cfg.buffer_index = 0;
211 cfg.format = (MALI_CHANNEL_R << 0) | (MALI_CHANNEL_G << 3) | (MALI_RGBA32F << 12);
212 }
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400213
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400214 /* Determine the sampler type needed. Stencil is always sampled as
215 * UINT. Pure (U)INT is always (U)INT. Everything else is FLOAT. */
216
217 enum pan_blit_type T =
218 (loc == FRAG_RESULT_STENCIL) ? PAN_BLIT_UINT :
219 (util_format_is_pure_uint(image->format)) ? PAN_BLIT_UINT :
220 (util_format_is_pure_sint(image->format)) ? PAN_BLIT_INT :
221 PAN_BLIT_FLOAT;
222
223 bool ms = image->nr_samples > 1;
224
Boris Brezillon83899762020-09-16 13:31:37 +0200225 struct panfrost_transfer shader_meta_t =
226 panfrost_pool_alloc_aligned(pool,
227 MALI_RENDERER_STATE_LENGTH +
228 8 * MALI_BLEND_LENGTH,
229 128);
Alyssa Rosenzweig661b4692020-08-21 10:34:06 -0400230
Boris Brezillonf734e672020-09-29 15:47:04 +0200231 pan_pack(shader_meta_t.cpu, RENDERER_STATE, cfg) {
Alyssa Rosenzweig5b3b2a62020-08-21 16:22:10 -0400232 cfg.shader.shader = pool->dev->blit_shaders.loads[loc][T][ms];
233 cfg.shader.varying_count = 1;
234 cfg.shader.texture_count = 1;
235 cfg.shader.sampler_count = 1;
Alyssa Rosenzweig1b7d4f12020-08-20 16:25:14 -0400236
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200237 cfg.properties.work_register_count = 4;
238 cfg.properties.midgard_early_z_enable = (loc >= FRAG_RESULT_DATA0);
239 cfg.properties.stencil_from_shader = (loc == FRAG_RESULT_STENCIL);
240 cfg.properties.depth_source = (loc == FRAG_RESULT_DEPTH) ?
241 MALI_DEPTH_SOURCE_SHADER :
242 MALI_DEPTH_SOURCE_FIXED_FUNCTION;
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400243
Alyssa Rosenzweig5b3b2a62020-08-21 16:22:10 -0400244 cfg.multisample_misc.sample_mask = 0xFFFF;
245 cfg.multisample_misc.multisample_enable = ms;
246 cfg.multisample_misc.evaluate_per_sample = ms;
247 cfg.multisample_misc.depth_write_mask = (loc == FRAG_RESULT_DEPTH);
248 cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
249
250 cfg.stencil_mask_misc.stencil_enable = (loc == FRAG_RESULT_STENCIL);
251 cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
252 cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
253 cfg.stencil_mask_misc.unknown_1 = 0x7;
254
255 cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
256 cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
257 cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
258 cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
Alyssa Rosenzweig322ddbd2020-09-09 16:29:04 -0400259 cfg.stencil_front.mask = 0xFF;
Alyssa Rosenzweig5b3b2a62020-08-21 16:22:10 -0400260
261 cfg.stencil_back = cfg.stencil_front;
262
263 if (pool->dev->quirks & MIDGARD_SFBD) {
264 cfg.stencil_mask_misc.sfbd_write_enable = true;
265 cfg.stencil_mask_misc.sfbd_dither_disable = true;
266 cfg.stencil_mask_misc.sfbd_srgb = srgb;
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200267 cfg.multisample_misc.sfbd_blend_shader = !!blend_shader;
268 if (cfg.multisample_misc.sfbd_blend_shader) {
269 cfg.sfbd_blend_shader = blend_shader;
270 } else {
Boris Brezillon713419e2020-09-16 10:26:06 +0200271 cfg.sfbd_blend_equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
272 cfg.sfbd_blend_equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
273 cfg.sfbd_blend_equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
274 cfg.sfbd_blend_equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
275 cfg.sfbd_blend_equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
276 cfg.sfbd_blend_equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
277
278 if (loc >= FRAG_RESULT_DATA0)
279 cfg.sfbd_blend_equation.color_mask = 0xf;
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200280 cfg.sfbd_blend_constant = 0;
281 }
Alyssa Rosenzweig5b3b2a62020-08-21 16:22:10 -0400282 } else if (!(pool->dev->quirks & IS_BIFROST)) {
Boris Brezillon7bb85ea2020-09-15 17:03:28 +0200283 cfg.sfbd_blend_shader = blend_shader;
Alyssa Rosenzweig5b3b2a62020-08-21 16:22:10 -0400284 }
285
286 assert(cfg.shader.shader);
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400287 }
288
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400289 /* Create the texture descriptor. We partially compute the base address
290 * ourselves to account for layer, such that the texture descriptor
291 * itself is for a 2D texture with array size 1 even for 3D/array
292 * textures, removing the need to separately key the blit shaders for
293 * 2D and 3D variants */
294
Alyssa Rosenzweig373a2042020-08-17 14:27:57 -0400295 struct panfrost_transfer texture_t = panfrost_pool_alloc_aligned(
296 pool, MALI_MIDGARD_TEXTURE_LENGTH + sizeof(mali_ptr) * 2 * MAX2(image->nr_samples, 1), 128);
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400297
298 panfrost_new_texture(texture_t.cpu,
299 image->width0, image->height0,
300 MAX2(image->nr_samples, 1), 1,
Alyssa Rosenzweigf008a632020-08-11 17:27:36 -0400301 image->format, MALI_TEXTURE_DIMENSION_2D,
Alyssa Rosenzweig965537df2020-07-22 10:23:50 -0400302 image->modifier,
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400303 image->first_level, image->last_level,
304 0, 0,
305 image->nr_samples,
306 0,
Alyssa Rosenzweigcdc32762020-08-12 16:46:07 -0400307 (MALI_CHANNEL_R << 0) | (MALI_CHANNEL_G << 3) | (MALI_CHANNEL_B << 6) | (MALI_CHANNEL_A << 9),
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400308 image->bo->gpu + image->first_layer *
309 panfrost_get_layer_stride(image->slices,
Alyssa Rosenzweigf008a632020-08-11 17:27:36 -0400310 image->dim == MALI_TEXTURE_DIMENSION_3D,
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400311 image->cubemap_stride, image->first_level),
312 image->slices);
313
Alyssa Rosenzweigf74186b2020-08-11 18:23:12 -0400314 pan_pack(sampler.cpu, MIDGARD_SAMPLER, cfg)
315 cfg.normalized_coordinates = false;
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400316
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400317 for (unsigned i = 0; i < 8; ++i) {
Boris Brezillon83899762020-09-16 13:31:37 +0200318 void *dest = shader_meta_t.cpu + MALI_RENDERER_STATE_LENGTH +
319 MALI_BLEND_LENGTH * i;
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400320
Boris Brezillon83899762020-09-16 13:31:37 +0200321 if (loc != (FRAG_RESULT_DATA0 + i)) {
322 memset(dest, 0x0, MALI_BLEND_LENGTH);
323 continue;
324 }
Boris Brezillon713419e2020-09-16 10:26:06 +0200325
Boris Brezillon83899762020-09-16 13:31:37 +0200326 pan_pack(dest, BLEND, cfg) {
327 cfg.round_to_fb_precision = true;
328 cfg.srgb = srgb;
329 if (blend_shader) {
330 cfg.midgard.blend_shader = true;
331 cfg.midgard.shader_pc = blend_shader;
332 } else {
333 cfg.midgard.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
334 cfg.midgard.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
335 cfg.midgard.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
336 cfg.midgard.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
337 cfg.midgard.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
338 cfg.midgard.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
339 cfg.midgard.equation.color_mask = 0xf;
Boris Brezillon713419e2020-09-16 10:26:06 +0200340 }
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400341 }
342 }
343
Boris Brezillon6b923032020-09-08 20:32:41 +0200344 struct panfrost_transfer t =
345 panfrost_pool_alloc_aligned(pool, MALI_MIDGARD_TILER_JOB_LENGTH, 64);
Alyssa Rosenzweig59b6e3c2020-08-24 13:46:34 -0400346
Boris Brezillon6b923032020-09-08 20:32:41 +0200347 pan_section_pack(t.cpu, MIDGARD_TILER_JOB, DRAW, cfg) {
Boris Brezillond343f232020-09-29 10:45:23 +0200348 cfg.four_components_per_vertex = true;
349 cfg.draw_descriptor_is_64b = true;
350 cfg.texture_descriptor_is_64b = true;
Alyssa Rosenzweig59b6e3c2020-08-24 13:46:34 -0400351 cfg.position = coordinates;
352 cfg.textures = panfrost_pool_upload(pool, &texture_t.gpu, sizeof(texture_t.gpu));
353 cfg.samplers = sampler.gpu;
354 cfg.state = shader_meta_t.gpu;
355 cfg.varying_buffers = varying_buffer.gpu;
356 cfg.varyings = varying.gpu;
357 cfg.viewport = viewport.gpu;
Boris Brezillond343f232020-09-29 10:45:23 +0200358 cfg.fbd = fbd;
Alyssa Rosenzweig59b6e3c2020-08-24 13:46:34 -0400359 }
360
Boris Brezillon6b923032020-09-08 20:32:41 +0200361 pan_section_pack(t.cpu, MIDGARD_TILER_JOB, PRIMITIVE, cfg) {
Alyssa Rosenzweigb60d5672020-08-25 16:59:14 -0400362 cfg.draw_mode = MALI_DRAW_MODE_TRIANGLES;
363 cfg.index_count = vertex_count;
Boris Brezillon51331d62020-09-29 11:21:33 +0200364 cfg.job_task_split = 6;
Alyssa Rosenzweigb60d5672020-08-25 16:59:14 -0400365 }
366
Boris Brezillon6b923032020-09-08 20:32:41 +0200367 panfrost_pack_work_groups_compute(pan_section_ptr(t.cpu, MIDGARD_TILER_JOB, INVOCATION),
Boris Brezillond2892092020-09-08 19:41:51 +0200368 1, vertex_count, 1, 1, 1, 1, true);
Alyssa Rosenzweig59b6e3c2020-08-24 13:46:34 -0400369
Boris Brezillon6b923032020-09-08 20:32:41 +0200370 panfrost_add_job(pool, scoreboard, MALI_JOB_TYPE_TILER, false, 0, &t, true);
Alyssa Rosenzweig293f2512020-07-09 13:42:25 -0400371}