Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1 | /* |
| 2 | * © Copyright 2018 Alyssa Rosenzweig |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice (including the next |
| 12 | * paragraph) shall be included in all copies or substantial portions of the |
| 13 | * Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 21 | * SOFTWARE. |
| 22 | * |
| 23 | */ |
| 24 | |
| 25 | #include <sys/poll.h> |
| 26 | #include <errno.h> |
| 27 | |
| 28 | #include "pan_context.h" |
| 29 | #include "pan_swizzle.h" |
| 30 | #include "pan_format.h" |
| 31 | |
| 32 | #include "util/macros.h" |
| 33 | #include "util/u_format.h" |
| 34 | #include "util/u_inlines.h" |
| 35 | #include "util/u_upload_mgr.h" |
| 36 | #include "util/u_memory.h" |
| 37 | #include "util/half_float.h" |
| 38 | #include "indices/u_primconvert.h" |
| 39 | #include "tgsi/tgsi_parse.h" |
| 40 | |
| 41 | #include "pan_screen.h" |
| 42 | #include "pan_blending.h" |
| 43 | #include "pan_blend_shaders.h" |
| 44 | #include "pan_wallpaper.h" |
| 45 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 46 | static int performance_counter_number = 0; |
Alyssa Rosenzweig | 4c82abb | 2019-02-25 03:31:29 +0000 | [diff] [blame] | 47 | extern const char *pan_counters_base; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 48 | |
| 49 | /* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */ |
| 50 | //#define DRY_RUN |
| 51 | |
/* Set or clear `bit` in `lval` according to `cond`. Wrapped in
 * do { } while (0) so the expansion is a single statement: the previous
 * bare if/else form would mis-bind (or fail to compile) when used
 * unbraced inside an if/else chain. */
#define SET_BIT(lval, bit, cond) \
        do { \
                if (cond) \
                        (lval) |= (bit); \
                else \
                        (lval) &= ~(bit); \
        } while (0)
| 57 | |
| 58 | /* TODO: Sample size, etc */ |
| 59 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 60 | /* True for t6XX, false for t8xx. TODO: Run-time settable for automatic |
| 61 | * hardware configuration. */ |
| 62 | |
| 63 | static bool is_t6xx = false; |
| 64 | |
/* If set, we'll require the use of single render-target framebuffer
 * descriptors (SFBD), for older hardware -- specifically, <T760 hardware. If
 * false, we'll use the MFBD no matter what. New hardware -does- retain support
 * for SFBD, and in theory we could flip between them on a per-RT basis, but
 * there's no real advantage to doing so */
| 70 | |
| 71 | static bool require_sfbd = false; |
| 72 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 73 | static void |
| 74 | panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled) |
| 75 | { |
| 76 | SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled); |
| 77 | SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled); |
| 78 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 79 | if (require_sfbd) { |
| 80 | SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled); |
| 81 | } else { |
Alyssa Rosenzweig | f943047 | 2019-02-24 06:22:23 +0000 | [diff] [blame] | 82 | SET_BIT(ctx->fragment_rts[0].format.flags, MALI_MFBD_FORMAT_MSAA, enabled); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 83 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 84 | SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 85 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 86 | /* XXX */ |
| 87 | ctx->fragment_mfbd.rt_count_2 = enabled ? 4 : 1; |
| 88 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 89 | } |
| 90 | |
/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
 * independent between color buffers and depth/stencil). To enable, we allocate
 * the AFBC metadata buffer and mark that it is enabled. We do -not- actually
 * edit the fragment job here. This routine should be called ONCE per
 * AFBC-compressed buffer, rather than on every frame. */
| 96 | |
| 97 | static void |
| 98 | panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds) |
| 99 | { |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 100 | if (require_sfbd) { |
| 101 | printf("AFBC not supported yet on SFBD\n"); |
| 102 | assert(0); |
| 103 | } |
| 104 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 105 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 106 | struct panfrost_screen *screen = pan_screen(gallium->screen); |
| 107 | /* AFBC metadata is 16 bytes per tile */ |
| 108 | int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; |
| 109 | int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; |
| 110 | int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); |
| 111 | int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */ |
| 112 | |
| 113 | stride *= 2; /* TODO: Should this be carried over? */ |
| 114 | int main_size = stride * rsrc->base.height0; |
| 115 | rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16; |
| 116 | |
| 117 | /* Allocate the AFBC slab itself, large enough to hold the above */ |
| 118 | screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab, |
| 119 | (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096, |
| 120 | true, 0, 0, 0); |
| 121 | |
| 122 | rsrc->bo->has_afbc = true; |
| 123 | |
| 124 | /* Compressed textured reads use a tagged pointer to the metadata */ |
| 125 | |
| 126 | rsrc->bo->gpu[0] = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1); |
| 127 | rsrc->bo->cpu[0] = rsrc->bo->afbc_slab.cpu; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 128 | } |
| 129 | |
| 130 | static void |
| 131 | panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc) |
| 132 | { |
| 133 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 134 | struct panfrost_screen *screen = pan_screen(gallium->screen); |
| 135 | int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; |
| 136 | int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT; |
| 137 | |
| 138 | /* 8 byte checksum per tile */ |
| 139 | rsrc->bo->checksum_stride = tile_w * 8; |
| 140 | int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096); |
| 141 | screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0); |
| 142 | |
| 143 | rsrc->bo->has_checksum = true; |
| 144 | } |
| 145 | |
| 146 | /* ..by contrast, this routine runs for every FRAGMENT job, but does no |
| 147 | * allocation. AFBC is enabled on a per-surface basis */ |
| 148 | |
/* ..by contrast, this routine runs for every FRAGMENT job, but does no
 * allocation. AFBC is enabled on a per-surface basis: it points the
 * fragment framebuffer descriptors at whatever AFBC slabs were set up
 * earlier by panfrost_enable_afbc. */

static void
panfrost_set_fragment_afbc(struct panfrost_context *ctx)
{
        for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) {
                struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[cb]->texture;

                /* Non-AFBC is the default */
                if (!rsrc->bo->has_afbc)
                        continue;

                if (require_sfbd) {
                        fprintf(stderr, "Color AFBC not supported on SFBD\n");
                        assert(0);
                }

                /* Enable AFBC for the render target */
                ctx->fragment_rts[0].afbc.metadata = rsrc->bo->afbc_slab.gpu;
                ctx->fragment_rts[0].afbc.stride = 0;
                ctx->fragment_rts[0].afbc.unk = 0x30009;

                ctx->fragment_rts[0].format.flags |= MALI_MFBD_FORMAT_AFBC;

                /* Point rendering to our special framebuffer: the
                 * compressed body lives just past the metadata in the slab */
                ctx->fragment_rts[0].framebuffer = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;

                /* WAT? Stride is diff from the scanout case */
                ctx->fragment_rts[0].framebuffer_stride = ctx->pipe_framebuffer.width * 2 * 4;
        }

        /* Enable depth/stencil AFBC for the framebuffer (not the render target) */
        if (ctx->pipe_framebuffer.zsbuf) {
                struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture;

                if (rsrc->bo->has_afbc) {
                        if (require_sfbd) {
                                fprintf(stderr, "Depth AFBC not supported on SFBD\n");
                                assert(0);
                        }

                        /* D/S AFBC state lives in the "extra" descriptor,
                         * so flag its presence on the MFBD */
                        ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;

                        ctx->fragment_extra.ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu;
                        ctx->fragment_extra.ds_afbc.depth_stencil_afbc_stride = 0;

                        /* Body follows the metadata, as for color AFBC */
                        ctx->fragment_extra.ds_afbc.depth_stencil = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;

                        ctx->fragment_extra.ds_afbc.zero1 = 0x10009;
                        ctx->fragment_extra.ds_afbc.padding = 0x1000;

                        ctx->fragment_extra.unk = 0x435; /* General 0x400 in all unks. 0x5 for depth/stencil. 0x10 for AFBC encoded depth stencil. Unclear where the 0x20 is from */

                        /* Meaning of this bit unknown -- determined empirically */
                        ctx->fragment_mfbd.unk3 |= 0x400;
                }
        }

        /* For the special case of a depth-only FBO, we need to attach a dummy render target */

        if (ctx->pipe_framebuffer.nr_cbufs == 0) {
                if (require_sfbd) {
                        fprintf(stderr, "Depth-only FBO not supported on SFBD\n");
                        assert(0);
                }

                /* Minimal "null" RT format; field meanings unknown */
                struct mali_rt_format null_rt = {
                        .unk1 = 0x4000000,
                        .unk4 = 0x8
                };

                ctx->fragment_rts[0].format = null_rt;
                ctx->fragment_rts[0].framebuffer = 0;
                ctx->fragment_rts[0].framebuffer_stride = 0;
        }
}
| 222 | |
| 223 | /* Framebuffer descriptor */ |
| 224 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 225 | static void |
| 226 | panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h) |
| 227 | { |
| 228 | fb->width = MALI_POSITIVE(w); |
| 229 | fb->height = MALI_POSITIVE(h); |
| 230 | |
| 231 | /* No idea why this is needed, but it's how resolution_check is |
| 232 | * calculated. It's not clear to us yet why the hardware wants this. |
| 233 | * The formula itself was discovered mostly by manual bruteforce and |
| 234 | * aggressive algebraic simplification. */ |
| 235 | |
| 236 | fb->resolution_check = ((w + h) / 3) << 4; |
| 237 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 238 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 239 | static struct mali_single_framebuffer |
| 240 | panfrost_emit_sfbd(struct panfrost_context *ctx) |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 241 | { |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 242 | struct mali_single_framebuffer framebuffer = { |
| 243 | .unknown2 = 0x1f, |
| 244 | .format = 0x30000000, |
| 245 | .clear_flags = 0x1000, |
| 246 | .unknown_address_0 = ctx->scratchpad.gpu, |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 247 | .unknown_address_1 = ctx->misc_0.gpu, |
| 248 | .unknown_address_2 = ctx->misc_0.gpu + 40960, |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 249 | .tiler_flags = 0xf0, |
| 250 | .tiler_heap_free = ctx->tiler_heap.gpu, |
| 251 | .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size, |
| 252 | }; |
| 253 | |
| 254 | panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 255 | |
| 256 | return framebuffer; |
| 257 | } |
| 258 | |
/* Build a fresh multi-render-target framebuffer descriptor (MFBD) for
 * the current framebuffer state, returned by value. Only a single
 * render target is emitted for now (TODO: MRT). */

static struct bifrost_framebuffer
panfrost_emit_mfbd(struct panfrost_context *ctx)
{
        struct bifrost_framebuffer framebuffer = {
                /* It is not yet clear what tiler_meta means or how it's
                 * calculated, but we can tell the lower 32-bits are a
                 * (monotonically increasing?) function of tile count and
                 * geometry complexity; I suspect it defines a memory size of
                 * some kind? for the tiler. It's really unclear at the
                 * moment... but to add to the confusion, the hardware is happy
                 * enough to accept a zero in this field, so we don't even have
                 * to worry about it right now.
                 *
                 * The byte (just after the 32-bit mark) is much more
                 * interesting. The higher nibble I've only ever seen as 0xF,
                 * but the lower one I've seen as 0x0 or 0xF, and it's not
                 * obvious what the difference is. But what -is- obvious is
                 * that when the lower nibble is zero, performance is severely
                 * degraded compared to when the lower nibble is set.
                 * Evidently, that nibble enables some sort of fast path,
                 * perhaps relating to caching or tile flush? Regardless, at
                 * this point there's no clear reason not to set it, aside from
                 * substantially increased memory requirements (of the misc_0
                 * buffer) */

                .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,

                /* Width/height appear twice; reason unknown */
                .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
                .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height),

                .unk1 = 0x1080,

                /* TODO: MRT */
                .rt_count_1 = MALI_POSITIVE(1),
                .rt_count_2 = 4,

                .unknown2 = 0x1f,

                /* Corresponds to unknown_address_X of SFBD */
                .scratchpad = ctx->scratchpad.gpu,
                .tiler_scratch_start = ctx->misc_0.gpu,

                /* The constant added here is, like the lower word of
                 * tiler_meta, (loosely) another product of framebuffer size
                 * and geometry complexity. It must be sufficiently large for
                 * the tiler_meta fast path to work; if it's too small, there
                 * will be DATA_INVALID_FAULTs. Conversely, it must be less
                 * than the total size of misc_0, or else there's no room. It's
                 * possible this constant configures a partition between two
                 * parts of misc_0? We haven't investigated the functionality,
                 * as these buffers are internally used by the hardware
                 * (presumably by the tiler) but not seemingly touched by the driver
                 */

                .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,

                .tiler_heap_start = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        return framebuffer;
}
| 323 | |
| 324 | /* Are we currently rendering to the screen (rather than an FBO)? */ |
| 325 | |
| 326 | static bool |
| 327 | panfrost_is_scanout(struct panfrost_context *ctx) |
| 328 | { |
| 329 | /* If there is no color buffer, it's an FBO */ |
| 330 | if (!ctx->pipe_framebuffer.nr_cbufs) |
| 331 | return false; |
| 332 | |
| 333 | /* If we're too early that no framebuffer was sent, it's scanout */ |
| 334 | if (!ctx->pipe_framebuffer.cbufs[0]) |
| 335 | return true; |
| 336 | |
| 337 | return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET || |
| 338 | ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT || |
| 339 | ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED; |
| 340 | } |
| 341 | |
/* The above functions are for generalised fbd emission, used in both
 * fragment as well as vertex/tiler payloads. This routine builds the
 * framebuffer descriptor specific to fragment (render) jobs, pointing
 * it at the actual render target memory. */

static void
panfrost_new_frag_framebuffer(struct panfrost_context *ctx)
{
        mali_ptr framebuffer;
        int stride;

        if (ctx->pipe_framebuffer.nr_cbufs > 0) {
                framebuffer = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture)->bo->gpu[0];
                stride = util_format_get_stride(ctx->pipe_framebuffer.cbufs[0]->format, ctx->pipe_framebuffer.width);
        } else {
                /* Depth-only framebuffer -> dummy RT */
                framebuffer = 0;
                stride = 0;
        }

        /* The default is upside down from OpenGL's perspective: start at
         * the last row and walk backwards with a negative stride. */
        if (panfrost_is_scanout(ctx)) {
                framebuffer += stride * (ctx->pipe_framebuffer.height - 1);
                stride = -stride;
        }

        if (require_sfbd) {
                struct mali_single_framebuffer fb = panfrost_emit_sfbd(ctx);

                fb.framebuffer = framebuffer;
                fb.stride = stride;

                fb.format = 0xb84e0281; /* RGB32, no MSAA */
                memcpy(&ctx->fragment_sfbd, &fb, sizeof(fb));
        } else {
                struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx);

                /* XXX: MRT case */
                fb.rt_count_2 = 1;
                fb.unk3 = 0x100;

                /* By default, Gallium seems to need a BGR framebuffer */
                unsigned char bgra[4] = {
                        PIPE_SWIZZLE_Z, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_X, PIPE_SWIZZLE_W
                };

                struct bifrost_render_target rt = {
                        /* Magic format fields, discovered empirically */
                        .format = {
                                .unk1 = 0x4000000,
                                .unk2 = 0x1,
                                .nr_channels = MALI_POSITIVE(4),
                                .flags = 0x444,
                                .swizzle = panfrost_translate_swizzle_4(bgra),
                                .unk4 = 0x8
                        },
                        .framebuffer = framebuffer,
                        /* Stride is encoded in units of 16 bytes */
                        .framebuffer_stride = (stride / 16) & 0xfffffff,
                };

                memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt));

                memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra));
                memcpy(&ctx->fragment_mfbd, &fb, sizeof(fb));
        }
}
| 406 | |
/* Maps float 0.0-1.0 to int 0x00-0xFF, used when packing clear colours.
 * Out-of-range inputs are clamped rather than allowed to wrap on the
 * narrowing cast (e.g. 2.0f now yields 0xFF instead of garbage). */
static uint8_t
normalised_float_to_u8(float f)
{
        if (f <= 0.0f)
                return 0;

        if (f >= 1.0f)
                return 255;

        return (uint8_t) (int) (f * 255.0f);
}
| 413 | |
/* Record the job's pending clear state into the fragment SFBD. Clear
 * colours/depths are written to four fields each; presumably
 * per-sample or per-quadrant copies -- TODO confirm. */

static void
panfrost_clear_sfbd(struct panfrost_job *job)
{
        struct panfrost_context *ctx = job->ctx;
        struct mali_single_framebuffer *sfbd = &ctx->fragment_sfbd;

        if (job->clear & PIPE_CLEAR_COLOR) {
                sfbd->clear_color_1 = job->clear_color;
                sfbd->clear_color_2 = job->clear_color;
                sfbd->clear_color_3 = job->clear_color;
                sfbd->clear_color_4 = job->clear_color;
        }

        if (job->clear & PIPE_CLEAR_DEPTH) {
                sfbd->clear_depth_1 = job->clear_depth;
                sfbd->clear_depth_2 = job->clear_depth;
                sfbd->clear_depth_3 = job->clear_depth;
                sfbd->clear_depth_4 = job->clear_depth;

                /* Depth clears need the backing depth buffer attached */
                sfbd->depth_buffer = ctx->depth_stencil_buffer.gpu;
                sfbd->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
        }

        if (job->clear & PIPE_CLEAR_STENCIL) {
                sfbd->clear_stencil = job->clear_stencil;

                /* Stencil shares the combined depth/stencil buffer */
                sfbd->stencil_buffer = ctx->depth_stencil_buffer.gpu;
                sfbd->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
        }

        /* Set flags based on what has been cleared, for the SFBD case */
        /* XXX: What do these flags mean? */
        int clear_flags = 0x101100;

        if (!(job->clear & ~(PIPE_CLEAR_COLOR | PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
                /* On a tiler like this, it's fastest to clear all three buffers at once */

                clear_flags |= MALI_CLEAR_FAST;
        } else {
                clear_flags |= MALI_CLEAR_SLOW;

                if (job->clear & PIPE_CLEAR_STENCIL)
                        clear_flags |= MALI_CLEAR_SLOW_STENCIL;
        }

        sfbd->clear_flags = clear_flags;
}
| 461 | |
/* MFBD counterpart of panfrost_clear_sfbd: record the job's pending
 * clear state into render target 0 and the shared MFBD. */

static void
panfrost_clear_mfbd(struct panfrost_job *job)
{
        struct panfrost_context *ctx = job->ctx;
        struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0];
        struct bifrost_framebuffer *buffer_ds = &ctx->fragment_mfbd;

        if (job->clear & PIPE_CLEAR_COLOR) {
                /* Replicated four times, as in the SFBD path; reason unknown */
                buffer_color->clear_color_1 = job->clear_color;
                buffer_color->clear_color_2 = job->clear_color;
                buffer_color->clear_color_3 = job->clear_color;
                buffer_color->clear_color_4 = job->clear_color;
        }

        if (job->clear & PIPE_CLEAR_DEPTH) {
                buffer_ds->clear_depth = job->clear_depth;
        }

        if (job->clear & PIPE_CLEAR_STENCIL) {
                buffer_ds->clear_stencil = job->clear_stencil;
        }

        if (job->clear & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                /* Setup combined 24/8 depth/stencil */
                ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
                ctx->fragment_extra.unk = 0x405;
                ctx->fragment_extra.ds_linear.depth = ctx->depth_stencil_buffer.gpu;
                ctx->fragment_extra.ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4;
        }
}
| 492 | |
| 493 | static void |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 494 | panfrost_clear( |
| 495 | struct pipe_context *pipe, |
| 496 | unsigned buffers, |
| 497 | const union pipe_color_union *color, |
| 498 | double depth, unsigned stencil) |
| 499 | { |
| 500 | struct panfrost_context *ctx = pan_context(pipe); |
Alyssa Rosenzweig | 40ffee4 | 2019-02-26 23:51:34 +0000 | [diff] [blame] | 501 | struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 502 | |
Alyssa Rosenzweig | 40ffee4 | 2019-02-26 23:51:34 +0000 | [diff] [blame] | 503 | if (buffers & PIPE_CLEAR_COLOR) { |
| 504 | /* Alpha clear only meaningful without alpha channel, TODO less ad hoc */ |
| 505 | bool has_alpha = util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format); |
| 506 | float clear_alpha = has_alpha ? color->f[3] : 1.0f; |
| 507 | |
| 508 | uint32_t packed_color = |
| 509 | (normalised_float_to_u8(clear_alpha) << 24) | |
| 510 | (normalised_float_to_u8(color->f[2]) << 16) | |
| 511 | (normalised_float_to_u8(color->f[1]) << 8) | |
| 512 | (normalised_float_to_u8(color->f[0]) << 0); |
| 513 | |
| 514 | job->clear_color = packed_color; |
| 515 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 516 | } |
| 517 | |
Alyssa Rosenzweig | 40ffee4 | 2019-02-26 23:51:34 +0000 | [diff] [blame] | 518 | if (buffers & PIPE_CLEAR_DEPTH) { |
| 519 | job->clear_depth = depth; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 520 | } |
Alyssa Rosenzweig | 40ffee4 | 2019-02-26 23:51:34 +0000 | [diff] [blame] | 521 | |
| 522 | if (buffers & PIPE_CLEAR_STENCIL) { |
| 523 | job->clear_stencil = stencil; |
| 524 | } |
| 525 | |
| 526 | job->clear |= buffers; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 527 | } |
| 528 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 529 | static mali_ptr |
| 530 | panfrost_attach_vt_mfbd(struct panfrost_context *ctx) |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 531 | { |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 532 | /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 533 | struct bifrost_render_target rts_list[] = { |
| 534 | { |
| 535 | .chunknown = { |
| 536 | .unk = 0x30005, |
| 537 | }, |
| 538 | .framebuffer = ctx->misc_0.gpu, |
| 539 | .zero2 = 0x3, |
| 540 | }, |
| 541 | }; |
| 542 | |
| 543 | /* Allocate memory for the three components */ |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 544 | int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 545 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); |
| 546 | |
| 547 | /* Opaque 1024-block */ |
| 548 | rts_list[0].chunknown.pointer = transfer.gpu; |
| 549 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 550 | memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd)); |
| 551 | memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list)); |
| 552 | |
| 553 | return (transfer.gpu + 1024) | MALI_MFBD; |
| 554 | } |
| 555 | |
| 556 | static mali_ptr |
| 557 | panfrost_attach_vt_sfbd(struct panfrost_context *ctx) |
| 558 | { |
| 559 | return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD; |
| 560 | } |
| 561 | |
| 562 | static void |
| 563 | panfrost_attach_vt_framebuffer(struct panfrost_context *ctx) |
| 564 | { |
| 565 | mali_ptr framebuffer = require_sfbd ? |
| 566 | panfrost_attach_vt_sfbd(ctx) : |
| 567 | panfrost_attach_vt_mfbd(ctx); |
| 568 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 569 | ctx->payload_vertex.postfix.framebuffer = framebuffer; |
| 570 | ctx->payload_tiler.postfix.framebuffer = framebuffer; |
| 571 | } |
| 572 | |
/* Upload viewport/clip state into ctx->viewport. Clip bounds are
 * encoded as floats. The viewport itself is encoded as (somewhat)
 * asymmetric ints: viewport0 raw, viewport1 biased by MALI_POSITIVE. */

static void
panfrost_viewport(struct panfrost_context *ctx,
                  float depth_clip_near,
                  float depth_clip_far,
                  int viewport_x0, int viewport_y0,
                  int viewport_x1, int viewport_y1)
{
        struct mali_viewport ret = {
                /* By default, do no viewport clipping, i.e. clip to (-inf,
                 * inf) in each direction. Clipping to the viewport in theory
                 * should work, but in practice causes issues when we're not
                 * explicitly trying to scissor */

                .clip_minx = -inff,
                .clip_miny = -inff,
                .clip_maxx = inff,
                .clip_maxy = inff,

                /* We always perform depth clipping (TODO: Can this be disabled?) */

                .clip_minz = depth_clip_near,
                .clip_maxz = depth_clip_far,

                .viewport0 = { viewport_x0, viewport_y0 },
                .viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) },
        };

        memcpy(ctx->viewport, &ret, sizeof(ret));
}
| 605 | |
| 606 | /* Reset per-frame context, called on context initialisation as well as after |
| 607 | * flushing a frame */ |
| 608 | |
| 609 | static void |
| 610 | panfrost_invalidate_frame(struct panfrost_context *ctx) |
| 611 | { |
| 612 | unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index*ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset; |
| 613 | printf("Uploaded transient %d bytes\n", transient_count); |
| 614 | |
| 615 | /* Rotate cmdstream */ |
| 616 | if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0]))) |
| 617 | ctx->cmdstream_i = 0; |
| 618 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 619 | if (require_sfbd) |
| 620 | ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); |
| 621 | else |
| 622 | ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); |
| 623 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 624 | panfrost_new_frag_framebuffer(ctx); |
| 625 | |
| 626 | /* Reset varyings allocated */ |
| 627 | ctx->varying_height = 0; |
| 628 | |
| 629 | /* The transient cmdstream is dirty every frame; the only bits worth preserving |
| 630 | * (textures, shaders, etc) are in other buffers anyways */ |
| 631 | |
| 632 | ctx->transient_pools[ctx->cmdstream_i].entry_index = 0; |
| 633 | ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0; |
| 634 | |
| 635 | /* Regenerate payloads */ |
| 636 | panfrost_attach_vt_framebuffer(ctx); |
| 637 | |
| 638 | if (ctx->rasterizer) |
| 639 | ctx->dirty |= PAN_DIRTY_RASTERIZER; |
| 640 | |
| 641 | /* XXX */ |
| 642 | ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES; |
| 643 | } |
| 644 | |
| 645 | /* In practice, every field of these payloads should be configurable |
 * arbitrarily, which means these functions are basically catch-alls for
| 647 | * as-of-yet unwavering unknowns */ |
| 648 | |
| 649 | static void |
| 650 | panfrost_emit_vertex_payload(struct panfrost_context *ctx) |
| 651 | { |
| 652 | struct midgard_payload_vertex_tiler payload = { |
| 653 | .prefix = { |
| 654 | .workgroups_z_shift = 32, |
| 655 | .workgroups_x_shift_2 = 0x2, |
| 656 | .workgroups_x_shift_3 = 0x5, |
| 657 | }, |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 658 | .gl_enables = 0x4 | (is_t6xx ? 0 : 0x2), |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 659 | }; |
| 660 | |
| 661 | memcpy(&ctx->payload_vertex, &payload, sizeof(payload)); |
| 662 | } |
| 663 | |
| 664 | static void |
| 665 | panfrost_emit_tiler_payload(struct panfrost_context *ctx) |
| 666 | { |
| 667 | struct midgard_payload_vertex_tiler payload = { |
| 668 | .prefix = { |
| 669 | .workgroups_z_shift = 32, |
| 670 | .workgroups_x_shift_2 = 0x2, |
| 671 | .workgroups_x_shift_3 = 0x6, |
| 672 | |
| 673 | .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */ |
| 674 | }, |
| 675 | }; |
| 676 | |
| 677 | /* Reserve the viewport */ |
| 678 | struct panfrost_transfer t = panfrost_allocate_chunk(ctx, sizeof(struct mali_viewport), HEAP_DESCRIPTOR); |
| 679 | ctx->viewport = (struct mali_viewport *) t.cpu; |
| 680 | payload.postfix.viewport = t.gpu; |
| 681 | |
| 682 | memcpy(&ctx->payload_tiler, &payload, sizeof(payload)); |
| 683 | } |
| 684 | |
| 685 | static unsigned |
| 686 | translate_tex_wrap(enum pipe_tex_wrap w) |
| 687 | { |
| 688 | switch (w) { |
| 689 | case PIPE_TEX_WRAP_REPEAT: |
| 690 | return MALI_WRAP_REPEAT; |
| 691 | |
| 692 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
| 693 | return MALI_WRAP_CLAMP_TO_EDGE; |
| 694 | |
| 695 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
| 696 | return MALI_WRAP_CLAMP_TO_BORDER; |
| 697 | |
| 698 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
| 699 | return MALI_WRAP_MIRRORED_REPEAT; |
| 700 | |
| 701 | default: |
| 702 | assert(0); |
| 703 | return 0; |
| 704 | } |
| 705 | } |
| 706 | |
| 707 | static unsigned |
| 708 | translate_tex_filter(enum pipe_tex_filter f) |
| 709 | { |
| 710 | switch (f) { |
| 711 | case PIPE_TEX_FILTER_NEAREST: |
| 712 | return MALI_NEAREST; |
| 713 | |
| 714 | case PIPE_TEX_FILTER_LINEAR: |
| 715 | return MALI_LINEAR; |
| 716 | |
| 717 | default: |
| 718 | assert(0); |
| 719 | return 0; |
| 720 | } |
| 721 | } |
| 722 | |
| 723 | static unsigned |
| 724 | translate_mip_filter(enum pipe_tex_mipfilter f) |
| 725 | { |
| 726 | return (f == PIPE_TEX_MIPFILTER_LINEAR) ? MALI_MIP_LINEAR : 0; |
| 727 | } |
| 728 | |
| 729 | static unsigned |
| 730 | panfrost_translate_compare_func(enum pipe_compare_func in) |
| 731 | { |
| 732 | switch (in) { |
| 733 | case PIPE_FUNC_NEVER: |
| 734 | return MALI_FUNC_NEVER; |
| 735 | |
| 736 | case PIPE_FUNC_LESS: |
| 737 | return MALI_FUNC_LESS; |
| 738 | |
| 739 | case PIPE_FUNC_EQUAL: |
| 740 | return MALI_FUNC_EQUAL; |
| 741 | |
| 742 | case PIPE_FUNC_LEQUAL: |
| 743 | return MALI_FUNC_LEQUAL; |
| 744 | |
| 745 | case PIPE_FUNC_GREATER: |
| 746 | return MALI_FUNC_GREATER; |
| 747 | |
| 748 | case PIPE_FUNC_NOTEQUAL: |
| 749 | return MALI_FUNC_NOTEQUAL; |
| 750 | |
| 751 | case PIPE_FUNC_GEQUAL: |
| 752 | return MALI_FUNC_GEQUAL; |
| 753 | |
| 754 | case PIPE_FUNC_ALWAYS: |
| 755 | return MALI_FUNC_ALWAYS; |
| 756 | } |
| 757 | |
| 758 | assert (0); |
| 759 | return 0; /* Unreachable */ |
| 760 | } |
| 761 | |
| 762 | static unsigned |
| 763 | panfrost_translate_alt_compare_func(enum pipe_compare_func in) |
| 764 | { |
| 765 | switch (in) { |
| 766 | case PIPE_FUNC_NEVER: |
| 767 | return MALI_ALT_FUNC_NEVER; |
| 768 | |
| 769 | case PIPE_FUNC_LESS: |
| 770 | return MALI_ALT_FUNC_LESS; |
| 771 | |
| 772 | case PIPE_FUNC_EQUAL: |
| 773 | return MALI_ALT_FUNC_EQUAL; |
| 774 | |
| 775 | case PIPE_FUNC_LEQUAL: |
| 776 | return MALI_ALT_FUNC_LEQUAL; |
| 777 | |
| 778 | case PIPE_FUNC_GREATER: |
| 779 | return MALI_ALT_FUNC_GREATER; |
| 780 | |
| 781 | case PIPE_FUNC_NOTEQUAL: |
| 782 | return MALI_ALT_FUNC_NOTEQUAL; |
| 783 | |
| 784 | case PIPE_FUNC_GEQUAL: |
| 785 | return MALI_ALT_FUNC_GEQUAL; |
| 786 | |
| 787 | case PIPE_FUNC_ALWAYS: |
| 788 | return MALI_ALT_FUNC_ALWAYS; |
| 789 | } |
| 790 | |
| 791 | assert (0); |
| 792 | return 0; /* Unreachable */ |
| 793 | } |
| 794 | |
| 795 | static unsigned |
| 796 | panfrost_translate_stencil_op(enum pipe_stencil_op in) |
| 797 | { |
| 798 | switch (in) { |
| 799 | case PIPE_STENCIL_OP_KEEP: |
| 800 | return MALI_STENCIL_KEEP; |
| 801 | |
| 802 | case PIPE_STENCIL_OP_ZERO: |
| 803 | return MALI_STENCIL_ZERO; |
| 804 | |
| 805 | case PIPE_STENCIL_OP_REPLACE: |
| 806 | return MALI_STENCIL_REPLACE; |
| 807 | |
| 808 | case PIPE_STENCIL_OP_INCR: |
| 809 | return MALI_STENCIL_INCR; |
| 810 | |
| 811 | case PIPE_STENCIL_OP_DECR: |
| 812 | return MALI_STENCIL_DECR; |
| 813 | |
| 814 | case PIPE_STENCIL_OP_INCR_WRAP: |
| 815 | return MALI_STENCIL_INCR_WRAP; |
| 816 | |
| 817 | case PIPE_STENCIL_OP_DECR_WRAP: |
| 818 | return MALI_STENCIL_DECR_WRAP; |
| 819 | |
| 820 | case PIPE_STENCIL_OP_INVERT: |
| 821 | return MALI_STENCIL_INVERT; |
| 822 | } |
| 823 | |
| 824 | assert (0); |
| 825 | return 0; /* Unreachable */ |
| 826 | } |
| 827 | |
| 828 | static void |
| 829 | panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out) |
| 830 | { |
| 831 | out->ref = 0; /* Gallium gets it from elsewhere */ |
| 832 | |
| 833 | out->mask = in->valuemask; |
| 834 | out->func = panfrost_translate_compare_func(in->func); |
| 835 | out->sfail = panfrost_translate_stencil_op(in->fail_op); |
| 836 | out->dpfail = panfrost_translate_stencil_op(in->zfail_op); |
| 837 | out->dppass = panfrost_translate_stencil_op(in->zpass_op); |
| 838 | } |
| 839 | |
| 840 | static void |
| 841 | panfrost_default_shader_backend(struct panfrost_context *ctx) |
| 842 | { |
| 843 | struct mali_shader_meta shader = { |
| 844 | .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000), |
| 845 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 846 | .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010, |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 847 | .unknown2_4 = MALI_NO_MSAA | 0x4e0, |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 848 | }; |
| 849 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 850 | if (is_t6xx) { |
| 851 | shader.unknown2_4 |= 0x10; |
| 852 | } |
| 853 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 854 | struct pipe_stencil_state default_stencil = { |
| 855 | .enabled = 0, |
| 856 | .func = PIPE_FUNC_ALWAYS, |
| 857 | .fail_op = MALI_STENCIL_KEEP, |
| 858 | .zfail_op = MALI_STENCIL_KEEP, |
| 859 | .zpass_op = MALI_STENCIL_KEEP, |
| 860 | .writemask = 0xFF, |
| 861 | .valuemask = 0xFF |
| 862 | }; |
| 863 | |
| 864 | panfrost_make_stencil_state(&default_stencil, &shader.stencil_front); |
| 865 | shader.stencil_mask_front = default_stencil.writemask; |
| 866 | |
| 867 | panfrost_make_stencil_state(&default_stencil, &shader.stencil_back); |
| 868 | shader.stencil_mask_back = default_stencil.writemask; |
| 869 | |
| 870 | if (default_stencil.enabled) |
| 871 | shader.unknown2_4 |= MALI_STENCIL_TEST; |
| 872 | |
| 873 | memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader)); |
| 874 | } |
| 875 | |
| 876 | /* Generates a vertex/tiler job. This is, in some sense, the heart of the |
 * graphics command stream. It should be called once per draw, according to
| 878 | * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in |
| 879 | * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for |
| 880 | * vertex jobs. */ |
| 881 | |
struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler)
{
        /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */
        int draw_job_index = 1 + (2 * ctx->draw_count);

        /* Job header: vertex jobs get the even index, the paired tiler job
         * the next (odd) one. job_descriptor_size is only set on 64-bit
         * builds -- presumably matching the GPU-side descriptor layout;
         * see the padding note below. */
        struct mali_job_descriptor_header job = {
                .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
                .job_index = draw_job_index + (is_tiler ? 1 : 0),
#ifdef __LP64__
                .job_descriptor_size = 1,
#endif
        };

        /* Only non-elided tiler jobs have dependencies which are known at this point */

        if (is_tiler && !is_elided_tiler) {
                /* Tiler jobs depend on vertex jobs */

                job.job_dependency_index_1 = draw_job_index;

                /* Tiler jobs also depend on the previous tiler job */

                if (ctx->draw_count)
                        job.job_dependency_index_2 = draw_job_index - 1;
        }

        /* The payload template was staged in the context by
         * panfrost_emit_vertex_payload / panfrost_emit_tiler_payload and
         * patched per-draw; pick the one matching this job type */
        struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex;

        /* There's some padding hacks on 32-bit */

#ifdef __LP64__
        int offset = 0;
#else
        int offset = 4;
#endif
        /* Upload header immediately followed by the payload; on 32-bit the
         * payload overlaps the header's final 4 bytes (the padding hack
         * above). The allocation over-reserves by `offset` bytes, which is
         * harmless. */
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload));
        return transfer;
}
| 923 | |
| 924 | /* Generates a set value job. It's unclear what exactly this does, why it's |
| 925 | * necessary, and when to call it. */ |
| 926 | |
| 927 | static void |
| 928 | panfrost_set_value_job(struct panfrost_context *ctx) |
| 929 | { |
| 930 | struct mali_job_descriptor_header job = { |
| 931 | .job_type = JOB_TYPE_SET_VALUE, |
| 932 | .job_descriptor_size = 1, |
| 933 | .job_index = 1 + (2 * ctx->draw_count), |
| 934 | }; |
| 935 | |
| 936 | struct mali_payload_set_value payload = { |
| 937 | .out = ctx->misc_0.gpu, |
| 938 | .unknown = 0x3, |
| 939 | }; |
| 940 | |
| 941 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload)); |
| 942 | memcpy(transfer.cpu, &job, sizeof(job)); |
| 943 | memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload)); |
| 944 | |
| 945 | ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu; |
| 946 | ctx->set_value_job = transfer.gpu; |
| 947 | } |
| 948 | |
| 949 | /* Generate a fragment job. This should be called once per frame. (According to |
| 950 | * presentations, this is supposed to correspond to eglSwapBuffers) */ |
| 951 | |
mali_ptr
panfrost_fragment_job(struct panfrost_context *ctx)
{
        struct panfrost_job *job = panfrost_get_job_for_fbo(ctx);

        /* Actualize the clear late; TODO: Fix order dependency between clear
         * and afbc */

        if (require_sfbd) {
                panfrost_clear_sfbd(job);
        } else {
                panfrost_clear_mfbd(job);
        }

        panfrost_set_fragment_afbc(ctx);

        /* Hook up CRC checksumming for the (single) render target, if the
         * backing BO carries a checksum region. Only the MFBD path supports
         * this. */
        if (ctx->pipe_framebuffer.nr_cbufs == 1) {
                struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;

                if (rsrc->bo->has_checksum) {
                        if (require_sfbd) {
                                fprintf(stderr, "Checksumming not supported on SFBD\n");
                                assert(0);
                        }

                        int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0);

                        /* The checksum region sits after the pixel data
                         * (stride * height bytes into the BO) */
                        ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
                        ctx->fragment_extra.unk |= 0x420;
                        ctx->fragment_extra.checksum_stride = rsrc->bo->checksum_stride;
                        ctx->fragment_extra.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0;
                }
        }

        /* The frame is complete and therefore the framebuffer descriptor is
         * ready for linkage and upload */

        /* MFBD size over-reserves the extra section even when unused; the
         * memcpys below only write what is needed */
        size_t sz = require_sfbd ? sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1);
        struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz);
        off_t offset = 0;

        if (require_sfbd) {
                /* Upload just the SFBD all at once */
                memcpy(fbd_t.cpu, &ctx->fragment_sfbd, sizeof(ctx->fragment_sfbd));
                offset += sizeof(ctx->fragment_sfbd);
        } else {
                /* Upload the MFBD header */
                memcpy(fbd_t.cpu, &ctx->fragment_mfbd, sizeof(ctx->fragment_mfbd));
                offset += sizeof(ctx->fragment_mfbd);

                /* Upload extra framebuffer info if necessary */
                if (ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
                        memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra));
                        offset += sizeof(struct bifrost_fb_extra);
                }

                /* Upload (single) render target */
                memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1);
        }

        /* Generate the fragment (frame) job */

        struct mali_job_descriptor_header header = {
                .job_type = JOB_TYPE_FRAGMENT,
                .job_index = 1,
#ifdef __LP64__
                .job_descriptor_size = 1
#endif
        };

        /* The payload covers the full framebuffer in tile coordinates and
         * points at the descriptor uploaded above, tagged by its flavor */
        struct mali_payload_fragment payload = {
                .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0),
                .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height),
                .framebuffer = fbd_t.gpu | (require_sfbd ? MALI_SFBD : MALI_MFBD),
        };

        if (!require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
                /* Signal that there is an extra portion of the framebuffer
                 * descriptor */

                payload.framebuffer |= 2;
        }

        /* Normally, there should be no padding. However, fragment jobs are
         * shared with 64-bit Bifrost systems, and accordingly there is 4-bytes
         * of zero padding in between. */

        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(header) + sizeof(payload));
        memcpy(transfer.cpu, &header, sizeof(header));
        memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload));
        return transfer.gpu;
}
| 1044 | |
| 1045 | /* Emits attributes and varying descriptors, which should be called every draw, |
| 1046 | * excepting some obscure circumstances */ |
| 1047 | |
static void
panfrost_emit_vertex_data(struct panfrost_context *ctx)
{
        /* Build and upload the attribute and varying buffer descriptors for
         * this draw, and link special varyings (gl_Position, gl_PointSize)
         * into the tiler payload. */

        /* TODO: Only update the dirtied buffers */
        union mali_attr attrs[PIPE_MAX_ATTRIBS];
        union mali_attr varyings[PIPE_MAX_ATTRIBS];

        /* invocation_count is stored off-by-one in the payload, hence
         * MALI_NEGATIVE to decode the true vertex count */
        unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);

        for (int i = 0; i < ctx->vertex_buffer_count; ++i) {
                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

                /* Let's figure out the layout of the attributes in memory so
                 * we can be smart about size computation. The idea is to
                 * figure out the maximum src_offset, which tells us the latest
                 * spot a vertex could start. Meanwhile, we figure out the size
                 * of the attribute memory (assuming interleaved
                 * representation) and tack on the max src_offset for a
                 * reasonably good upper bound on the size.
                 *
                 * Proving correctness is left as an exercise to the reader.
                 */

                unsigned max_src_offset = 0;

                for (unsigned j = 0; j < ctx->vertex->num_elements; ++j) {
                        if (ctx->vertex->pipe[j].vertex_buffer_index != i) continue;
                        max_src_offset = MAX2(max_src_offset, ctx->vertex->pipe[j].src_offset);
                }

                /* Offset vertex count by draw_start to make sure we upload enough */
                attrs[i].stride = buf->stride;
                attrs[i].size = buf->stride * (ctx->payload_vertex.draw_start + invocation_count) + max_src_offset;

                /* Vertex elements are -already- GPU-visible, at
                 * rsrc->gpu. However, attribute buffers must be 64 aligned. If
                 * it is not, for now we have to duplicate the buffer. */

                mali_ptr effective_address = (rsrc->bo->gpu[0] + buf->buffer_offset);

                /* The low bit of .elements appears to be a validity/enable
                 * tag -- TODO confirm against the descriptor definition */
                if (effective_address & 0x3F) {
                        attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu[0] + buf->buffer_offset, attrs[i].size) | 1;
                } else {
                        attrs[i].elements = effective_address | 1;
                }
        }

        /* Varyings are carved out of the context's varying_mem arena,
         * bump-allocated via varying_height */
        struct panfrost_varyings *vars = &ctx->vs->variants[ctx->vs->active_variant].varyings;

        for (int i = 0; i < vars->varying_buffer_count; ++i) {
                mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;

                varyings[i].elements = varying_address | 1;
                varyings[i].stride = vars->varyings_stride[i];
                varyings[i].size = vars->varyings_stride[i] * invocation_count;

                /* If this varying has to be linked somewhere, do it now. See
                 * pan_assemble.c for the indices. TODO: Use a more generic
                 * linking interface */

                if (i == 1) {
                        /* gl_Position */
                        ctx->payload_tiler.postfix.position_varying = varying_address;
                } else if (i == 2) {
                        /* gl_PointSize */
                        ctx->payload_tiler.primitive_size.pointer = varying_address;
                }

                /* Varyings appear to need 64-byte alignment */
                ctx->varying_height += ALIGN(varyings[i].size, 64);

                /* Ensure that we fit */
                assert(ctx->varying_height < ctx->varying_mem.size);
        }

        /* Upload the filled-in descriptor arrays and link them into both
         * payloads (attributes are vertex-only; varyings feed both stages) */
        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr));

        mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, vars->varying_buffer_count * sizeof(union mali_attr));
        ctx->payload_vertex.postfix.varyings = varyings_p;
        ctx->payload_tiler.postfix.varyings = varyings_p;
}
| 1130 | |
| 1131 | /* Go through dirty flags and actualise them in the cmdstream. */ |
| 1132 | |
| 1133 | void |
| 1134 | panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data) |
| 1135 | { |
| 1136 | if (with_vertex_data) { |
| 1137 | panfrost_emit_vertex_data(ctx); |
| 1138 | } |
| 1139 | |
| 1140 | if (ctx->dirty & PAN_DIRTY_RASTERIZER) { |
| 1141 | ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables; |
| 1142 | panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample); |
| 1143 | } |
| 1144 | |
| 1145 | if (ctx->occlusion_query) { |
Alyssa Rosenzweig | 2d22b53 | 2019-02-14 02:44:03 +0000 | [diff] [blame] | 1146 | ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1147 | ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu; |
| 1148 | } |
| 1149 | |
| 1150 | if (ctx->dirty & PAN_DIRTY_VS) { |
| 1151 | assert(ctx->vs); |
| 1152 | |
| 1153 | struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; |
| 1154 | |
| 1155 | /* Late shader descriptor assignments */ |
| 1156 | vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX]; |
| 1157 | vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX]; |
| 1158 | |
| 1159 | /* Who knows */ |
| 1160 | vs->tripipe->midgard1.unknown1 = 0x2201; |
| 1161 | |
| 1162 | ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4; |
| 1163 | |
| 1164 | /* Varying descriptor is tied to the vertex shader. Also the |
| 1165 | * fragment shader, I suppose, but it's generated with the |
| 1166 | * vertex shader so */ |
| 1167 | |
| 1168 | struct panfrost_varyings *varyings = &ctx->vs->variants[ctx->vs->active_variant].varyings; |
| 1169 | |
| 1170 | ctx->payload_vertex.postfix.varying_meta = varyings->varyings_descriptor; |
| 1171 | ctx->payload_tiler.postfix.varying_meta = varyings->varyings_descriptor_fragment; |
| 1172 | } |
| 1173 | |
| 1174 | if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) { |
| 1175 | /* Check if we need to link the gl_PointSize varying */ |
| 1176 | assert(ctx->vs); |
| 1177 | struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant]; |
| 1178 | |
| 1179 | bool needs_gl_point_size = vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS; |
| 1180 | |
| 1181 | if (!needs_gl_point_size) { |
| 1182 | /* If the size is constant, write it out. Otherwise, |
| 1183 | * don't touch primitive_size (since we would clobber |
| 1184 | * the pointer there) */ |
| 1185 | |
| 1186 | ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width; |
| 1187 | } |
| 1188 | |
| 1189 | /* Set the flag for varying (pointer) point size if the shader needs that */ |
| 1190 | SET_BIT(ctx->payload_tiler.prefix.unknown_draw, MALI_DRAW_VARYING_SIZE, needs_gl_point_size); |
| 1191 | } |
| 1192 | |
| 1193 | /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */ |
| 1194 | if (ctx->fs) |
| 1195 | ctx->dirty |= PAN_DIRTY_FS; |
| 1196 | |
| 1197 | if (ctx->dirty & PAN_DIRTY_FS) { |
| 1198 | assert(ctx->fs); |
| 1199 | struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant]; |
| 1200 | |
| 1201 | #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name |
| 1202 | |
| 1203 | COPY(shader); |
| 1204 | COPY(attribute_count); |
| 1205 | COPY(varying_count); |
| 1206 | COPY(midgard1.uniform_count); |
| 1207 | COPY(midgard1.work_count); |
| 1208 | COPY(midgard1.unknown2); |
| 1209 | |
| 1210 | #undef COPY |
| 1211 | /* If there is a blend shader, work registers are shared */ |
| 1212 | |
| 1213 | if (ctx->blend->has_blend_shader) |
| 1214 | ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16; |
| 1215 | |
| 1216 | /* Set late due to depending on render state */ |
| 1217 | /* The one at the end seems to mean "1 UBO" */ |
| 1218 | ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201; |
| 1219 | |
| 1220 | /* Assign texture/sample count right before upload */ |
| 1221 | ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT]; |
| 1222 | ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT]; |
| 1223 | |
| 1224 | /* Assign the stencil refs late */ |
| 1225 | ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0]; |
| 1226 | ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1]; |
| 1227 | |
| 1228 | /* CAN_DISCARD should be set if the fragment shader possibly |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1229 | * contains a 'discard' instruction. It is likely this is |
| 1230 | * related to optimizations related to forward-pixel kill, as |
| 1231 | * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good |
| 1232 | * thing?" by Peter Harris |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1233 | */ |
| 1234 | |
| 1235 | if (variant->can_discard) { |
| 1236 | ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; |
| 1237 | ctx->fragment_shader_core.midgard1.unknown1 &= ~MALI_NO_ALPHA_TO_COVERAGE; |
| 1238 | ctx->fragment_shader_core.midgard1.unknown1 |= 0x4000; |
| 1239 | ctx->fragment_shader_core.midgard1.unknown1 = 0x4200; |
| 1240 | } |
| 1241 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1242 | /* Check if we're using the default blend descriptor (fast path) */ |
| 1243 | |
| 1244 | bool no_blending = |
| 1245 | !ctx->blend->has_blend_shader && |
| 1246 | (ctx->blend->equation.rgb_mode == 0x122) && |
| 1247 | (ctx->blend->equation.alpha_mode == 0x122) && |
| 1248 | (ctx->blend->equation.color_mask == 0xf); |
| 1249 | |
| 1250 | if (require_sfbd) { |
| 1251 | /* When only a single render target platform is used, the blend |
| 1252 | * information is inside the shader meta itself. We |
| 1253 | * additionally need to signal CAN_DISCARD for nontrivial blend |
| 1254 | * modes (so we're able to read back the destination buffer) */ |
| 1255 | |
| 1256 | if (ctx->blend->has_blend_shader) { |
| 1257 | ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader; |
| 1258 | } else { |
| 1259 | memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation)); |
| 1260 | } |
| 1261 | |
| 1262 | if (!no_blending) { |
| 1263 | ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; |
| 1264 | } |
| 1265 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1266 | |
| 1267 | size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta); |
| 1268 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); |
| 1269 | memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta)); |
| 1270 | |
| 1271 | ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4; |
| 1272 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1273 | if (!require_sfbd) { |
| 1274 | /* Additional blend descriptor tacked on for jobs using MFBD */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1275 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1276 | unsigned blend_count = 0; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1277 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1278 | if (ctx->blend->has_blend_shader) { |
| 1279 | /* For a blend shader, the bottom nibble corresponds to |
| 1280 | * the number of work registers used, which signals the |
| 1281 | * -existence- of a blend shader */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1282 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1283 | assert(ctx->blend->blend_work_count >= 2); |
| 1284 | blend_count |= MIN2(ctx->blend->blend_work_count, 3); |
| 1285 | } else { |
| 1286 | /* Otherwise, the bottom bit simply specifies if |
| 1287 | * blending (anything other than REPLACE) is enabled */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1288 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1289 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1290 | if (!no_blending) |
| 1291 | blend_count |= 0x1; |
| 1292 | } |
| 1293 | |
| 1294 | /* Second blend equation is always a simple replace */ |
| 1295 | |
| 1296 | uint64_t replace_magic = 0xf0122122; |
| 1297 | struct mali_blend_equation replace_mode; |
| 1298 | memcpy(&replace_mode, &replace_magic, sizeof(replace_mode)); |
| 1299 | |
| 1300 | struct mali_blend_meta blend_meta[] = { |
| 1301 | { |
| 1302 | .unk1 = 0x200 | blend_count, |
| 1303 | .blend_equation_1 = ctx->blend->equation, |
| 1304 | .blend_equation_2 = replace_mode |
| 1305 | }, |
| 1306 | }; |
| 1307 | |
| 1308 | if (ctx->blend->has_blend_shader) |
| 1309 | memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader)); |
| 1310 | |
| 1311 | memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta)); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1312 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1313 | } |
| 1314 | |
| 1315 | if (ctx->dirty & PAN_DIRTY_VERTEX) { |
| 1316 | ctx->payload_vertex.postfix.attribute_meta = ctx->vertex->descriptor_ptr; |
| 1317 | } |
| 1318 | |
| 1319 | if (ctx->dirty & PAN_DIRTY_SAMPLERS) { |
| 1320 | /* Upload samplers back to back, no padding */ |
| 1321 | |
| 1322 | for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { |
| 1323 | if (!ctx->sampler_count[t]) continue; |
| 1324 | |
| 1325 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]); |
| 1326 | struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu; |
| 1327 | |
| 1328 | for (int i = 0; i < ctx->sampler_count[t]; ++i) { |
| 1329 | desc[i] = ctx->samplers[t][i]->hw; |
| 1330 | } |
| 1331 | |
| 1332 | if (t == PIPE_SHADER_FRAGMENT) |
| 1333 | ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu; |
| 1334 | else if (t == PIPE_SHADER_VERTEX) |
| 1335 | ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu; |
| 1336 | else |
| 1337 | assert(0); |
| 1338 | } |
| 1339 | } |
| 1340 | |
| 1341 | if (ctx->dirty & PAN_DIRTY_TEXTURES) { |
| 1342 | for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { |
| 1343 | /* Shortcircuit */ |
| 1344 | if (!ctx->sampler_view_count[t]) continue; |
| 1345 | |
| 1346 | uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; |
| 1347 | |
| 1348 | for (int i = 0; i < ctx->sampler_view_count[t]; ++i) { |
| 1349 | if (!ctx->sampler_views[t][i]) |
| 1350 | continue; |
| 1351 | |
| 1352 | struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture; |
| 1353 | struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc; |
| 1354 | |
| 1355 | /* Inject the address in. */ |
| 1356 | for (int l = 0; l < (tex_rsrc->last_level + 1); ++l) |
| 1357 | ctx->sampler_views[t][i]->hw.swizzled_bitmaps[l] = rsrc->bo->gpu[l]; |
| 1358 | |
| 1359 | /* Workaround maybe-errata (?) with non-mipmaps */ |
| 1360 | int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels; |
| 1361 | |
| 1362 | if (!rsrc->bo->is_mipmap) { |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1363 | if (is_t6xx) { |
| 1364 | /* HW ERRATA, not needed after t6XX */ |
| 1365 | ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0]; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1366 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1367 | ctx->sampler_views[t][i]->hw.unknown3A = 1; |
| 1368 | } |
| 1369 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1370 | ctx->sampler_views[t][i]->hw.nr_mipmap_levels = 0; |
| 1371 | } |
| 1372 | |
| 1373 | trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor)); |
| 1374 | |
| 1375 | /* Restore */ |
| 1376 | ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s; |
| 1377 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1378 | if (is_t6xx) { |
| 1379 | ctx->sampler_views[t][i]->hw.unknown3A = 0; |
| 1380 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1381 | } |
| 1382 | |
| 1383 | mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); |
| 1384 | |
| 1385 | if (t == PIPE_SHADER_FRAGMENT) |
| 1386 | ctx->payload_tiler.postfix.texture_trampoline = trampoline; |
| 1387 | else if (t == PIPE_SHADER_VERTEX) |
| 1388 | ctx->payload_vertex.postfix.texture_trampoline = trampoline; |
| 1389 | else |
| 1390 | assert(0); |
| 1391 | } |
| 1392 | } |
| 1393 | |
| 1394 | /* Generate the viewport vector of the form: <width/2, height/2, centerx, centery> */ |
| 1395 | const struct pipe_viewport_state *vp = &ctx->pipe_viewport; |
| 1396 | |
| 1397 | float viewport_vec4[] = { |
| 1398 | vp->scale[0], |
| 1399 | fabsf(vp->scale[1]), |
| 1400 | |
| 1401 | vp->translate[0], |
| 1402 | /* -1.0 * vp->translate[1] */ fabs(1.0 * vp->scale[1]) /* XXX */ |
| 1403 | }; |
| 1404 | |
| 1405 | for (int i = 0; i < PIPE_SHADER_TYPES; ++i) { |
| 1406 | struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i]; |
| 1407 | |
| 1408 | if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) { |
| 1409 | /* It doesn't matter if we don't use all the memory; |
| 1410 | * we'd need a dummy UBO anyway. Compute the max */ |
| 1411 | |
| 1412 | size_t size = sizeof(viewport_vec4) + buf->size; |
| 1413 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); |
| 1414 | |
| 1415 | /* Keep track how much we've uploaded */ |
| 1416 | off_t offset = 0; |
| 1417 | |
| 1418 | if (i == PIPE_SHADER_VERTEX) { |
| 1419 | /* Upload viewport */ |
| 1420 | memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4)); |
| 1421 | offset += sizeof(viewport_vec4); |
| 1422 | } |
| 1423 | |
| 1424 | /* Upload uniforms */ |
| 1425 | memcpy(transfer.cpu + offset, buf->buffer, buf->size); |
| 1426 | |
| 1427 | int uniform_count = 0; |
| 1428 | |
| 1429 | struct mali_vertex_tiler_postfix *postfix; |
| 1430 | |
| 1431 | switch (i) { |
| 1432 | case PIPE_SHADER_VERTEX: |
| 1433 | uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count; |
| 1434 | postfix = &ctx->payload_vertex.postfix; |
| 1435 | break; |
| 1436 | |
| 1437 | case PIPE_SHADER_FRAGMENT: |
| 1438 | uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count; |
| 1439 | postfix = &ctx->payload_tiler.postfix; |
| 1440 | break; |
| 1441 | |
| 1442 | default: |
| 1443 | printf("Unknown shader stage %d in uniform upload\n", i); |
| 1444 | assert(0); |
| 1445 | } |
| 1446 | |
| 1447 | /* Also attach the same buffer as a UBO for extended access */ |
| 1448 | |
| 1449 | struct mali_uniform_buffer_meta uniform_buffers[] = { |
| 1450 | { |
| 1451 | .size = MALI_POSITIVE((2 + uniform_count)), |
| 1452 | .ptr = transfer.gpu >> 2, |
| 1453 | }, |
| 1454 | }; |
| 1455 | |
| 1456 | mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers)); |
| 1457 | postfix->uniforms = transfer.gpu; |
| 1458 | postfix->uniform_buffers = ubufs; |
| 1459 | |
| 1460 | buf->dirty = 0; |
| 1461 | } |
| 1462 | } |
| 1463 | |
| 1464 | ctx->dirty = 0; |
| 1465 | } |
| 1466 | |
| 1467 | /* Corresponds to exactly one draw, but does not submit anything */ |
| 1468 | |
| 1469 | static void |
| 1470 | panfrost_queue_draw(struct panfrost_context *ctx) |
| 1471 | { |
| 1472 | /* TODO: Expand the array? */ |
| 1473 | if (ctx->draw_count >= MAX_DRAW_CALLS) { |
| 1474 | printf("Job buffer overflow, ignoring draw\n"); |
| 1475 | assert(0); |
| 1476 | } |
| 1477 | |
| 1478 | /* Handle dirty flags now */ |
| 1479 | panfrost_emit_for_draw(ctx, true); |
| 1480 | |
| 1481 | struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false); |
| 1482 | struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false); |
| 1483 | |
| 1484 | ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu; |
| 1485 | ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu; |
| 1486 | |
| 1487 | ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu; |
| 1488 | ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu; |
| 1489 | |
| 1490 | ctx->draw_count++; |
| 1491 | } |
| 1492 | |
| 1493 | /* At the end of the frame, the vertex and tiler jobs are linked together and |
| 1494 | * then the fragment job is plonked at the end. Set value job is first for |
| 1495 | * unknown reasons. */ |
| 1496 | |
| 1497 | static void |
| 1498 | panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next) |
| 1499 | { |
| 1500 | if (first->job_descriptor_size) |
| 1501 | first->next_job_64 = (u64) (uintptr_t) next; |
| 1502 | else |
| 1503 | first->next_job_32 = (u32) (uintptr_t) next; |
| 1504 | } |
| 1505 | |
| 1506 | static void |
| 1507 | panfrost_link_jobs(struct panfrost_context *ctx) |
| 1508 | { |
| 1509 | if (ctx->draw_count) { |
| 1510 | /* Generate the set_value_job */ |
| 1511 | panfrost_set_value_job(ctx); |
| 1512 | |
| 1513 | /* Have the first vertex job depend on the set value job */ |
| 1514 | ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index; |
| 1515 | |
| 1516 | /* SV -> V */ |
| 1517 | panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]); |
| 1518 | } |
| 1519 | |
| 1520 | /* V -> V/T ; T -> T/null */ |
| 1521 | for (int i = 0; i < ctx->vertex_job_count; ++i) { |
| 1522 | bool isLast = (i + 1) == ctx->vertex_job_count; |
| 1523 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1524 | panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1525 | } |
| 1526 | |
| 1527 | /* T -> T/null */ |
| 1528 | for (int i = 0; i < ctx->tiler_job_count; ++i) { |
| 1529 | bool isLast = (i + 1) == ctx->tiler_job_count; |
| 1530 | panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 0 : ctx->tiler_jobs[i + 1]); |
| 1531 | } |
| 1532 | } |
| 1533 | |
| 1534 | /* The entire frame is in memory -- send it off to the kernel! */ |
| 1535 | |
| 1536 | static void |
| 1537 | panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) |
| 1538 | { |
| 1539 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 1540 | struct panfrost_screen *screen = pan_screen(gallium->screen); |
| 1541 | |
| 1542 | /* Edge case if screen is cleared and nothing else */ |
| 1543 | bool has_draws = ctx->draw_count > 0; |
| 1544 | |
| 1545 | /* Workaround a bizarre lockup (a hardware errata?) */ |
| 1546 | if (!has_draws) |
| 1547 | flush_immediate = true; |
| 1548 | |
| 1549 | /* A number of jobs are batched -- this must be linked and cleared */ |
| 1550 | panfrost_link_jobs(ctx); |
| 1551 | |
| 1552 | ctx->draw_count = 0; |
| 1553 | ctx->vertex_job_count = 0; |
| 1554 | ctx->tiler_job_count = 0; |
| 1555 | |
| 1556 | #ifndef DRY_RUN |
| 1557 | |
Alyssa Rosenzweig | d43ec10 | 2019-02-05 05:13:50 +0000 | [diff] [blame] | 1558 | bool is_scanout = panfrost_is_scanout(ctx); |
| 1559 | int fragment_id = screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1560 | |
| 1561 | /* If visual, we can stall a frame */ |
| 1562 | |
| 1563 | if (panfrost_is_scanout(ctx) && !flush_immediate) |
| 1564 | screen->driver->force_flush_fragment(ctx); |
| 1565 | |
| 1566 | screen->last_fragment_id = fragment_id; |
| 1567 | screen->last_fragment_flushed = false; |
| 1568 | |
| 1569 | /* If readback, flush now (hurts the pipelined performance) */ |
| 1570 | if (panfrost_is_scanout(ctx) && flush_immediate) |
| 1571 | screen->driver->force_flush_fragment(ctx); |
| 1572 | |
Alyssa Rosenzweig | 4c82abb | 2019-02-25 03:31:29 +0000 | [diff] [blame] | 1573 | if (screen->driver->dump_counters && pan_counters_base) { |
Alyssa Rosenzweig | 4a4726a | 2019-02-18 23:32:05 +0000 | [diff] [blame] | 1574 | screen->driver->dump_counters(screen); |
| 1575 | |
| 1576 | char filename[128]; |
Alyssa Rosenzweig | 4c82abb | 2019-02-25 03:31:29 +0000 | [diff] [blame] | 1577 | snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", pan_counters_base, ++performance_counter_number); |
Alyssa Rosenzweig | 4a4726a | 2019-02-18 23:32:05 +0000 | [diff] [blame] | 1578 | FILE *fp = fopen(filename, "wb"); |
| 1579 | fwrite(screen->perf_counters.cpu, 4096, sizeof(uint32_t), fp); |
| 1580 | fclose(fp); |
| 1581 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1582 | |
| 1583 | #endif |
| 1584 | } |
| 1585 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1586 | void |
| 1587 | panfrost_flush( |
| 1588 | struct pipe_context *pipe, |
| 1589 | struct pipe_fence_handle **fence, |
| 1590 | unsigned flags) |
| 1591 | { |
| 1592 | struct panfrost_context *ctx = pan_context(pipe); |
Alyssa Rosenzweig | c351cc4 | 2019-02-27 00:30:59 +0000 | [diff] [blame^] | 1593 | struct panfrost_job *job = panfrost_get_job_for_fbo(ctx); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1594 | |
Alyssa Rosenzweig | c351cc4 | 2019-02-27 00:30:59 +0000 | [diff] [blame^] | 1595 | /* Nothing to do! */ |
| 1596 | if (!ctx->draw_count && !job->clear) return; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1597 | |
| 1598 | /* Whether to stall the pipeline for immediately correct results */ |
| 1599 | bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; |
| 1600 | |
| 1601 | /* Submit the frame itself */ |
| 1602 | panfrost_submit_frame(ctx, flush_immediate); |
| 1603 | |
| 1604 | /* Prepare for the next frame */ |
| 1605 | panfrost_invalidate_frame(ctx); |
| 1606 | } |
| 1607 | |
| 1608 | #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; |
| 1609 | |
| 1610 | static int |
| 1611 | g2m_draw_mode(enum pipe_prim_type mode) |
| 1612 | { |
| 1613 | switch (mode) { |
| 1614 | DEFINE_CASE(POINTS); |
| 1615 | DEFINE_CASE(LINES); |
| 1616 | DEFINE_CASE(LINE_LOOP); |
| 1617 | DEFINE_CASE(LINE_STRIP); |
| 1618 | DEFINE_CASE(TRIANGLES); |
| 1619 | DEFINE_CASE(TRIANGLE_STRIP); |
| 1620 | DEFINE_CASE(TRIANGLE_FAN); |
| 1621 | DEFINE_CASE(QUADS); |
| 1622 | DEFINE_CASE(QUAD_STRIP); |
| 1623 | DEFINE_CASE(POLYGON); |
| 1624 | |
| 1625 | default: |
| 1626 | printf("Illegal draw mode %d\n", mode); |
| 1627 | assert(0); |
| 1628 | return MALI_LINE_LOOP; |
| 1629 | } |
| 1630 | } |
| 1631 | |
| 1632 | #undef DEFINE_CASE |
| 1633 | |
| 1634 | static unsigned |
| 1635 | panfrost_translate_index_size(unsigned size) |
| 1636 | { |
| 1637 | switch (size) { |
| 1638 | case 1: |
| 1639 | return MALI_DRAW_INDEXED_UINT8; |
| 1640 | |
| 1641 | case 2: |
| 1642 | return MALI_DRAW_INDEXED_UINT16; |
| 1643 | |
| 1644 | case 4: |
| 1645 | return MALI_DRAW_INDEXED_UINT32; |
| 1646 | |
| 1647 | default: |
| 1648 | printf("Unknown index size %d\n", size); |
| 1649 | assert(0); |
| 1650 | return 0; |
| 1651 | } |
| 1652 | } |
| 1653 | |
| 1654 | static const uint8_t * |
| 1655 | panfrost_get_index_buffer_raw(const struct pipe_draw_info *info) |
| 1656 | { |
| 1657 | if (info->has_user_indices) { |
| 1658 | return (const uint8_t *) info->index.user; |
| 1659 | } else { |
| 1660 | struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); |
| 1661 | return (const uint8_t *) rsrc->bo->cpu[0]; |
| 1662 | } |
| 1663 | } |
| 1664 | |
| 1665 | /* Gets a GPU address for the associated index buffer. Only gauranteed to be |
| 1666 | * good for the duration of the draw (transient), could last longer */ |
| 1667 | |
| 1668 | static mali_ptr |
| 1669 | panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info) |
| 1670 | { |
| 1671 | struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); |
| 1672 | |
| 1673 | off_t offset = info->start * info->index_size; |
| 1674 | |
| 1675 | if (!info->has_user_indices) { |
| 1676 | /* Only resources can be directly mapped */ |
| 1677 | return rsrc->bo->gpu[0] + offset; |
| 1678 | } else { |
| 1679 | /* Otherwise, we need to upload to transient memory */ |
| 1680 | const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info); |
| 1681 | return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size); |
| 1682 | } |
| 1683 | } |
| 1684 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1685 | #define CALCULATE_MIN_MAX_INDEX(T, buffer, start, count) \ |
| 1686 | for (unsigned _idx = (start); _idx < (start + count); ++_idx) { \ |
| 1687 | T idx = buffer[_idx]; \ |
| 1688 | if (idx > max_index) max_index = idx; \ |
| 1689 | if (idx < min_index) min_index = idx; \ |
| 1690 | } |
| 1691 | |
/* Gallium draw entry point. Validates the primitive mode (falling back to
 * u_primconvert when unsupported), derives the vertex-shader invocation
 * range from the index buffer when present, fills out the vertex/tiler
 * payload prefixes, and queues the job pair for this draw. */
static void
panfrost_draw_vbo(
        struct pipe_context *pipe,
        const struct pipe_draw_info *info)
{
        struct panfrost_context *ctx = pan_context(pipe);

        ctx->payload_vertex.draw_start = info->start;
        ctx->payload_tiler.draw_start = info->start;

        int mode = info->mode;

        /* Fallback for unsupported modes */

        /* NOTE(review): `draw_modes` is tested with the raw enum value
         * rather than (1 << mode) -- confirm the encoding of draw_modes */
        if (!(ctx->draw_modes & mode)) {
                /* Special case: a non-flatshaded 4-vertex quad is exactly a
                 * triangle fan */
                if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) {
                        mode = PIPE_PRIM_TRIANGLE_FAN;
                } else {
                        if (info->count < 4) {
                                /* Degenerate case? */
                                return;
                        }

                        /* Let u_primconvert rewrite the draw into a supported mode */
                        util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base);
                        util_primconvert_draw_vbo(ctx->primconvert, info);
                        return;
                }
        }

        /* Now that we have a guaranteed terminating path, find the job.
         * Assignment commented out to prevent unused warning */

        /* struct panfrost_job *job = */ panfrost_get_job_for_fbo(ctx);

        ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode);

        ctx->vertex_count = info->count;

        /* For non-indexed draws, they're the same */
        unsigned invocation_count = ctx->vertex_count;

        /* For higher amounts of vertices (greater than what fits in a 16-bit
         * short), the other value is needed, otherwise there will be bizarre
         * rendering artefacts. It's not clear what these values mean yet. */

        ctx->payload_tiler.prefix.unknown_draw &= ~(0x3000 | 0x18000);
        ctx->payload_tiler.prefix.unknown_draw |= (mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 0x3000 : 0x18000;

        if (info->index_size) {
                /* Calculate the min/max index used so we can figure out how
                 * many times to invoke the vertex shader */

                const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info);

                int min_index = INT_MAX;
                int max_index = 0;

                /* Dispatch on index width; the macro updates min_index/max_index */
                if (info->index_size == 1) {
                        CALCULATE_MIN_MAX_INDEX(uint8_t, ibuf8, info->start, info->count);
                } else if (info->index_size == 2) {
                        const uint16_t *ibuf16 = (const uint16_t *) ibuf8;
                        CALCULATE_MIN_MAX_INDEX(uint16_t, ibuf16, info->start, info->count);
                } else if (info->index_size == 4) {
                        const uint32_t *ibuf32 = (const uint32_t *) ibuf8;
                        CALCULATE_MIN_MAX_INDEX(uint32_t, ibuf32, info->start, info->count);
                } else {
                        assert(0);
                }

                /* Make sure we didn't go crazy */
                assert(min_index < INT_MAX);
                assert(max_index > 0);
                assert(max_index > min_index);

                /* Use the corresponding values */
                invocation_count = max_index - min_index + 1;
                ctx->payload_vertex.draw_start = min_index;
                ctx->payload_tiler.draw_start = min_index;

                ctx->payload_tiler.prefix.negative_start = -min_index;
                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count);

                //assert(!info->restart_index); /* TODO: Research */
                assert(!info->index_bias);
                //assert(!info->min_index); /* TODO: Use value */

                ctx->payload_tiler.prefix.unknown_draw |= panfrost_translate_index_size(info->index_size);
                ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info);
        } else {
                /* Index count == vertex count, if no indexing is applied, as
                 * if it is internally indexed in the expected order */

                ctx->payload_tiler.prefix.negative_start = 0;
                ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count);

                /* Reverse index state */
                ctx->payload_tiler.prefix.unknown_draw &= ~MALI_DRAW_INDEXED_UINT32;
                ctx->payload_tiler.prefix.indices = (uintptr_t) NULL;
        }

        ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count);
        ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count);

        /* Fire off the draw itself */
        panfrost_queue_draw(ctx);
}
| 1798 | |
| 1799 | /* CSO state */ |
| 1800 | |
/* Shared destructor for CSOs that own nothing beyond their own heap
 * allocation; the context parameter is unused */

static void
panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso)
{
        (void) pctx;
        free(hwcso);
}
| 1806 | |
/* Program the hardware viewport window. NOTE(review): the `&& 0` below
 * deliberately disables the real scissor path, so the window always covers
 * the full framebuffer -- presumably a workaround; confirm before removing. */
static void
panfrost_set_scissor(struct panfrost_context *ctx)
{
        const struct pipe_scissor_state *ss = &ctx->scissor;

        if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor && 0) {
                /* Dead branch (see `&& 0` above): would clamp the window to
                 * the scissor rectangle */
                ctx->viewport->viewport0[0] = ss->minx;
                ctx->viewport->viewport0[1] = ss->miny;
                ctx->viewport->viewport1[0] = MALI_POSITIVE(ss->maxx);
                ctx->viewport->viewport1[1] = MALI_POSITIVE(ss->maxy);
        } else {
                /* Full-framebuffer window */
                ctx->viewport->viewport0[0] = 0;
                ctx->viewport->viewport0[1] = 0;
                ctx->viewport->viewport1[0] = MALI_POSITIVE(ctx->pipe_framebuffer.width);
                ctx->viewport->viewport1[1] = MALI_POSITIVE(ctx->pipe_framebuffer.height);
        }
}
| 1824 | |
| 1825 | static void * |
| 1826 | panfrost_create_rasterizer_state( |
| 1827 | struct pipe_context *pctx, |
| 1828 | const struct pipe_rasterizer_state *cso) |
| 1829 | { |
| 1830 | struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); |
| 1831 | |
| 1832 | so->base = *cso; |
| 1833 | |
| 1834 | /* Bitmask, unknown meaning of the start value */ |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1835 | so->tiler_gl_enables = is_t6xx ? 0x105 : 0x7; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1836 | |
| 1837 | so->tiler_gl_enables |= MALI_FRONT_FACE( |
| 1838 | cso->front_ccw ? MALI_CCW : MALI_CW); |
| 1839 | |
| 1840 | if (cso->cull_face & PIPE_FACE_FRONT) |
| 1841 | so->tiler_gl_enables |= MALI_CULL_FACE_FRONT; |
| 1842 | |
| 1843 | if (cso->cull_face & PIPE_FACE_BACK) |
| 1844 | so->tiler_gl_enables |= MALI_CULL_FACE_BACK; |
| 1845 | |
| 1846 | return so; |
| 1847 | } |
| 1848 | |
| 1849 | static void |
| 1850 | panfrost_bind_rasterizer_state( |
| 1851 | struct pipe_context *pctx, |
| 1852 | void *hwcso) |
| 1853 | { |
| 1854 | struct panfrost_context *ctx = pan_context(pctx); |
| 1855 | struct pipe_rasterizer_state *cso = hwcso; |
| 1856 | |
| 1857 | /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ |
| 1858 | if (!hwcso) |
| 1859 | return; |
| 1860 | |
| 1861 | /* If scissor test has changed, we'll need to update that now */ |
| 1862 | bool update_scissor = !ctx->rasterizer || ctx->rasterizer->base.scissor != cso->scissor; |
| 1863 | |
| 1864 | ctx->rasterizer = hwcso; |
| 1865 | |
| 1866 | /* Actualise late changes */ |
| 1867 | if (update_scissor) |
| 1868 | panfrost_set_scissor(ctx); |
| 1869 | |
| 1870 | ctx->dirty |= PAN_DIRTY_RASTERIZER; |
| 1871 | } |
| 1872 | |
/* Build a vertex-elements CSO: copies the Gallium elements and prebuilds
 * the hardware attr_meta descriptors in a GPU-visible chunk so binding is
 * just a pointer swap. NOTE(review): the chunk is never freed on delete --
 * see panfrost_delete_vertex_elements_state. */
static void *
panfrost_create_vertex_elements_state(
        struct pipe_context *pctx,
        unsigned num_elements,
        const struct pipe_vertex_element *elements)
{
        struct panfrost_context *ctx = pan_context(pctx);
        struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state);

        so->num_elements = num_elements;
        memcpy(so->pipe, elements, sizeof(*elements) * num_elements);

        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_attr_meta) * num_elements, HEAP_DESCRIPTOR);
        so->hw = (struct mali_attr_meta *) transfer.cpu;
        so->descriptor_ptr = transfer.gpu;

        /* Allocate memory for the descriptor state */

        for (int i = 0; i < num_elements; ++i) {
                /* Which vertex buffer this attribute reads from */
                so->hw[i].index = elements[i].vertex_buffer_index;

                enum pipe_format fmt = elements[i].src_format;
                const struct util_format_description *desc = util_format_description(fmt);
                so->hw[i].unknown1 = 0x2; /* NOTE(review): meaning unknown */
                so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels);

                so->hw[i].format = panfrost_find_format(desc);

                /* The field itself should probably be shifted over */
                so->hw[i].src_offset = elements[i].src_offset;
        }

        return so;
}
| 1907 | |
| 1908 | static void |
| 1909 | panfrost_bind_vertex_elements_state( |
| 1910 | struct pipe_context *pctx, |
| 1911 | void *hwcso) |
| 1912 | { |
| 1913 | struct panfrost_context *ctx = pan_context(pctx); |
| 1914 | |
| 1915 | ctx->vertex = hwcso; |
| 1916 | ctx->dirty |= PAN_DIRTY_VERTEX; |
| 1917 | } |
| 1918 | |
| 1919 | static void |
| 1920 | panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) |
| 1921 | { |
Alyssa Rosenzweig | acc52ff | 2019-02-14 04:00:19 +0000 | [diff] [blame] | 1922 | struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso; |
| 1923 | unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements; |
| 1924 | printf("Vertex elements delete leaks descriptor (%d bytes)\n", bytes); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1925 | free(hwcso); |
| 1926 | } |
| 1927 | |
| 1928 | static void * |
| 1929 | panfrost_create_shader_state( |
| 1930 | struct pipe_context *pctx, |
| 1931 | const struct pipe_shader_state *cso) |
| 1932 | { |
| 1933 | struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants); |
| 1934 | so->base = *cso; |
| 1935 | |
| 1936 | /* Token deep copy to prevent memory corruption */ |
| 1937 | |
| 1938 | if (cso->type == PIPE_SHADER_IR_TGSI) |
| 1939 | so->base.tokens = tgsi_dup_tokens(so->base.tokens); |
| 1940 | |
| 1941 | return so; |
| 1942 | } |
| 1943 | |
| 1944 | static void |
| 1945 | panfrost_delete_shader_state( |
| 1946 | struct pipe_context *pctx, |
| 1947 | void *so) |
| 1948 | { |
Alyssa Rosenzweig | acc52ff | 2019-02-14 04:00:19 +0000 | [diff] [blame] | 1949 | struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so; |
| 1950 | |
| 1951 | if (cso->base.type == PIPE_SHADER_IR_TGSI) { |
| 1952 | printf("Deleting TGSI shader leaks duplicated tokens\n"); |
| 1953 | } |
| 1954 | |
| 1955 | unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta); |
| 1956 | printf("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak); |
| 1957 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1958 | free(so); |
| 1959 | } |
| 1960 | |
/* Translate a Gallium sampler state into a complete mali_sampler_descriptor,
 * stored CPU-side in the CSO and uploaded back-to-back at draw time (see
 * the PAN_DIRTY_SAMPLERS path). */
static void *
panfrost_create_sampler_state(
        struct pipe_context *pctx,
        const struct pipe_sampler_state *cso)
{
        struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state);
        so->base = *cso;

        /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */

        struct mali_sampler_descriptor sampler_descriptor = {
                .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter))
                | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter))
                | translate_mip_filter(cso->min_mip_filter)
                | 0x20, /* NOTE(review): purpose of the 0x20 bit is unknown */

                .wrap_s = translate_tex_wrap(cso->wrap_s),
                .wrap_t = translate_tex_wrap(cso->wrap_t),
                .wrap_r = translate_tex_wrap(cso->wrap_r),
                .compare_func = panfrost_translate_alt_compare_func(cso->compare_func),
                .border_color = {
                        cso->border_color.f[0],
                        cso->border_color.f[1],
                        cso->border_color.f[2],
                        cso->border_color.f[3]
                },
                /* NOTE(review): LOD range is hardcoded rather than taken
                 * from cso->min_lod/max_lod -- confirm intended */
                .min_lod = FIXED_16(0.0),
                .max_lod = FIXED_16(31.0),
                .unknown2 = 1,
        };

        so->hw = sampler_descriptor;

        return so;
}
| 1996 | |
| 1997 | static void |
| 1998 | panfrost_bind_sampler_states( |
| 1999 | struct pipe_context *pctx, |
| 2000 | enum pipe_shader_type shader, |
| 2001 | unsigned start_slot, unsigned num_sampler, |
| 2002 | void **sampler) |
| 2003 | { |
| 2004 | assert(start_slot == 0); |
| 2005 | |
| 2006 | struct panfrost_context *ctx = pan_context(pctx); |
| 2007 | |
| 2008 | /* XXX: Should upload, not just copy? */ |
| 2009 | ctx->sampler_count[shader] = num_sampler; |
| 2010 | memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *)); |
| 2011 | |
| 2012 | ctx->dirty |= PAN_DIRTY_SAMPLERS; |
| 2013 | } |
| 2014 | |
| 2015 | static bool |
| 2016 | panfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant) |
| 2017 | { |
| 2018 | struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha; |
| 2019 | |
| 2020 | if (alpha->enabled || variant->alpha_state.enabled) { |
| 2021 | /* Make sure enable state is at least the same */ |
| 2022 | if (alpha->enabled != variant->alpha_state.enabled) { |
| 2023 | return false; |
| 2024 | } |
| 2025 | |
| 2026 | /* Check that the contents of the test are the same */ |
| 2027 | bool same_func = alpha->func == variant->alpha_state.func; |
| 2028 | bool same_ref = alpha->ref_value == variant->alpha_state.ref_value; |
| 2029 | |
| 2030 | if (!(same_func && same_ref)) { |
| 2031 | return false; |
| 2032 | } |
| 2033 | } |
| 2034 | /* Otherwise, we're good to go */ |
| 2035 | return true; |
| 2036 | } |
| 2037 | |
/* Bind a fragment shader CSO, selecting -- or creating and lazily
 * compiling -- the variant matching the current alpha-test state (which is
 * baked into the compiled shader; see panfrost_variant_matches). */
static void
panfrost_bind_fs_state(
        struct pipe_context *pctx,
        void *hwcso)
{
        struct panfrost_context *ctx = pan_context(pctx);

        ctx->fs = hwcso;

        if (hwcso) {
                /* Match the appropriate variant */

                signed variant = -1;

                struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso;

                for (unsigned i = 0; i < variants->variant_count; ++i) {
                        if (panfrost_variant_matches(ctx, &variants->variants[i])) {
                                variant = i;
                                break;
                        }
                }

                if (variant == -1) {
                        /* No variant matched, so create a new one */
                        variant = variants->variant_count++;
                        assert(variants->variant_count < MAX_SHADER_VARIANTS);

                        variants->variants[variant].base = hwcso;
                        variants->variants[variant].alpha_state = ctx->depth_stencil->alpha;

                        /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */
                        /* NOTE(review): this declaration shadows the outer `ctx` */
                        struct panfrost_context *ctx = pan_context(pctx);
                        struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR);

                        variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu;
                        variants->variants[variant].tripipe_gpu = transfer.gpu;

                }

                /* Select this variant */
                variants->active_variant = variant;

                struct panfrost_shader_state *shader_state = &variants->variants[variant];
                assert(panfrost_variant_matches(ctx, shader_state));

                /* Now we have a variant selected, so compile and go */

                /* Compilation is deferred to first bind, when the variant
                 * key (alpha state) is known */
                if (!shader_state->compiled) {
                        panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state);
                        shader_state->compiled = true;
                }
        }

        ctx->dirty |= PAN_DIRTY_FS;
}
| 2094 | |
| 2095 | static void |
| 2096 | panfrost_bind_vs_state( |
| 2097 | struct pipe_context *pctx, |
| 2098 | void *hwcso) |
| 2099 | { |
| 2100 | struct panfrost_context *ctx = pan_context(pctx); |
| 2101 | |
| 2102 | ctx->vs = hwcso; |
| 2103 | |
| 2104 | if (hwcso) { |
| 2105 | if (!ctx->vs->variants[0].compiled) { |
| 2106 | ctx->vs->variants[0].base = hwcso; |
| 2107 | |
| 2108 | /* TODO DRY from above */ |
| 2109 | struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR); |
| 2110 | ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu; |
| 2111 | ctx->vs->variants[0].tripipe_gpu = transfer.gpu; |
| 2112 | |
| 2113 | panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL, JOB_TYPE_VERTEX, &ctx->vs->variants[0]); |
| 2114 | ctx->vs->variants[0].compiled = true; |
| 2115 | } |
| 2116 | } |
| 2117 | |
| 2118 | ctx->dirty |= PAN_DIRTY_VS; |
| 2119 | } |
| 2120 | |
| 2121 | static void |
| 2122 | panfrost_set_vertex_buffers( |
| 2123 | struct pipe_context *pctx, |
| 2124 | unsigned start_slot, |
| 2125 | unsigned num_buffers, |
| 2126 | const struct pipe_vertex_buffer *buffers) |
| 2127 | { |
| 2128 | struct panfrost_context *ctx = pan_context(pctx); |
| 2129 | assert(num_buffers <= PIPE_MAX_ATTRIBS); |
| 2130 | |
| 2131 | /* XXX: Dirty tracking? etc */ |
| 2132 | if (buffers) { |
| 2133 | size_t sz = sizeof(buffers[0]) * num_buffers; |
| 2134 | ctx->vertex_buffers = malloc(sz); |
| 2135 | ctx->vertex_buffer_count = num_buffers; |
| 2136 | memcpy(ctx->vertex_buffers, buffers, sz); |
| 2137 | } else { |
| 2138 | if (ctx->vertex_buffers) { |
| 2139 | free(ctx->vertex_buffers); |
| 2140 | ctx->vertex_buffers = NULL; |
| 2141 | } |
| 2142 | |
| 2143 | ctx->vertex_buffer_count = 0; |
| 2144 | } |
| 2145 | } |
| 2146 | |
| 2147 | static void |
| 2148 | panfrost_set_constant_buffer( |
| 2149 | struct pipe_context *pctx, |
| 2150 | enum pipe_shader_type shader, uint index, |
| 2151 | const struct pipe_constant_buffer *buf) |
| 2152 | { |
| 2153 | struct panfrost_context *ctx = pan_context(pctx); |
| 2154 | struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; |
| 2155 | |
| 2156 | size_t sz = buf ? buf->buffer_size : 0; |
| 2157 | |
| 2158 | /* Free previous buffer */ |
| 2159 | |
| 2160 | pbuf->dirty = true; |
| 2161 | pbuf->size = sz; |
| 2162 | |
| 2163 | if (pbuf->buffer) { |
| 2164 | free(pbuf->buffer); |
| 2165 | pbuf->buffer = NULL; |
| 2166 | } |
| 2167 | |
| 2168 | /* If unbinding, we're done */ |
| 2169 | |
| 2170 | if (!buf) |
| 2171 | return; |
| 2172 | |
| 2173 | /* Multiple constant buffers not yet supported */ |
| 2174 | assert(index == 0); |
| 2175 | |
| 2176 | const uint8_t *cpu; |
| 2177 | |
| 2178 | struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer); |
| 2179 | |
| 2180 | if (rsrc) { |
| 2181 | cpu = rsrc->bo->cpu[0]; |
| 2182 | } else if (buf->user_buffer) { |
| 2183 | cpu = buf->user_buffer; |
| 2184 | } else { |
| 2185 | printf("No constant buffer?\n"); |
| 2186 | return; |
| 2187 | } |
| 2188 | |
| 2189 | /* Copy the constant buffer into the driver context for later upload */ |
| 2190 | |
| 2191 | pbuf->buffer = malloc(sz); |
| 2192 | memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz); |
| 2193 | } |
| 2194 | |
| 2195 | static void |
| 2196 | panfrost_set_stencil_ref( |
| 2197 | struct pipe_context *pctx, |
| 2198 | const struct pipe_stencil_ref *ref) |
| 2199 | { |
| 2200 | struct panfrost_context *ctx = pan_context(pctx); |
| 2201 | ctx->stencil_ref = *ref; |
| 2202 | |
| 2203 | /* Shader core dirty */ |
| 2204 | ctx->dirty |= PAN_DIRTY_FS; |
| 2205 | } |
| 2206 | |
/* Creates a sampler view: serialises a Mali texture descriptor for the given
 * resource + swizzle up front and caches it in the view, so binding is cheap.
 * Only 2D textures with 1-4 bytes/pixel and base level 0 are handled so far.
 * Takes a reference on `texture`; released in panfrost_sampler_view_destroy. */
static struct pipe_sampler_view *
panfrost_create_sampler_view(
        struct pipe_context *pctx,
        struct pipe_resource *texture,
        const struct pipe_sampler_view *template)
{
        struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view);
        int bytes_per_pixel = util_format_get_blocksize(texture->format);

        /* The view holds a reference on the texture for its lifetime */
        pipe_reference(NULL, &texture->reference);

        struct panfrost_resource *prsrc = (struct panfrost_resource *) texture;

        so->base = *template;
        so->base.texture = texture;
        so->base.reference.count = 1;
        so->base.context = pctx;

        /* sampler_views correspond to texture descriptors, minus the texture
         * (data) itself. So, we serialise the descriptor here and cache it for
         * later. */

        /* TODO: Other types of textures */
        assert(template->target == PIPE_TEXTURE_2D);

        /* Make sure it's something with which we're familiar */
        assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4);

        /* TODO: Detect from format better */
        const struct util_format_description *desc = util_format_description(prsrc->base.format);

        /* The app-requested component swizzle, applied on top of the
         * format's own swizzle below */
        unsigned char user_swizzle[4] = {
                template->swizzle_r,
                template->swizzle_g,
                template->swizzle_b,
                template->swizzle_a
        };

        enum mali_format format = panfrost_find_format(desc);

        /* Mali dimensions are biased by one (MALI_POSITIVE) */
        struct mali_texture_descriptor texture_descriptor = {
                .width = MALI_POSITIVE(texture->width0),
                .height = MALI_POSITIVE(texture->height0),
                .depth = MALI_POSITIVE(texture->depth0),

                /* TODO: Decode */
                .format = {
                        .swizzle = panfrost_translate_swizzle_4(desc->swizzle),
                        .format = format,

                        .usage1 = 0x0,
                        .is_not_cubemap = 1,

                        /* 0x11 - regular texture 2d, uncompressed tiled */
                        /* 0x12 - regular texture 2d, uncompressed linear */
                        /* 0x1c - AFBC compressed (internally tiled, probably) texture 2D */

                        .usage2 = prsrc->bo->has_afbc ? 0x1c : (prsrc->bo->tiled ? 0x11 : 0x12),
                },

                .swizzle = panfrost_translate_swizzle_4(user_swizzle)
        };

        /* TODO: Other base levels require adjusting dimensions / level numbers / etc */
        assert (template->u.tex.first_level == 0);

        texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level;

        so->hw = texture_descriptor;

        return (struct pipe_sampler_view *) so;
}
| 2279 | |
| 2280 | static void |
| 2281 | panfrost_set_sampler_views( |
| 2282 | struct pipe_context *pctx, |
| 2283 | enum pipe_shader_type shader, |
| 2284 | unsigned start_slot, unsigned num_views, |
| 2285 | struct pipe_sampler_view **views) |
| 2286 | { |
| 2287 | struct panfrost_context *ctx = pan_context(pctx); |
| 2288 | |
| 2289 | assert(start_slot == 0); |
| 2290 | |
| 2291 | ctx->sampler_view_count[shader] = num_views; |
| 2292 | memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *)); |
| 2293 | |
| 2294 | ctx->dirty |= PAN_DIRTY_TEXTURES; |
| 2295 | } |
| 2296 | |
| 2297 | static void |
| 2298 | panfrost_sampler_view_destroy( |
| 2299 | struct pipe_context *pctx, |
| 2300 | struct pipe_sampler_view *views) |
| 2301 | { |
| 2302 | //struct panfrost_context *ctx = pan_context(pctx); |
| 2303 | |
| 2304 | /* TODO */ |
| 2305 | |
| 2306 | free(views); |
| 2307 | } |
| 2308 | |
/* Binds a new framebuffer. Flushes first when leaving an FBO, then updates
 * the cached pipe_framebuffer_state, re-emits the framebuffer descriptor for
 * each changed attachment, and opportunistically enables AFBC compression and
 * checksumming (transaction elimination) on non-scanout targets. */
static void
panfrost_set_framebuffer_state(struct pipe_context *pctx,
                               const struct pipe_framebuffer_state *fb)
{
        struct panfrost_context *ctx = pan_context(pctx);

        /* Flush when switching away from an FBO */

        if (!panfrost_is_scanout(ctx)) {
                panfrost_flush(pctx, NULL, 0);
        }

        ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs;
        ctx->pipe_framebuffer.samples = fb->samples;
        ctx->pipe_framebuffer.layers = fb->layers;
        ctx->pipe_framebuffer.width = fb->width;
        ctx->pipe_framebuffer.height = fb->height;

        for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
                struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL;

                /* check if changing cbuf */
                if (ctx->pipe_framebuffer.cbufs[i] == cb) continue;

                /* Only a single render target is supported on this hardware */
                if (cb && (i != 0)) {
                        printf("XXX: Multiple render targets not supported before t7xx!\n");
                        assert(0);
                }

                /* assign new (reference-counted swap) */
                pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb);

                if (!cb)
                        continue;

                /* Re-emit the vertex/tiler framebuffer descriptor: single
                 * (SFBD) or multiple (MFBD) depending on hardware generation */
                if (require_sfbd)
                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
                else
                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

                panfrost_attach_vt_framebuffer(ctx);
                panfrost_new_frag_framebuffer(ctx);
                panfrost_set_scissor(ctx);

                struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture);
                bool is_scanout = panfrost_is_scanout(ctx);

                if (!is_scanout && !tex->bo->has_afbc) {
                        /* The blob is aggressive about enabling AFBC. As such,
                         * it's pretty much necessary to use it here, since we
                         * have no traces of non-compressed FBO. */

                        panfrost_enable_afbc(ctx, tex, false);
                }

                if (!is_scanout && !tex->bo->has_checksum) {
                        /* Enable transaction elimination if we can */
                        panfrost_enable_checksum(ctx, tex);
                }
        }

        {
                struct pipe_surface *zb = fb->zsbuf;

                if (ctx->pipe_framebuffer.zsbuf != zb) {
                        pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb);

                        if (zb) {
                                /* FBO has depth */

                                /* Same descriptor re-emission as for the
                                 * color attachment above */
                                if (require_sfbd)
                                        ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
                                else
                                        ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

                                panfrost_attach_vt_framebuffer(ctx);
                                panfrost_new_frag_framebuffer(ctx);
                                panfrost_set_scissor(ctx);

                                struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture);

                                /* true = this is a depth buffer */
                                if (!tex->bo->has_afbc && !panfrost_is_scanout(ctx))
                                        panfrost_enable_afbc(ctx, tex, true);
                        }
                }
        }
}
| 2396 | |
| 2397 | static void * |
| 2398 | panfrost_create_blend_state(struct pipe_context *pipe, |
| 2399 | const struct pipe_blend_state *blend) |
| 2400 | { |
| 2401 | struct panfrost_context *ctx = pan_context(pipe); |
| 2402 | struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state); |
| 2403 | so->base = *blend; |
| 2404 | |
| 2405 | /* TODO: The following features are not yet implemented */ |
| 2406 | assert(!blend->logicop_enable); |
| 2407 | assert(!blend->alpha_to_coverage); |
| 2408 | assert(!blend->alpha_to_one); |
| 2409 | |
| 2410 | /* Compile the blend state, first as fixed-function if we can */ |
| 2411 | |
| 2412 | if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color)) |
| 2413 | return so; |
| 2414 | |
| 2415 | /* If we can't, compile a blend shader instead */ |
| 2416 | |
| 2417 | panfrost_make_blend_shader(ctx, so, &ctx->blend_color); |
| 2418 | |
| 2419 | return so; |
| 2420 | } |
| 2421 | |
| 2422 | static void |
| 2423 | panfrost_bind_blend_state(struct pipe_context *pipe, |
| 2424 | void *cso) |
| 2425 | { |
| 2426 | struct panfrost_context *ctx = pan_context(pipe); |
| 2427 | struct pipe_blend_state *blend = (struct pipe_blend_state *) cso; |
| 2428 | struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso; |
| 2429 | ctx->blend = pblend; |
| 2430 | |
| 2431 | if (!blend) |
| 2432 | return; |
| 2433 | |
| 2434 | SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither); |
| 2435 | |
| 2436 | /* TODO: Attach color */ |
| 2437 | |
| 2438 | /* Shader itself is not dirty, but the shader core is */ |
| 2439 | ctx->dirty |= PAN_DIRTY_FS; |
| 2440 | } |
| 2441 | |
| 2442 | static void |
| 2443 | panfrost_delete_blend_state(struct pipe_context *pipe, |
| 2444 | void *blend) |
| 2445 | { |
Alyssa Rosenzweig | acc52ff | 2019-02-14 04:00:19 +0000 | [diff] [blame] | 2446 | struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend; |
| 2447 | |
| 2448 | if (so->has_blend_shader) { |
| 2449 | printf("Deleting blend state leak blend shaders bytecode\n"); |
| 2450 | } |
| 2451 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2452 | free(blend); |
| 2453 | } |
| 2454 | |
| 2455 | static void |
| 2456 | panfrost_set_blend_color(struct pipe_context *pipe, |
| 2457 | const struct pipe_blend_color *blend_color) |
| 2458 | { |
| 2459 | struct panfrost_context *ctx = pan_context(pipe); |
| 2460 | |
| 2461 | /* If blend_color is we're unbinding, so ctx->blend_color is now undefined -> nothing to do */ |
| 2462 | |
| 2463 | if (blend_color) { |
| 2464 | ctx->blend_color = *blend_color; |
| 2465 | |
| 2466 | /* The blend mode depends on the blend constant color, due to the |
| 2467 | * fixed/programmable split. So, we're forced to regenerate the blend |
| 2468 | * equation */ |
| 2469 | |
| 2470 | /* TODO: Attach color */ |
| 2471 | } |
| 2472 | } |
| 2473 | |
| 2474 | static void * |
| 2475 | panfrost_create_depth_stencil_state(struct pipe_context *pipe, |
| 2476 | const struct pipe_depth_stencil_alpha_state *depth_stencil) |
| 2477 | { |
| 2478 | return mem_dup(depth_stencil, sizeof(*depth_stencil)); |
| 2479 | } |
| 2480 | |
/* Binds a depth/stencil/alpha CSO, folding its state into the fragment
 * shader core descriptor. Alpha test is emulated in the fragment shader (no
 * hardware support), so enabling it may force a new shader variant. */
static void
panfrost_bind_depth_stencil_state(struct pipe_context *pipe,
                                  void *cso)
{
        struct panfrost_context *ctx = pan_context(pipe);
        struct pipe_depth_stencil_alpha_state *depth_stencil = cso;
        ctx->depth_stencil = depth_stencil;

        if (!depth_stencil)
                return;

        /* Alpha does not exist in the hardware (it's not in ES3), so it's
         * emulated in the fragment shader */

        if (depth_stencil->alpha.enabled) {
                /* We need to trigger a new shader (maybe) */
                ctx->base.bind_fs_state(&ctx->base, ctx->fs);
        }

        /* Stencil state */
        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? */

        /* Front and back stencil ops/masks are packed separately */
        panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front);
        ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask;

        panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back);
        ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask;

        /* Depth state (TODO: Refactor) */
        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled);

        /* With depth test disabled, the compare function degrades to ALWAYS */
        int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS;

        /* Clear, then set, the depth-func bitfield */
        ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK;
        ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func));

        /* Bounds test not implemented */
        assert(!depth_stencil->depth.bounds_test);

        ctx->dirty |= PAN_DIRTY_FS;
}
| 2522 | |
/* Deletes a depth/stencil CSO: the CSO is a plain heap copy made at create
 * time, so a free suffices. */
static void
panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
{
        free(depth);
}
| 2528 | |
/* No-op: the sample mask is currently ignored by the driver (TODO) */
static void
panfrost_set_sample_mask(struct pipe_context *pipe,
                         unsigned sample_mask)
{
}
| 2534 | |
/* No-op: clip state is currently ignored by the driver (TODO) */
static void
panfrost_set_clip_state(struct pipe_context *pipe,
                        const struct pipe_clip_state *clip)
{
        //struct panfrost_context *panfrost = pan_context(pipe);
}
| 2541 | |
| 2542 | static void |
| 2543 | panfrost_set_viewport_states(struct pipe_context *pipe, |
| 2544 | unsigned start_slot, |
| 2545 | unsigned num_viewports, |
| 2546 | const struct pipe_viewport_state *viewports) |
| 2547 | { |
| 2548 | struct panfrost_context *ctx = pan_context(pipe); |
| 2549 | |
| 2550 | assert(start_slot == 0); |
| 2551 | assert(num_viewports == 1); |
| 2552 | |
| 2553 | ctx->pipe_viewport = *viewports; |
| 2554 | |
| 2555 | #if 0 |
| 2556 | /* TODO: What if not centered? */ |
| 2557 | float w = abs(viewports->scale[0]) * 2.0; |
| 2558 | float h = abs(viewports->scale[1]) * 2.0; |
| 2559 | |
| 2560 | ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w); |
| 2561 | ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h); |
| 2562 | #endif |
| 2563 | } |
| 2564 | |
| 2565 | static void |
| 2566 | panfrost_set_scissor_states(struct pipe_context *pipe, |
| 2567 | unsigned start_slot, |
| 2568 | unsigned num_scissors, |
| 2569 | const struct pipe_scissor_state *scissors) |
| 2570 | { |
| 2571 | struct panfrost_context *ctx = pan_context(pipe); |
| 2572 | |
| 2573 | assert(start_slot == 0); |
| 2574 | assert(num_scissors == 1); |
| 2575 | |
| 2576 | ctx->scissor = *scissors; |
| 2577 | |
| 2578 | panfrost_set_scissor(ctx); |
| 2579 | } |
| 2580 | |
/* No-op: polygon stipple is currently ignored by the driver (TODO) */
static void
panfrost_set_polygon_stipple(struct pipe_context *pipe,
                             const struct pipe_poly_stipple *stipple)
{
        //struct panfrost_context *panfrost = pan_context(pipe);
}
| 2587 | |
/* No-op: the active-query toggle is currently ignored by the driver (TODO) */
static void
panfrost_set_active_query_state(struct pipe_context *pipe,
                                boolean enable)
{
        //struct panfrost_context *panfrost = pan_context(pipe);
}
| 2594 | |
| 2595 | static void |
| 2596 | panfrost_destroy(struct pipe_context *pipe) |
| 2597 | { |
| 2598 | struct panfrost_context *panfrost = pan_context(pipe); |
| 2599 | |
| 2600 | if (panfrost->blitter) |
| 2601 | util_blitter_destroy(panfrost->blitter); |
| 2602 | } |
| 2603 | |
| 2604 | static struct pipe_query * |
| 2605 | panfrost_create_query(struct pipe_context *pipe, |
| 2606 | unsigned type, |
| 2607 | unsigned index) |
| 2608 | { |
| 2609 | struct panfrost_query *q = CALLOC_STRUCT(panfrost_query); |
| 2610 | |
| 2611 | q->type = type; |
| 2612 | q->index = index; |
| 2613 | |
| 2614 | return (struct pipe_query *) q; |
| 2615 | } |
| 2616 | |
/* Destroys a query object. The result word allocated in begin_query lives in
 * a context heap and is not individually freed here. */
static void
panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
{
        FREE(q);
}
| 2622 | |
| 2623 | static boolean |
| 2624 | panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) |
| 2625 | { |
| 2626 | struct panfrost_context *ctx = pan_context(pipe); |
| 2627 | struct panfrost_query *query = (struct panfrost_query *) q; |
| 2628 | |
| 2629 | switch (query->type) { |
Alyssa Rosenzweig | 5155bcf | 2019-02-14 02:50:30 +0000 | [diff] [blame] | 2630 | case PIPE_QUERY_OCCLUSION_COUNTER: |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2631 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
| 2632 | case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: |
| 2633 | { |
| 2634 | /* Allocate a word for the query results to be stored */ |
| 2635 | query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), HEAP_DESCRIPTOR); |
| 2636 | |
| 2637 | ctx->occlusion_query = query; |
| 2638 | |
| 2639 | break; |
| 2640 | } |
| 2641 | |
| 2642 | default: |
| 2643 | fprintf(stderr, "Skipping query %d\n", query->type); |
| 2644 | break; |
| 2645 | } |
| 2646 | |
| 2647 | return true; |
| 2648 | } |
| 2649 | |
| 2650 | static bool |
| 2651 | panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q) |
| 2652 | { |
| 2653 | struct panfrost_context *ctx = pan_context(pipe); |
| 2654 | ctx->occlusion_query = NULL; |
| 2655 | return true; |
| 2656 | } |
| 2657 | |
/* Reads back query results. Pending jobs are flushed first so the GPU has
 * actually written the counters; the `wait` flag is currently ignored (this
 * always behaves as a blocking read). */
static boolean
panfrost_get_query_result(struct pipe_context *pipe,
                          struct pipe_query *q,
                          boolean wait,
                          union pipe_query_result *vresult)
{
        /* STUB */
        struct panfrost_query *query = (struct panfrost_query *) q;

        /* We need to flush out the jobs to actually run the counter, TODO
         * check wait, TODO wallpaper after if needed */

        panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME);

        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
        case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: {
                /* Read back the query results from the CPU mapping allocated
                 * in begin_query */
                unsigned *result = (unsigned *) query->transfer.cpu;
                unsigned passed = *result;

                /* Counters report the count itself; predicates only whether
                 * anything passed */
                if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) {
                        vresult->u64 = passed;
                } else {
                        vresult->b = !!passed;
                }

                break;
        }
        default:
                fprintf(stderr, "Skipped query get %d\n", query->type);
                break;
        }

        return true;
}
| 2695 | |
/* One-time allocation of the GPU memory pools a context uses for its whole
 * lifetime: per-frame transient pools, scratchpad, varying memory, executable
 * shader memory, the tiler heap, and misc_0. */
static void
panfrost_setup_hardware(struct panfrost_context *ctx)
{
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);

        for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) {
                /* Allocate the beginning of the transient pool */
                int entry_size = (1 << 22); /* 4MB */

                ctx->transient_pools[i].entry_size = entry_size;
                ctx->transient_pools[i].entry_count = 1;

                ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT);
        }

        /* INVISIBLE slabs are GPU-only; GROWABLE slabs can be extended on
         * demand. NOTE(review): the numeric size and trailing arguments'
         * units/semantics should be confirmed against the allocator. */
        screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0);
        screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0);
        screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0);
        screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);
        screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128);

}
| 2719 | |
| 2720 | /* New context creation, which also does hardware initialisation since I don't |
| 2721 | * know the better way to structure this :smirk: */ |
| 2722 | |
| 2723 | struct pipe_context * |
| 2724 | panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) |
| 2725 | { |
| 2726 | struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context); |
| 2727 | memset(ctx, 0, sizeof(*ctx)); |
| 2728 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 2729 | |
| 2730 | gallium->screen = screen; |
| 2731 | |
| 2732 | gallium->destroy = panfrost_destroy; |
| 2733 | |
| 2734 | gallium->set_framebuffer_state = panfrost_set_framebuffer_state; |
| 2735 | |
| 2736 | gallium->flush = panfrost_flush; |
| 2737 | gallium->clear = panfrost_clear; |
| 2738 | gallium->draw_vbo = panfrost_draw_vbo; |
| 2739 | |
| 2740 | gallium->set_vertex_buffers = panfrost_set_vertex_buffers; |
| 2741 | gallium->set_constant_buffer = panfrost_set_constant_buffer; |
| 2742 | |
| 2743 | gallium->set_stencil_ref = panfrost_set_stencil_ref; |
| 2744 | |
| 2745 | gallium->create_sampler_view = panfrost_create_sampler_view; |
| 2746 | gallium->set_sampler_views = panfrost_set_sampler_views; |
| 2747 | gallium->sampler_view_destroy = panfrost_sampler_view_destroy; |
| 2748 | |
| 2749 | gallium->create_rasterizer_state = panfrost_create_rasterizer_state; |
| 2750 | gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state; |
| 2751 | gallium->delete_rasterizer_state = panfrost_generic_cso_delete; |
| 2752 | |
| 2753 | gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state; |
| 2754 | gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; |
| 2755 | gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state; |
| 2756 | |
| 2757 | gallium->create_fs_state = panfrost_create_shader_state; |
| 2758 | gallium->delete_fs_state = panfrost_delete_shader_state; |
| 2759 | gallium->bind_fs_state = panfrost_bind_fs_state; |
| 2760 | |
| 2761 | gallium->create_vs_state = panfrost_create_shader_state; |
| 2762 | gallium->delete_vs_state = panfrost_delete_shader_state; |
| 2763 | gallium->bind_vs_state = panfrost_bind_vs_state; |
| 2764 | |
| 2765 | gallium->create_sampler_state = panfrost_create_sampler_state; |
| 2766 | gallium->delete_sampler_state = panfrost_generic_cso_delete; |
| 2767 | gallium->bind_sampler_states = panfrost_bind_sampler_states; |
| 2768 | |
| 2769 | gallium->create_blend_state = panfrost_create_blend_state; |
| 2770 | gallium->bind_blend_state = panfrost_bind_blend_state; |
| 2771 | gallium->delete_blend_state = panfrost_delete_blend_state; |
| 2772 | |
| 2773 | gallium->set_blend_color = panfrost_set_blend_color; |
| 2774 | |
| 2775 | gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; |
| 2776 | gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state; |
| 2777 | gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state; |
| 2778 | |
| 2779 | gallium->set_sample_mask = panfrost_set_sample_mask; |
| 2780 | |
| 2781 | gallium->set_clip_state = panfrost_set_clip_state; |
| 2782 | gallium->set_viewport_states = panfrost_set_viewport_states; |
| 2783 | gallium->set_scissor_states = panfrost_set_scissor_states; |
| 2784 | gallium->set_polygon_stipple = panfrost_set_polygon_stipple; |
| 2785 | gallium->set_active_query_state = panfrost_set_active_query_state; |
| 2786 | |
| 2787 | gallium->create_query = panfrost_create_query; |
| 2788 | gallium->destroy_query = panfrost_destroy_query; |
| 2789 | gallium->begin_query = panfrost_begin_query; |
| 2790 | gallium->end_query = panfrost_end_query; |
| 2791 | gallium->get_query_result = panfrost_get_query_result; |
| 2792 | |
| 2793 | panfrost_resource_context_init(gallium); |
| 2794 | |
| 2795 | panfrost_setup_hardware(ctx); |
| 2796 | |
| 2797 | /* XXX: leaks */ |
| 2798 | gallium->stream_uploader = u_upload_create_default(gallium); |
| 2799 | gallium->const_uploader = gallium->stream_uploader; |
| 2800 | assert(gallium->stream_uploader); |
| 2801 | |
Alyssa Rosenzweig | 85e2bb5 | 2019-02-08 02:28:12 +0000 | [diff] [blame] | 2802 | /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */ |
| 2803 | ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1; |
| 2804 | |
| 2805 | ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2806 | |
| 2807 | ctx->blitter = util_blitter_create(gallium); |
| 2808 | assert(ctx->blitter); |
| 2809 | |
| 2810 | /* Prepare for render! */ |
| 2811 | |
Alyssa Rosenzweig | 59c9623 | 2019-02-25 05:32:16 +0000 | [diff] [blame] | 2812 | panfrost_job_init(ctx); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2813 | panfrost_emit_vertex_payload(ctx); |
| 2814 | panfrost_emit_tiler_payload(ctx); |
| 2815 | panfrost_invalidate_frame(ctx); |
| 2816 | panfrost_viewport(ctx, 0.0, 1.0, 0, 0, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); |
| 2817 | panfrost_default_shader_backend(ctx); |
| 2818 | panfrost_generate_space_filler_indices(); |
| 2819 | |
| 2820 | return gallium; |
| 2821 | } |