/*
 * © Copyright 2018 Alyssa Rosenzweig
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <sys/poll.h>
#include <errno.h>

#include "pan_context.h"
#include "pan_swizzle.h"
#include "pan_format.h"

#include "util/macros.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_memory.h"
#include "util/half_float.h"
#include "indices/u_primconvert.h"
#include "tgsi/tgsi_parse.h"

#include "pan_screen.h"
#include "pan_blending.h"
#include "pan_blend_shaders.h"
#include "pan_wallpaper.h"

static int performance_counter_number = 0;
extern const char *pan_counters_base;

/* Do not actually send anything to the GPU; merely generate the cmdstream as fast as possible. Disables framebuffer writes */
//#define DRY_RUN
#define SET_BIT(lval, bit, cond) \
        if (cond) \
                lval |= (bit); \
        else \
                lval &= ~(bit);
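
/* Note: SET_BIT expands to a bare if/else rather than a single statement.
 * As a usage sketch (with a hypothetical condition variable), this sets or
 * clears MALI_HAS_MSAA depending on rasterizer state:
 *
 *     SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, msaa_on);
 *
 * Because there is no do { } while (0) wrapper, avoid using it as the body
 * of an unbraced if/else. */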

/* TODO: Sample size, etc */

/* True for t6XX, false for t8xx. TODO: Run-time settable for automatic
 * hardware configuration. */

static bool is_t6xx = false;

/* If set, we'll require the use of single render-target framebuffer
 * descriptors (SFBD), for older hardware -- specifically, hardware before
 * T760. If false, we'll use the MFBD no matter what. New hardware -does-
 * retain support for SFBD, and in theory we could flip between them on a
 * per-RT basis, but there's no real advantage to doing so */

static bool require_sfbd = false;

static void
panfrost_set_framebuffer_msaa(struct panfrost_context *ctx, bool enabled)
{
        SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_HAS_MSAA, enabled);
        SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_MSAA, !enabled);

        if (require_sfbd) {
                SET_BIT(ctx->fragment_sfbd.format, MALI_FRAMEBUFFER_MSAA_A | MALI_FRAMEBUFFER_MSAA_B, enabled);
        } else {
                SET_BIT(ctx->fragment_rts[0].format.flags, MALI_MFBD_FORMAT_MSAA, enabled);

                SET_BIT(ctx->fragment_mfbd.unk1, (1 << 4) | (1 << 1), enabled);

                /* XXX */
                ctx->fragment_mfbd.rt_count_2 = enabled ? 4 : 1;
        }
}

/* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically
 * independent between color buffers and depth/stencil). To enable, we allocate
 * the AFBC metadata buffer and mark that it is enabled. We do -not- actually
 * edit the fragment job here. This routine should be called ONCE per
 * AFBC-compressed buffer, rather than on every frame. */

static void
panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds)
{
        if (require_sfbd) {
                printf("AFBC not supported yet on SFBD\n");
                assert(0);
        }

        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);
        /* AFBC metadata is 16 bytes per tile */
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int bytes_per_pixel = util_format_get_blocksize(rsrc->base.format);
        int stride = bytes_per_pixel * rsrc->base.width0; /* TODO: Alignment? */

        stride *= 2; /* TODO: Should this be carried over? */
        int main_size = stride * rsrc->base.height0;
        rsrc->bo->afbc_metadata_size = tile_w * tile_h * 16;
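
        /* Rough worked example of the sizing above (assuming MALI_TILE_LENGTH
         * is 16, i.e. 16x16 superblocks): a 1920x1080 RGBA8 surface gives
         * tile_w = 120 and tile_h = 68, so the metadata region is
         * 120 * 68 * 16 = 130560 bytes, and main_size uses the doubled
         * stride of 4 * 1920 * 2 bytes per row. These numbers are purely
         * illustrative, not hardware-verified. */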

        /* Allocate the AFBC slab itself, large enough to hold the above */
        screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab,
                                      (rsrc->bo->afbc_metadata_size + main_size + 4095) / 4096,
                                      true, 0, 0, 0);

        rsrc->bo->has_afbc = true;

        /* Compressed textured reads use a tagged pointer to the metadata */

        rsrc->bo->gpu[0] = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1);
        rsrc->bo->cpu[0] = rsrc->bo->afbc_slab.cpu;
}

static void
panfrost_enable_checksum(struct panfrost_context *ctx, struct panfrost_resource *rsrc)
{
        struct pipe_context *gallium = (struct pipe_context *) ctx;
        struct panfrost_screen *screen = pan_screen(gallium->screen);
        int tile_w = (rsrc->base.width0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;
        int tile_h = (rsrc->base.height0 + (MALI_TILE_LENGTH - 1)) >> MALI_TILE_SHIFT;

        /* 8 byte checksum per tile */
        rsrc->bo->checksum_stride = tile_w * 8;
        int pages = (((rsrc->bo->checksum_stride * tile_h) + 4095) / 4096);
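
        /* Illustration of the arithmetic (again assuming 16x16 tiles): for a
         * 1920x1080 surface, tile_w = 120 and tile_h = 68, so
         * checksum_stride = 960 bytes and the slab needs
         * ceil(960 * 68 / 4096) = 16 pages. Example values only. */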
        screen->driver->allocate_slab(screen, &rsrc->bo->checksum_slab, pages, false, 0, 0, 0);

        rsrc->bo->has_checksum = true;
}

/* By contrast, this routine runs for every FRAGMENT job, but does no
 * allocation. AFBC is enabled on a per-surface basis */

static void
panfrost_set_fragment_afbc(struct panfrost_context *ctx)
{
        for (int cb = 0; cb < ctx->pipe_framebuffer.nr_cbufs; ++cb) {
                struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[cb]->texture;

                /* Non-AFBC is the default */
                if (!rsrc->bo->has_afbc)
                        continue;

                if (require_sfbd) {
                        fprintf(stderr, "Color AFBC not supported on SFBD\n");
                        assert(0);
                }

                /* Enable AFBC for the render target */
                ctx->fragment_rts[0].afbc.metadata = rsrc->bo->afbc_slab.gpu;
                ctx->fragment_rts[0].afbc.stride = 0;
                ctx->fragment_rts[0].afbc.unk = 0x30009;

                ctx->fragment_rts[0].format.flags |= MALI_MFBD_FORMAT_AFBC;

                /* Point rendering to our special framebuffer */
                ctx->fragment_rts[0].framebuffer = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;

                /* XXX: The stride here differs from the scanout case for
                 * reasons that are not yet understood */
                ctx->fragment_rts[0].framebuffer_stride = ctx->pipe_framebuffer.width * 2 * 4;
        }

        /* Enable depth/stencil AFBC for the framebuffer (not the render target) */
        if (ctx->pipe_framebuffer.zsbuf) {
                struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture;

                if (rsrc->bo->has_afbc) {
                        if (require_sfbd) {
                                fprintf(stderr, "Depth AFBC not supported on SFBD\n");
                                assert(0);
                        }

                        ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;

                        ctx->fragment_extra.ds_afbc.depth_stencil_afbc_metadata = rsrc->bo->afbc_slab.gpu;
                        ctx->fragment_extra.ds_afbc.depth_stencil_afbc_stride = 0;

                        ctx->fragment_extra.ds_afbc.depth_stencil = rsrc->bo->afbc_slab.gpu + rsrc->bo->afbc_metadata_size;

                        ctx->fragment_extra.ds_afbc.zero1 = 0x10009;
                        ctx->fragment_extra.ds_afbc.padding = 0x1000;

                        ctx->fragment_extra.unk = 0x435; /* General 0x400 in all unks. 0x5 for depth/stencil. 0x10 for AFBC encoded depth stencil. Unclear where the 0x20 is from */

                        ctx->fragment_mfbd.unk3 |= 0x400;
                }
        }

        /* For the special case of a depth-only FBO, we need to attach a dummy render target */

        if (ctx->pipe_framebuffer.nr_cbufs == 0) {
                if (require_sfbd) {
                        fprintf(stderr, "Depth-only FBO not supported on SFBD\n");
                        assert(0);
                }

                struct mali_rt_format null_rt = {
                        .unk1 = 0x4000000,
                        .unk4 = 0x8
                };

                ctx->fragment_rts[0].format = null_rt;
                ctx->fragment_rts[0].framebuffer = 0;
                ctx->fragment_rts[0].framebuffer_stride = 0;
        }
}

/* Framebuffer descriptor */

static void
panfrost_set_framebuffer_resolution(struct mali_single_framebuffer *fb, int w, int h)
{
        fb->width = MALI_POSITIVE(w);
        fb->height = MALI_POSITIVE(h);

        /* No idea why this is needed, but it's how resolution_check is
         * calculated. It's not clear to us yet why the hardware wants this.
         * The formula itself was discovered mostly by manual bruteforce and
         * aggressive algebraic simplification. */

        fb->resolution_check = ((w + h) / 3) << 4;
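
        /* For example, a 1920x1080 framebuffer gives
         * ((1920 + 1080) / 3) << 4 = 1000 << 4 = 16000. */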
}

static struct mali_single_framebuffer
panfrost_emit_sfbd(struct panfrost_context *ctx)
{
        struct mali_single_framebuffer framebuffer = {
                .unknown2 = 0x1f,
                .format = 0x30000000,
                .clear_flags = 0x1000,
                .unknown_address_0 = ctx->scratchpad.gpu,
                .unknown_address_1 = ctx->misc_0.gpu,
                .unknown_address_2 = ctx->misc_0.gpu + 40960,
                .tiler_flags = 0xf0,
                .tiler_heap_free = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        panfrost_set_framebuffer_resolution(&framebuffer, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height);

        return framebuffer;
}

static struct bifrost_framebuffer
panfrost_emit_mfbd(struct panfrost_context *ctx)
{
        struct bifrost_framebuffer framebuffer = {
                /* It is not yet clear what tiler_meta means or how it's
                 * calculated, but we can tell the lower 32 bits are a
                 * (monotonically increasing?) function of tile count and
                 * geometry complexity; I suspect it defines some sort of
                 * memory budget for the tiler. It's really unclear at the
                 * moment, but to add to the confusion, the hardware is happy
                 * enough to accept a zero in this field, so we don't even
                 * have to worry about it right now.
                 *
                 * The byte (just after the 32-bit mark) is much more
                 * interesting. The higher nibble I've only ever seen as 0xF,
                 * but the lower one I've seen as 0x0 or 0xF, and it's not
                 * obvious what the difference is. But what -is- obvious is
                 * that when the lower nibble is zero, performance is severely
                 * degraded compared to when the lower nibble is set.
                 * Evidently, that nibble enables some sort of fast path,
                 * perhaps relating to caching or tile flush? Regardless, at
                 * this point there's no clear reason not to set it, aside from
                 * substantially increased memory requirements (of the misc_0
                 * buffer) */

                .tiler_meta = ((uint64_t) 0xff << 32) | 0x0,

                .width1 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height1 = MALI_POSITIVE(ctx->pipe_framebuffer.height),
                .width2 = MALI_POSITIVE(ctx->pipe_framebuffer.width),
                .height2 = MALI_POSITIVE(ctx->pipe_framebuffer.height),

                .unk1 = 0x1080,

                /* TODO: MRT */
                .rt_count_1 = MALI_POSITIVE(1),
                .rt_count_2 = 4,

                .unknown2 = 0x1f,

                /* Corresponds to unknown_address_X of SFBD */
                .scratchpad = ctx->scratchpad.gpu,
                .tiler_scratch_start = ctx->misc_0.gpu,

                /* The constant added here is, like the lower word of
                 * tiler_meta, (loosely) another product of framebuffer size
                 * and geometry complexity. It must be sufficiently large for
                 * the tiler_meta fast path to work; if it's too small, there
                 * will be DATA_INVALID_FAULTs. Conversely, it must be less
                 * than the total size of misc_0, or else there's no room. It's
                 * possible this constant configures a partition between two
                 * parts of misc_0? We haven't investigated the functionality,
                 * as these buffers are internally used by the hardware
                 * (presumably by the tiler) but not seemingly touched by the
                 * driver */

                .tiler_scratch_middle = ctx->misc_0.gpu + 0xf0000,

                .tiler_heap_start = ctx->tiler_heap.gpu,
                .tiler_heap_end = ctx->tiler_heap.gpu + ctx->tiler_heap.size,
        };

        return framebuffer;
}

/* Are we currently rendering to the screen (rather than an FBO)? */

static bool
panfrost_is_scanout(struct panfrost_context *ctx)
{
        /* If there is no color buffer, it's an FBO */
        if (!ctx->pipe_framebuffer.nr_cbufs)
                return false;

        /* If we're too early, such that no framebuffer was sent yet, it's scanout */
        if (!ctx->pipe_framebuffer.cbufs[0])
                return true;

        return ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_DISPLAY_TARGET ||
               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SCANOUT ||
               ctx->pipe_framebuffer.cbufs[0]->texture->bind & PIPE_BIND_SHARED;
}

/* The emit_sfbd/emit_mfbd helpers above handle generalised FBD emission, used
 * for both fragment and vertex/tiler payloads. The framebuffer built here is
 * specific to fragment payloads. */

static void
panfrost_new_frag_framebuffer(struct panfrost_context *ctx)
{
        mali_ptr framebuffer;
        int stride;

        if (ctx->pipe_framebuffer.nr_cbufs > 0) {
                framebuffer = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture)->bo->gpu[0];
                stride = util_format_get_stride(ctx->pipe_framebuffer.cbufs[0]->format, ctx->pipe_framebuffer.width);
        } else {
                /* Depth-only framebuffer -> dummy RT */
                framebuffer = 0;
                stride = 0;
        }

        /* The default is upside down from OpenGL's perspective. */
        if (panfrost_is_scanout(ctx)) {
                framebuffer += stride * (ctx->pipe_framebuffer.height - 1);
                stride = -stride;
        }

        if (require_sfbd) {
                struct mali_single_framebuffer fb = panfrost_emit_sfbd(ctx);

                fb.framebuffer = framebuffer;
                fb.stride = stride;

                fb.format = 0xb84e0281; /* RGB32, no MSAA */
                memcpy(&ctx->fragment_sfbd, &fb, sizeof(fb));
        } else {
                struct bifrost_framebuffer fb = panfrost_emit_mfbd(ctx);

                /* XXX: MRT case */
                fb.rt_count_2 = 1;
                fb.unk3 = 0x100;

                /* By default, Gallium seems to need a BGR framebuffer */
                unsigned char bgra[4] = {
                        PIPE_SWIZZLE_Z, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_X, PIPE_SWIZZLE_W
                };

                struct bifrost_render_target rt = {
                        .format = {
                                .unk1 = 0x4000000,
                                .unk2 = 0x1,
                                .nr_channels = MALI_POSITIVE(4),
                                .flags = 0x444,
                                .swizzle = panfrost_translate_swizzle_4(bgra),
                                .unk4 = 0x8
                        },
                        .framebuffer = framebuffer,
                        .framebuffer_stride = (stride / 16) & 0xfffffff,
                };

                memcpy(&ctx->fragment_rts[0], &rt, sizeof(rt));

                memset(&ctx->fragment_extra, 0, sizeof(ctx->fragment_extra));
                memcpy(&ctx->fragment_mfbd, &fb, sizeof(fb));
        }
}

/* Maps float 0.0-1.0 to int 0x00-0xFF */
static uint8_t
normalised_float_to_u8(float f)
{
        return (uint8_t) (int) (f * 255.0f);
}
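
/* For instance, 0.0 maps to 0x00, 0.5 to 0x7F, and 1.0 to 0xFF. Inputs
 * outside [0, 1] are not clamped here; callers are assumed to pass
 * already-normalised values. */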

static void
panfrost_clear_sfbd(struct panfrost_context *ctx,
                    bool clear_color,
                    bool clear_depth,
                    bool clear_stencil,
                    uint32_t packed_color,
                    double depth, unsigned stencil)
{
        struct mali_single_framebuffer *sfbd = &ctx->fragment_sfbd;

        if (clear_color) {
                sfbd->clear_color_1 = packed_color;
                sfbd->clear_color_2 = packed_color;
                sfbd->clear_color_3 = packed_color;
                sfbd->clear_color_4 = packed_color;
        }

        if (clear_depth) {
                sfbd->clear_depth_1 = depth;
                sfbd->clear_depth_2 = depth;
                sfbd->clear_depth_3 = depth;
                sfbd->clear_depth_4 = depth;
        }

        if (clear_stencil) {
                sfbd->clear_stencil = stencil;
        }

        /* Setup buffers */

        if (clear_depth) {
                sfbd->depth_buffer = ctx->depth_stencil_buffer.gpu;
                sfbd->depth_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
        }

        if (clear_stencil) {
                sfbd->stencil_buffer = ctx->depth_stencil_buffer.gpu;
                sfbd->stencil_buffer_enable = MALI_DEPTH_STENCIL_ENABLE;
        }

        /* Set flags based on what has been cleared, for the SFBD case */
        /* XXX: What do these flags mean? */
        int clear_flags = 0x101100;

        if (clear_color && clear_depth && clear_stencil) {
                /* On a tiler like this, it's fastest to clear all three buffers at once */

                clear_flags |= MALI_CLEAR_FAST;
        } else {
                clear_flags |= MALI_CLEAR_SLOW;

                if (clear_stencil)
                        clear_flags |= MALI_CLEAR_SLOW_STENCIL;
        }

        sfbd->clear_flags = clear_flags;
}

static void
panfrost_clear_mfbd(struct panfrost_context *ctx,
                    bool clear_color,
                    bool clear_depth,
                    bool clear_stencil,
                    uint32_t packed_color,
                    double depth, unsigned stencil)
{
        struct bifrost_render_target *buffer_color = &ctx->fragment_rts[0];
        struct bifrost_framebuffer *buffer_ds = &ctx->fragment_mfbd;

        if (clear_color) {
                buffer_color->clear_color_1 = packed_color;
                buffer_color->clear_color_2 = packed_color;
                buffer_color->clear_color_3 = packed_color;
                buffer_color->clear_color_4 = packed_color;
        }

        if (clear_depth) {
                buffer_ds->clear_depth = depth;
        }

        if (clear_stencil) {
                buffer_ds->clear_stencil = stencil;
        }

        if (clear_depth || clear_stencil) {
                /* Setup combined 24/8 depth/stencil */
                ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
                //ctx->fragment_extra.unk = /*0x405*/0x404;
                ctx->fragment_extra.unk = 0x405;
                ctx->fragment_extra.ds_linear.depth = ctx->depth_stencil_buffer.gpu;
                ctx->fragment_extra.ds_linear.depth_stride = ctx->pipe_framebuffer.width * 4;
        }
}

static void
panfrost_clear(
        struct pipe_context *pipe,
        unsigned buffers,
        const union pipe_color_union *color,
        double depth, unsigned stencil)
{
        struct panfrost_context *ctx = pan_context(pipe);

        if (!color) {
                printf("Warning: clear color null?\n");
                return;
        }

        /* Save settings for FBO switch */
        ctx->last_clear.buffers = buffers;
        ctx->last_clear.color = color;
        ctx->last_clear.depth = depth;
        ctx->last_clear.stencil = stencil;

        bool clear_color = buffers & PIPE_CLEAR_COLOR;
        bool clear_depth = buffers & PIPE_CLEAR_DEPTH;
        bool clear_stencil = buffers & PIPE_CLEAR_STENCIL;

        /* Remember that we've done something */
        ctx->frame_cleared = true;

        /* The clear alpha is only meaningful if the surface actually has an
         * alpha channel; otherwise, force it to fully opaque */
        bool has_alpha = ctx->pipe_framebuffer.nr_cbufs && util_format_has_alpha(ctx->pipe_framebuffer.cbufs[0]->format);
        float clear_alpha = has_alpha ? color->f[3] : 1.0f;

        uint32_t packed_color =
                (normalised_float_to_u8(clear_alpha) << 24) |
                (normalised_float_to_u8(color->f[2]) << 16) |
                (normalised_float_to_u8(color->f[1]) << 8) |
                (normalised_float_to_u8(color->f[0]) << 0);
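
        /* Read as a 32-bit word this is ABGR, i.e. bytes R, G, B, A in
         * little-endian memory order. For example, opaque red
         * (r=1, g=0, b=0, a=1) packs to 0xff0000ff. */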

        if (require_sfbd) {
                panfrost_clear_sfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil);
        } else {
                panfrost_clear_mfbd(ctx, clear_color, clear_depth, clear_stencil, packed_color, depth, stencil);
        }
}

static mali_ptr
panfrost_attach_vt_mfbd(struct panfrost_context *ctx)
{
        /* MFBD needs a sequential semi-render target upload, but what exactly this is, is beyond me for now */
        struct bifrost_render_target rts_list[] = {
                {
                        .chunknown = {
                                .unk = 0x30005,
                        },
                        .framebuffer = ctx->misc_0.gpu,
                        .zero2 = 0x3,
                },
        };

        /* Allocate memory for the three components */
        int size = 1024 + sizeof(ctx->vt_framebuffer_mfbd) + sizeof(rts_list);
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size);

        /* Opaque 1024-block */
        rts_list[0].chunknown.pointer = transfer.gpu;

        memcpy(transfer.cpu + 1024, &ctx->vt_framebuffer_mfbd, sizeof(ctx->vt_framebuffer_mfbd));
        memcpy(transfer.cpu + 1024 + sizeof(ctx->vt_framebuffer_mfbd), rts_list, sizeof(rts_list));

        return (transfer.gpu + 1024) | MALI_MFBD;
}

static mali_ptr
panfrost_attach_vt_sfbd(struct panfrost_context *ctx)
{
        return panfrost_upload_transient(ctx, &ctx->vt_framebuffer_sfbd, sizeof(ctx->vt_framebuffer_sfbd)) | MALI_SFBD;
}

static void
panfrost_attach_vt_framebuffer(struct panfrost_context *ctx)
{
        mali_ptr framebuffer = require_sfbd ?
                               panfrost_attach_vt_sfbd(ctx) :
                               panfrost_attach_vt_mfbd(ctx);

        ctx->payload_vertex.postfix.framebuffer = framebuffer;
        ctx->payload_tiler.postfix.framebuffer = framebuffer;
}

static void
panfrost_viewport(struct panfrost_context *ctx,
                  float depth_clip_near,
                  float depth_clip_far,
                  int viewport_x0, int viewport_y0,
                  int viewport_x1, int viewport_y1)
{
        /* Clip bounds are encoded as floats. The viewport itself is encoded as
         * (somewhat) asymmetric ints. */

        struct mali_viewport ret = {
                /* By default, do no viewport clipping, i.e. clip to (-inf,
                 * inf) in each direction. Clipping to the viewport in theory
                 * should work, but in practice causes issues when we're not
                 * explicitly trying to scissor */

                .clip_minx = -inff,
                .clip_miny = -inff,
                .clip_maxx = inff,
                .clip_maxy = inff,

                /* We always perform depth clipping (TODO: Can this be disabled?) */

                .clip_minz = depth_clip_near,
                .clip_maxz = depth_clip_far,

                .viewport0 = { viewport_x0, viewport_y0 },
                .viewport1 = { MALI_POSITIVE(viewport_x1), MALI_POSITIVE(viewport_y1) },
        };

        memcpy(ctx->viewport, &ret, sizeof(ret));
}

/* Reset per-frame context, called on context initialisation as well as after
 * flushing a frame */

static void
panfrost_invalidate_frame(struct panfrost_context *ctx)
{
        unsigned transient_count = ctx->transient_pools[ctx->cmdstream_i].entry_index * ctx->transient_pools[0].entry_size + ctx->transient_pools[ctx->cmdstream_i].entry_offset;
        printf("Uploaded transient %d bytes\n", transient_count);

        /* Rotate cmdstream */
        if ((++ctx->cmdstream_i) == (sizeof(ctx->transient_pools) / sizeof(ctx->transient_pools[0])))
                ctx->cmdstream_i = 0;

        if (require_sfbd)
                ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx);
        else
                ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx);

        panfrost_new_frag_framebuffer(ctx);

        /* Reset varyings allocated */
        ctx->varying_height = 0;

        /* The transient cmdstream is dirty every frame; the only bits worth preserving
         * (textures, shaders, etc) are in other buffers anyways */

        ctx->transient_pools[ctx->cmdstream_i].entry_index = 0;
        ctx->transient_pools[ctx->cmdstream_i].entry_offset = 0;

        /* Regenerate payloads */
        panfrost_attach_vt_framebuffer(ctx);

        if (ctx->rasterizer)
                ctx->dirty |= PAN_DIRTY_RASTERIZER;

        /* XXX */
        ctx->dirty |= PAN_DIRTY_SAMPLERS | PAN_DIRTY_TEXTURES;
}

/* In practice, every field of these payloads should be configurable
 * arbitrarily, which means these functions are basically catch-alls for
 * as-of-yet unwavering unknowns */

static void
panfrost_emit_vertex_payload(struct panfrost_context *ctx)
{
        struct midgard_payload_vertex_tiler payload = {
                .prefix = {
                        .workgroups_z_shift = 32,
                        .workgroups_x_shift_2 = 0x2,
                        .workgroups_x_shift_3 = 0x5,
                },
                .gl_enables = 0x4 | (is_t6xx ? 0 : 0x2),
        };

        memcpy(&ctx->payload_vertex, &payload, sizeof(payload));
}

static void
panfrost_emit_tiler_payload(struct panfrost_context *ctx)
{
        struct midgard_payload_vertex_tiler payload = {
                .prefix = {
                        .workgroups_z_shift = 32,
                        .workgroups_x_shift_2 = 0x2,
                        .workgroups_x_shift_3 = 0x6,

                        .zero1 = 0xffff, /* Why is this only seen on test-quad-textured? */
                },
        };

        /* Reserve the viewport */
        struct panfrost_transfer t = panfrost_allocate_chunk(ctx, sizeof(struct mali_viewport), HEAP_DESCRIPTOR);
        ctx->viewport = (struct mali_viewport *) t.cpu;
        payload.postfix.viewport = t.gpu;

        memcpy(&ctx->payload_tiler, &payload, sizeof(payload));
}

static unsigned
translate_tex_wrap(enum pipe_tex_wrap w)
{
        switch (w) {
        case PIPE_TEX_WRAP_REPEAT:
                return MALI_WRAP_REPEAT;

        case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
                return MALI_WRAP_CLAMP_TO_EDGE;

        case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
                return MALI_WRAP_CLAMP_TO_BORDER;

        case PIPE_TEX_WRAP_MIRROR_REPEAT:
                return MALI_WRAP_MIRRORED_REPEAT;

        default:
                assert(0);
                return 0;
        }
}

static unsigned
translate_tex_filter(enum pipe_tex_filter f)
{
        switch (f) {
        case PIPE_TEX_FILTER_NEAREST:
                return MALI_NEAREST;

        case PIPE_TEX_FILTER_LINEAR:
                return MALI_LINEAR;

        default:
                assert(0);
                return 0;
        }
}

static unsigned
translate_mip_filter(enum pipe_tex_mipfilter f)
{
        return (f == PIPE_TEX_MIPFILTER_LINEAR) ? MALI_MIP_LINEAR : 0;
}

static unsigned
panfrost_translate_compare_func(enum pipe_compare_func in)
{
        switch (in) {
        case PIPE_FUNC_NEVER:
                return MALI_FUNC_NEVER;

        case PIPE_FUNC_LESS:
                return MALI_FUNC_LESS;

        case PIPE_FUNC_EQUAL:
                return MALI_FUNC_EQUAL;

        case PIPE_FUNC_LEQUAL:
                return MALI_FUNC_LEQUAL;

        case PIPE_FUNC_GREATER:
                return MALI_FUNC_GREATER;

        case PIPE_FUNC_NOTEQUAL:
                return MALI_FUNC_NOTEQUAL;

        case PIPE_FUNC_GEQUAL:
                return MALI_FUNC_GEQUAL;

        case PIPE_FUNC_ALWAYS:
                return MALI_FUNC_ALWAYS;
        }

        assert(0);
        return 0; /* Unreachable */
}

static unsigned
panfrost_translate_alt_compare_func(enum pipe_compare_func in)
{
        switch (in) {
        case PIPE_FUNC_NEVER:
                return MALI_ALT_FUNC_NEVER;

        case PIPE_FUNC_LESS:
                return MALI_ALT_FUNC_LESS;

        case PIPE_FUNC_EQUAL:
                return MALI_ALT_FUNC_EQUAL;

        case PIPE_FUNC_LEQUAL:
                return MALI_ALT_FUNC_LEQUAL;

        case PIPE_FUNC_GREATER:
                return MALI_ALT_FUNC_GREATER;

        case PIPE_FUNC_NOTEQUAL:
                return MALI_ALT_FUNC_NOTEQUAL;

        case PIPE_FUNC_GEQUAL:
                return MALI_ALT_FUNC_GEQUAL;

        case PIPE_FUNC_ALWAYS:
                return MALI_ALT_FUNC_ALWAYS;
        }

        assert(0);
        return 0; /* Unreachable */
}

static unsigned
panfrost_translate_stencil_op(enum pipe_stencil_op in)
{
        switch (in) {
        case PIPE_STENCIL_OP_KEEP:
                return MALI_STENCIL_KEEP;

        case PIPE_STENCIL_OP_ZERO:
                return MALI_STENCIL_ZERO;

        case PIPE_STENCIL_OP_REPLACE:
                return MALI_STENCIL_REPLACE;

        case PIPE_STENCIL_OP_INCR:
                return MALI_STENCIL_INCR;

        case PIPE_STENCIL_OP_DECR:
                return MALI_STENCIL_DECR;

        case PIPE_STENCIL_OP_INCR_WRAP:
                return MALI_STENCIL_INCR_WRAP;

        case PIPE_STENCIL_OP_DECR_WRAP:
                return MALI_STENCIL_DECR_WRAP;

        case PIPE_STENCIL_OP_INVERT:
                return MALI_STENCIL_INVERT;
        }

        assert(0);
        return 0; /* Unreachable */
}

static void
panfrost_make_stencil_state(const struct pipe_stencil_state *in, struct mali_stencil_test *out)
{
        out->ref = 0; /* Gallium gets it from elsewhere */

        out->mask = in->valuemask;
        out->func = panfrost_translate_compare_func(in->func);
        out->sfail = panfrost_translate_stencil_op(in->fail_op);
        out->dpfail = panfrost_translate_stencil_op(in->zfail_op);
        out->dppass = panfrost_translate_stencil_op(in->zpass_op);
}

static void
panfrost_default_shader_backend(struct panfrost_context *ctx)
{
        struct mali_shader_meta shader = {
                .alpha_coverage = ~MALI_ALPHA_COVERAGE(0.000000),

                .unknown2_3 = MALI_DEPTH_FUNC(MALI_FUNC_ALWAYS) | 0x3010,
                .unknown2_4 = MALI_NO_MSAA | 0x4e0,
        };

        if (is_t6xx) {
                shader.unknown2_4 |= 0x10;
        }

        struct pipe_stencil_state default_stencil = {
                .enabled = 0,
                .func = PIPE_FUNC_ALWAYS,
                .fail_op = MALI_STENCIL_KEEP,
                .zfail_op = MALI_STENCIL_KEEP,
                .zpass_op = MALI_STENCIL_KEEP,
                .writemask = 0xFF,
                .valuemask = 0xFF
        };

        panfrost_make_stencil_state(&default_stencil, &shader.stencil_front);
        shader.stencil_mask_front = default_stencil.writemask;

        panfrost_make_stencil_state(&default_stencil, &shader.stencil_back);
        shader.stencil_mask_back = default_stencil.writemask;

        if (default_stencil.enabled)
                shader.unknown2_4 |= MALI_STENCIL_TEST;

        memcpy(&ctx->fragment_shader_core, &shader, sizeof(shader));
}

/* Generates a vertex/tiler job. This is, in some sense, the heart of the
 * graphics command stream. It should be called once per draw, according to
 * presentations. Set is_tiler for "tiler" jobs (fragment shader jobs, but in
 * Mali parlance, "fragment" refers to framebuffer writeout). Clear it for
 * vertex jobs. */

struct panfrost_transfer
panfrost_vertex_tiler_job(struct panfrost_context *ctx, bool is_tiler, bool is_elided_tiler)
{
        /* Each draw call corresponds to two jobs, and we want to offset to leave room for the set-value job */
        int draw_job_index = 1 + (2 * ctx->draw_count);

        struct mali_job_descriptor_header job = {
                .job_type = is_tiler ? JOB_TYPE_TILER : JOB_TYPE_VERTEX,
                .job_index = draw_job_index + (is_tiler ? 1 : 0),
#ifdef __LP64__
                .job_descriptor_size = 1,
#endif
        };

        /* Only non-elided tiler jobs have dependencies which are known at this point */

        if (is_tiler && !is_elided_tiler) {
                /* Tiler jobs depend on vertex jobs */

                job.job_dependency_index_1 = draw_job_index;

                /* Tiler jobs also depend on the previous tiler job */

                if (ctx->draw_count)
                        job.job_dependency_index_2 = draw_job_index - 1;
        }
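
        /* Illustration of the indexing above: for the first draw of a frame
         * (draw_count == 0), the vertex job takes index 1 and its tiler job
         * index 2, with the tiler depending on index 1. For the second draw
         * (draw_count == 1), the pair is 3 and 4, and that tiler job
         * additionally depends on index 2, the previous tiler job. */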

        struct midgard_payload_vertex_tiler *payload = is_tiler ? &ctx->payload_tiler : &ctx->payload_vertex;

        /* There are some padding hacks on 32-bit */

#ifdef __LP64__
        int offset = 0;
#else
        int offset = 4;
#endif
        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(*payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job) - offset, payload, sizeof(*payload));
        return transfer;
}

/* Generates a set value job. It's unclear what exactly this does, why it's
 * necessary, and when to call it. */

static void
panfrost_set_value_job(struct panfrost_context *ctx)
{
        struct mali_job_descriptor_header job = {
                .job_type = JOB_TYPE_SET_VALUE,
                .job_descriptor_size = 1,
                .job_index = 1 + (2 * ctx->draw_count),
        };

        struct mali_payload_set_value payload = {
                .out = ctx->misc_0.gpu,
                .unknown = 0x3,
        };

        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(job) + sizeof(payload));
        memcpy(transfer.cpu, &job, sizeof(job));
        memcpy(transfer.cpu + sizeof(job), &payload, sizeof(payload));

        ctx->u_set_value_job = (struct mali_job_descriptor_header *) transfer.cpu;
        ctx->set_value_job = transfer.gpu;
}

/* Generate a fragment job. This should be called once per frame. (According to
 * presentations, this is supposed to correspond to eglSwapBuffers) */

mali_ptr
panfrost_fragment_job(struct panfrost_context *ctx)
{
        /* Update fragment FBD */
        panfrost_set_fragment_afbc(ctx);

        if (ctx->pipe_framebuffer.nr_cbufs == 1) {
                struct panfrost_resource *rsrc = (struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[0]->texture;

                if (rsrc->bo->has_checksum) {
                        if (require_sfbd) {
                                fprintf(stderr, "Checksumming not supported on SFBD\n");
                                assert(0);
                        }

                        int stride = util_format_get_stride(rsrc->base.format, rsrc->base.width0);

                        ctx->fragment_mfbd.unk3 |= MALI_MFBD_EXTRA;
                        ctx->fragment_extra.unk |= 0x420;
                        ctx->fragment_extra.checksum_stride = rsrc->bo->checksum_stride;
                        ctx->fragment_extra.checksum = rsrc->bo->gpu[0] + stride * rsrc->base.height0;
                }
        }

        /* The frame is complete and therefore the framebuffer descriptor is
         * ready for linkage and upload */

        size_t sz = require_sfbd ? sizeof(struct mali_single_framebuffer) : (sizeof(struct bifrost_framebuffer) + sizeof(struct bifrost_fb_extra) + sizeof(struct bifrost_render_target) * 1);
        struct panfrost_transfer fbd_t = panfrost_allocate_transient(ctx, sz);
        off_t offset = 0;

        if (require_sfbd) {
                /* Upload just the SFBD all at once */
                memcpy(fbd_t.cpu, &ctx->fragment_sfbd, sizeof(ctx->fragment_sfbd));
                offset += sizeof(ctx->fragment_sfbd);
        } else {
                /* Upload the MFBD header */
                memcpy(fbd_t.cpu, &ctx->fragment_mfbd, sizeof(ctx->fragment_mfbd));
                offset += sizeof(ctx->fragment_mfbd);

                /* Upload extra framebuffer info if necessary */
                if (ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
                        memcpy(fbd_t.cpu + offset, &ctx->fragment_extra, sizeof(struct bifrost_fb_extra));
                        offset += sizeof(struct bifrost_fb_extra);
                }

                /* Upload (single) render target */
                memcpy(fbd_t.cpu + offset, &ctx->fragment_rts[0], sizeof(struct bifrost_render_target) * 1);
        }

        /* Generate the fragment (frame) job */

        struct mali_job_descriptor_header header = {
                .job_type = JOB_TYPE_FRAGMENT,
                .job_index = 1,
#ifdef __LP64__
                .job_descriptor_size = 1
#endif
        };

        struct mali_payload_fragment payload = {
                .min_tile_coord = MALI_COORDINATE_TO_TILE_MIN(0, 0),
                .max_tile_coord = MALI_COORDINATE_TO_TILE_MAX(ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height),
                .framebuffer = fbd_t.gpu | (require_sfbd ? MALI_SFBD : MALI_MFBD),
        };

        if (!require_sfbd && ctx->fragment_mfbd.unk3 & MALI_MFBD_EXTRA) {
                /* Signal that there is an extra portion of the framebuffer
                 * descriptor */

                payload.framebuffer |= 2;
        }

        /* Normally, there should be no padding. However, fragment jobs are
         * shared with 64-bit Bifrost systems, and accordingly there are 4
         * bytes of zero padding in between. */

        struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(header) + sizeof(payload));
        memcpy(transfer.cpu, &header, sizeof(header));
        memcpy(transfer.cpu + sizeof(header), &payload, sizeof(payload));
        return transfer.gpu;
}

/* Emits attributes and varying descriptors, which should be called every draw,
 * excepting some obscure circumstances */

static void
panfrost_emit_vertex_data(struct panfrost_context *ctx)
{
        /* TODO: Only update the dirtied buffers */
        union mali_attr attrs[PIPE_MAX_ATTRIBS];
        union mali_attr varyings[PIPE_MAX_ATTRIBS];

        unsigned invocation_count = MALI_NEGATIVE(ctx->payload_tiler.prefix.invocation_count);

        for (int i = 0; i < ctx->vertex_buffer_count; ++i) {
                struct pipe_vertex_buffer *buf = &ctx->vertex_buffers[i];
                struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer.resource);

                /* Let's figure out the layout of the attributes in memory so
                 * we can be smart about size computation. The idea is to
                 * figure out the maximum src_offset, which tells us the latest
                 * spot a vertex could start. Meanwhile, we figure out the size
                 * of the attribute memory (assuming interleaved
                 * representation) and tack on the max src_offset for a
                 * reasonably good upper bound on the size.
                 *
                 * Proving correctness is left as an exercise to the reader.
                 */

                unsigned max_src_offset = 0;

                for (unsigned j = 0; j < ctx->vertex->num_elements; ++j) {
                        if (ctx->vertex->pipe[j].vertex_buffer_index != i) continue;
                        max_src_offset = MAX2(max_src_offset, ctx->vertex->pipe[j].src_offset);
                }

                /* Offset vertex count by draw_start to make sure we upload enough */
                attrs[i].stride = buf->stride;
                attrs[i].size = buf->stride * (ctx->payload_vertex.draw_start + invocation_count) + max_src_offset;
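
                /* Illustrative example of the bound computed above: with a
                 * 16-byte stride, draw_start of 0, 100 invocations, and a
                 * maximum src_offset of 12, the size is
                 * 16 * 100 + 12 = 1612 bytes -- enough for every vertex the
                 * draw can address, per the reasoning in the comment above. */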

                /* Vertex elements are -already- GPU-visible, at
                 * rsrc->gpu. However, attribute buffers must be 64-byte
                 * aligned. If they are not, for now we have to duplicate the
                 * buffer. */

                mali_ptr effective_address = (rsrc->bo->gpu[0] + buf->buffer_offset);

                if (effective_address & 0x3F) {
                        attrs[i].elements = panfrost_upload_transient(ctx, rsrc->bo->cpu[0] + buf->buffer_offset, attrs[i].size) | 1;
                } else {
                        attrs[i].elements = effective_address | 1;
                }
        }

        struct panfrost_varyings *vars = &ctx->vs->variants[ctx->vs->active_variant].varyings;

        for (int i = 0; i < vars->varying_buffer_count; ++i) {
                mali_ptr varying_address = ctx->varying_mem.gpu + ctx->varying_height;

                varyings[i].elements = varying_address | 1;
                varyings[i].stride = vars->varyings_stride[i];
                varyings[i].size = vars->varyings_stride[i] * invocation_count;

                /* If this varying has to be linked somewhere, do it now. See
                 * pan_assemble.c for the indices. TODO: Use a more generic
                 * linking interface */

                if (i == 1) {
                        /* gl_Position */
                        ctx->payload_tiler.postfix.position_varying = varying_address;
                } else if (i == 2) {
                        /* gl_PointSize */
                        ctx->payload_tiler.primitive_size.pointer = varying_address;
                }

                /* Varyings appear to need 64-byte alignment */
                ctx->varying_height += ALIGN(varyings[i].size, 64);

                /* Ensure that we fit */
                assert(ctx->varying_height < ctx->varying_mem.size);
        }

        ctx->payload_vertex.postfix.attributes = panfrost_upload_transient(ctx, attrs, ctx->vertex_buffer_count * sizeof(union mali_attr));

        mali_ptr varyings_p = panfrost_upload_transient(ctx, &varyings, vars->varying_buffer_count * sizeof(union mali_attr));
        ctx->payload_vertex.postfix.varyings = varyings_p;
        ctx->payload_tiler.postfix.varyings = varyings_p;
}

/* Go through dirty flags and actualise them in the cmdstream. */

void
panfrost_emit_for_draw(struct panfrost_context *ctx, bool with_vertex_data)
{
        if (with_vertex_data) {
                panfrost_emit_vertex_data(ctx);
        }

        if (ctx->dirty & PAN_DIRTY_RASTERIZER) {
                ctx->payload_tiler.gl_enables = ctx->rasterizer->tiler_gl_enables;
                panfrost_set_framebuffer_msaa(ctx, ctx->rasterizer->base.multisample);
        }

        if (ctx->occlusion_query) {
                ctx->payload_tiler.gl_enables |= MALI_OCCLUSION_QUERY | MALI_OCCLUSION_PRECISE;
                ctx->payload_tiler.postfix.occlusion_counter = ctx->occlusion_query->transfer.gpu;
        }

        if (ctx->dirty & PAN_DIRTY_VS) {
                assert(ctx->vs);

                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];

                /* Late shader descriptor assignments */
                vs->tripipe->texture_count = ctx->sampler_view_count[PIPE_SHADER_VERTEX];
                vs->tripipe->sampler_count = ctx->sampler_count[PIPE_SHADER_VERTEX];

                /* Who knows */
                vs->tripipe->midgard1.unknown1 = 0x2201;

                ctx->payload_vertex.postfix._shader_upper = vs->tripipe_gpu >> 4;

                /* Varying descriptor is tied to the vertex shader. Also the
                 * fragment shader, I suppose, but it's generated with the
                 * vertex shader so */

                struct panfrost_varyings *varyings = &ctx->vs->variants[ctx->vs->active_variant].varyings;

                ctx->payload_vertex.postfix.varying_meta = varyings->varyings_descriptor;
                ctx->payload_tiler.postfix.varying_meta = varyings->varyings_descriptor_fragment;
        }

        if (ctx->dirty & (PAN_DIRTY_RASTERIZER | PAN_DIRTY_VS)) {
                /* Check if we need to link the gl_PointSize varying */
                assert(ctx->vs);
                struct panfrost_shader_state *vs = &ctx->vs->variants[ctx->vs->active_variant];

                bool needs_gl_point_size = vs->writes_point_size && ctx->payload_tiler.prefix.draw_mode == MALI_POINTS;

                if (!needs_gl_point_size) {
                        /* If the size is constant, write it out. Otherwise,
                         * don't touch primitive_size (since we would clobber
                         * the pointer there) */

                        ctx->payload_tiler.primitive_size.constant = ctx->rasterizer->base.line_width;
                }

                /* Set the flag for varying (pointer) point size if the shader needs that */
                SET_BIT(ctx->payload_tiler.prefix.unknown_draw, MALI_DRAW_VARYING_SIZE, needs_gl_point_size);
        }

| 1208 | /* TODO: Maybe dirty track FS, maybe not. For now, it's transient. */ |
| 1209 | if (ctx->fs) |
| 1210 | ctx->dirty |= PAN_DIRTY_FS; |
| 1211 | |
| 1212 | if (ctx->dirty & PAN_DIRTY_FS) { |
| 1213 | assert(ctx->fs); |
| 1214 | struct panfrost_shader_state *variant = &ctx->fs->variants[ctx->fs->active_variant]; |
| 1215 | |
| 1216 | #define COPY(name) ctx->fragment_shader_core.name = variant->tripipe->name |
| 1217 | |
| 1218 | COPY(shader); |
| 1219 | COPY(attribute_count); |
| 1220 | COPY(varying_count); |
| 1221 | COPY(midgard1.uniform_count); |
| 1222 | COPY(midgard1.work_count); |
| 1223 | COPY(midgard1.unknown2); |
| 1224 | |
| 1225 | #undef COPY |
| 1226 | /* If there is a blend shader, work registers are shared */ |
| 1227 | |
| 1228 | if (ctx->blend->has_blend_shader) |
| 1229 | ctx->fragment_shader_core.midgard1.work_count = /*MAX2(ctx->fragment_shader_core.midgard1.work_count, ctx->blend->blend_work_count)*/16; |
| 1230 | |
| 1231 | /* Set late due to depending on render state */ |
| 1232 | /* The one at the end seems to mean "1 UBO" */ |
| 1233 | ctx->fragment_shader_core.midgard1.unknown1 = MALI_NO_ALPHA_TO_COVERAGE | 0x200 | 0x2201; |
| 1234 | |
| 1235 | /* Assign texture/sampler count right before upload */
| 1236 | ctx->fragment_shader_core.texture_count = ctx->sampler_view_count[PIPE_SHADER_FRAGMENT]; |
| 1237 | ctx->fragment_shader_core.sampler_count = ctx->sampler_count[PIPE_SHADER_FRAGMENT]; |
| 1238 | |
| 1239 | /* Assign the stencil refs late */ |
| 1240 | ctx->fragment_shader_core.stencil_front.ref = ctx->stencil_ref.ref_value[0]; |
| 1241 | ctx->fragment_shader_core.stencil_back.ref = ctx->stencil_ref.ref_value[1]; |
| 1242 | |
| 1243 | /* CAN_DISCARD should be set if the fragment shader possibly |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1244 | * contains a 'discard' instruction. It is likely this is |
| 1245 | * related to optimizations related to forward-pixel kill, as |
| 1246 | * per "Mali Performance 3: Is EGL_BUFFER_PRESERVED a good |
| 1247 | * thing?" by Peter Harris |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1248 | */ |
| 1249 | |
| 1250 | if (variant->can_discard) { |
| 1251 | ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; |
| 1252 | /* Observed value for discard-capable shaders; this full write
| 1253 | * replaces the MALI_NO_ALPHA_TO_COVERAGE setup from above */
| 1254 | ctx->fragment_shader_core.midgard1.unknown1 = 0x4200;
| 1255 | } |
| 1256 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1257 | /* Check if we're using the default blend descriptor (fast path) */ |
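| | /* (The constants below appear to describe the default state: rgb/alpha
| | * mode 0x122 seems to be the fixed-function encoding of plain source
| | * replace, and color_mask 0xf writes all four channels) */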
| 1258 | |
| 1259 | bool no_blending = |
| 1260 | !ctx->blend->has_blend_shader && |
| 1261 | (ctx->blend->equation.rgb_mode == 0x122) && |
| 1262 | (ctx->blend->equation.alpha_mode == 0x122) && |
| 1263 | (ctx->blend->equation.color_mask == 0xf); |
| 1264 | |
| 1265 | if (require_sfbd) { |
| 1266 | /* On single render-target (SFBD) platforms, the blend
| 1267 | * information lives inside the shader meta itself. We
| 1268 | * additionally need to signal CAN_DISCARD for nontrivial blend |
| 1269 | * modes (so we're able to read back the destination buffer) */ |
| 1270 | |
| 1271 | if (ctx->blend->has_blend_shader) { |
| 1272 | ctx->fragment_shader_core.blend_shader = ctx->blend->blend_shader; |
| 1273 | } else { |
| 1274 | memcpy(&ctx->fragment_shader_core.blend_equation, &ctx->blend->equation, sizeof(ctx->blend->equation)); |
| 1275 | } |
| 1276 | |
| 1277 | if (!no_blending) { |
| 1278 | ctx->fragment_shader_core.unknown2_3 |= MALI_CAN_DISCARD; |
| 1279 | } |
| 1280 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1281 | |
| 1282 | size_t size = sizeof(struct mali_shader_meta) + sizeof(struct mali_blend_meta); |
| 1283 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); |
| 1284 | memcpy(transfer.cpu, &ctx->fragment_shader_core, sizeof(struct mali_shader_meta)); |
| 1285 | |
| 1286 | ctx->payload_tiler.postfix._shader_upper = (transfer.gpu) >> 4; |
| 1287 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1288 | if (!require_sfbd) { |
| 1289 | /* Additional blend descriptor tacked on for jobs using MFBD */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1290 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1291 | unsigned blend_count = 0; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1292 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1293 | if (ctx->blend->has_blend_shader) { |
| 1294 | /* For a blend shader, the bottom nibble corresponds to |
| 1295 | * the number of work registers used, which signals the |
| 1296 | * -existence- of a blend shader */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1297 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1298 | assert(ctx->blend->blend_work_count >= 2); |
| 1299 | blend_count |= MIN2(ctx->blend->blend_work_count, 3); |
| 1300 | } else { |
| 1301 | /* Otherwise, the bottom bit simply specifies if |
| 1302 | * blending (anything other than REPLACE) is enabled */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1303 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1304 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1305 | if (!no_blending) |
| 1306 | blend_count |= 0x1; |
| 1307 | } |
| 1308 | |
| 1309 | /* Second blend equation is always a simple replace */ |
| 1310 | |
| 1311 | uint64_t replace_magic = 0xf0122122; |
| 1312 | struct mali_blend_equation replace_mode; |
| 1313 | memcpy(&replace_mode, &replace_magic, sizeof(replace_mode)); |
| 1314 | |
| 1315 | struct mali_blend_meta blend_meta[] = { |
| 1316 | { |
| 1317 | .unk1 = 0x200 | blend_count, |
| 1318 | .blend_equation_1 = ctx->blend->equation, |
| 1319 | .blend_equation_2 = replace_mode |
| 1320 | }, |
| 1321 | }; |
| 1322 | |
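| | /* If a blend shader is in use, its GPU address is written over
| | * blend_equation_1 -- the two fields appear to share the same slot
| | * in the hardware descriptor */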
| 1323 | if (ctx->blend->has_blend_shader) |
| 1324 | memcpy(&blend_meta[0].blend_equation_1, &ctx->blend->blend_shader, sizeof(ctx->blend->blend_shader)); |
| 1325 | |
| 1326 | memcpy(transfer.cpu + sizeof(struct mali_shader_meta), blend_meta, sizeof(blend_meta)); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1327 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1328 | } |
| 1329 | |
| 1330 | if (ctx->dirty & PAN_DIRTY_VERTEX) { |
| 1331 | ctx->payload_vertex.postfix.attribute_meta = ctx->vertex->descriptor_ptr; |
| 1332 | } |
| 1333 | |
| 1334 | if (ctx->dirty & PAN_DIRTY_SAMPLERS) { |
| 1335 | /* Upload samplers back to back, no padding */ |
| 1336 | |
| 1337 | for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { |
| 1338 | if (!ctx->sampler_count[t]) continue; |
| 1339 | |
| 1340 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, sizeof(struct mali_sampler_descriptor) * ctx->sampler_count[t]); |
| 1341 | struct mali_sampler_descriptor *desc = (struct mali_sampler_descriptor *) transfer.cpu; |
| 1342 | |
| 1343 | for (int i = 0; i < ctx->sampler_count[t]; ++i) { |
| 1344 | desc[i] = ctx->samplers[t][i]->hw; |
| 1345 | } |
| 1346 | |
| 1347 | if (t == PIPE_SHADER_FRAGMENT) |
| 1348 | ctx->payload_tiler.postfix.sampler_descriptor = transfer.gpu; |
| 1349 | else if (t == PIPE_SHADER_VERTEX) |
| 1350 | ctx->payload_vertex.postfix.sampler_descriptor = transfer.gpu; |
| 1351 | else |
| 1352 | assert(0); |
| 1353 | } |
| 1354 | } |
| 1355 | |
| 1356 | if (ctx->dirty & PAN_DIRTY_TEXTURES) { |
| 1357 | for (int t = 0; t <= PIPE_SHADER_FRAGMENT; ++t) { |
| 1358 | /* Shortcircuit */ |
| 1359 | if (!ctx->sampler_view_count[t]) continue; |
| 1360 | |
| 1361 | uint64_t trampolines[PIPE_MAX_SHADER_SAMPLER_VIEWS]; |
| 1362 | |
| 1363 | for (int i = 0; i < ctx->sampler_view_count[t]; ++i) { |
| 1364 | if (!ctx->sampler_views[t][i]) |
| 1365 | continue; |
| 1366 | |
| 1367 | struct pipe_resource *tex_rsrc = ctx->sampler_views[t][i]->base.texture; |
| 1368 | struct panfrost_resource *rsrc = (struct panfrost_resource *) tex_rsrc; |
| 1369 | |
| 1370 | /* Inject the address in. */ |
| 1371 | for (int l = 0; l < (tex_rsrc->last_level + 1); ++l) |
| 1372 | ctx->sampler_views[t][i]->hw.swizzled_bitmaps[l] = rsrc->bo->gpu[l]; |
| 1373 | |
| 1374 | /* Workaround maybe-errata (?) with non-mipmaps */ |
| 1375 | int s = ctx->sampler_views[t][i]->hw.nr_mipmap_levels; |
| 1376 | |
| 1377 | if (!rsrc->bo->is_mipmap) { |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1378 | if (is_t6xx) { |
| 1379 | /* HW ERRATA, not needed after t6XX */ |
| 1380 | ctx->sampler_views[t][i]->hw.swizzled_bitmaps[1] = rsrc->bo->gpu[0]; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1381 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1382 | ctx->sampler_views[t][i]->hw.unknown3A = 1; |
| 1383 | } |
| 1384 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1385 | ctx->sampler_views[t][i]->hw.nr_mipmap_levels = 0; |
| 1386 | } |
| 1387 | |
| 1388 | trampolines[i] = panfrost_upload_transient(ctx, &ctx->sampler_views[t][i]->hw, sizeof(struct mali_texture_descriptor)); |
| 1389 | |
| 1390 | /* Restore */ |
| 1391 | ctx->sampler_views[t][i]->hw.nr_mipmap_levels = s; |
| 1392 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1393 | if (is_t6xx) { |
| 1394 | ctx->sampler_views[t][i]->hw.unknown3A = 0; |
| 1395 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1396 | } |
| 1397 | |
| 1398 | mali_ptr trampoline = panfrost_upload_transient(ctx, trampolines, sizeof(uint64_t) * ctx->sampler_view_count[t]); |
| 1399 | |
| 1400 | if (t == PIPE_SHADER_FRAGMENT) |
| 1401 | ctx->payload_tiler.postfix.texture_trampoline = trampoline; |
| 1402 | else if (t == PIPE_SHADER_VERTEX) |
| 1403 | ctx->payload_vertex.postfix.texture_trampoline = trampoline; |
| 1404 | else |
| 1405 | assert(0); |
| 1406 | } |
| 1407 | } |
| 1408 | |
| 1409 | /* Generate the viewport vector of the form: <width/2, height/2, centerx, centery> */ |
| 1410 | const struct pipe_viewport_state *vp = &ctx->pipe_viewport; |
| 1411 | |
| 1412 | float viewport_vec4[] = { |
| 1413 | vp->scale[0], |
| 1414 | fabsf(vp->scale[1]), |
| 1415 | |
| 1416 | vp->translate[0], |
| 1417 | /* XXX: should be -1.0 * vp->translate[1] (the viewport centre), but the value that works in practice is |scale[1]| */ fabsf(vp->scale[1])
| 1418 | }; |
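| | /* (Worked example: a full-screen 1920x1080 viewport has
| | * scale = (960, +/-540) and translate = (960, 540), giving
| | * <960, 540, 960, 540>, i.e. <width/2, height/2, centre x, centre y>.
| | * The last element only matches "centre y" because the viewport is
| | * centred; see the XXX above) */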
| 1419 | |
| 1420 | for (int i = 0; i < PIPE_SHADER_TYPES; ++i) { |
| 1421 | struct panfrost_constant_buffer *buf = &ctx->constant_buffer[i]; |
| 1422 | |
| 1423 | if (i == PIPE_SHADER_VERTEX || i == PIPE_SHADER_FRAGMENT) { |
| 1424 | /* It doesn't matter if we don't use all the memory;
| 1425 | * we'd need a dummy UBO anyway. Allocate enough for the worst case */
| 1426 | |
| 1427 | size_t size = sizeof(viewport_vec4) + buf->size; |
| 1428 | struct panfrost_transfer transfer = panfrost_allocate_transient(ctx, size); |
| 1429 | |
| 1430 | /* Keep track of how much we've uploaded */
| 1431 | off_t offset = 0; |
| 1432 | |
| 1433 | if (i == PIPE_SHADER_VERTEX) { |
| 1434 | /* Upload viewport */ |
| 1435 | memcpy(transfer.cpu + offset, viewport_vec4, sizeof(viewport_vec4)); |
| 1436 | offset += sizeof(viewport_vec4); |
| 1437 | } |
| 1438 | |
| 1439 | /* Upload uniforms */ |
| 1440 | memcpy(transfer.cpu + offset, buf->buffer, buf->size); |
| 1441 | |
| 1442 | int uniform_count = 0; |
| 1443 | |
| 1444 | struct mali_vertex_tiler_postfix *postfix = NULL;
| 1445 | |
| 1446 | switch (i) { |
| 1447 | case PIPE_SHADER_VERTEX: |
| 1448 | uniform_count = ctx->vs->variants[ctx->vs->active_variant].uniform_count; |
| 1449 | postfix = &ctx->payload_vertex.postfix; |
| 1450 | break; |
| 1451 | |
| 1452 | case PIPE_SHADER_FRAGMENT: |
| 1453 | uniform_count = ctx->fs->variants[ctx->fs->active_variant].uniform_count; |
| 1454 | postfix = &ctx->payload_tiler.postfix; |
| 1455 | break; |
| 1456 | |
| 1457 | default: |
| 1458 | printf("Unknown shader stage %d in uniform upload\n", i); |
| 1459 | assert(0); |
| 1460 | } |
| 1461 | |
| 1462 | /* Also attach the same buffer as a UBO for extended access */ |
| 1463 | |
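| | /* (MALI_POSITIVE is assumed to encode "n - 1", so the size below
| | * advertises 2 + uniform_count units; the exact unit is still part
| | * of the guesswork around this descriptor) */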
| 1464 | struct mali_uniform_buffer_meta uniform_buffers[] = { |
| 1465 | { |
| 1466 | .size = MALI_POSITIVE((2 + uniform_count)), |
| 1467 | .ptr = transfer.gpu >> 2, |
| 1468 | }, |
| 1469 | }; |
| 1470 | |
| 1471 | mali_ptr ubufs = panfrost_upload_transient(ctx, uniform_buffers, sizeof(uniform_buffers)); |
| 1472 | postfix->uniforms = transfer.gpu; |
| 1473 | postfix->uniform_buffers = ubufs; |
| 1474 | |
| 1475 | buf->dirty = 0; |
| 1476 | } |
| 1477 | } |
| 1478 | |
| 1479 | ctx->dirty = 0; |
| 1480 | } |
| 1481 | |
| 1482 | /* Corresponds to exactly one draw, but does not submit anything */ |
| 1483 | |
| 1484 | static void |
| 1485 | panfrost_queue_draw(struct panfrost_context *ctx) |
| 1486 | { |
| 1487 | /* TODO: Expand the array? */ |
| 1488 | if (ctx->draw_count >= MAX_DRAW_CALLS) { |
| 1489 | printf("Job buffer overflow, ignoring draw\n"); |
| 1490 | assert(0);
| | 
| | /* In release builds, really do ignore the draw rather than overflow */
| | return;
| 1491 | }
| 1492 | |
| 1493 | /* Handle dirty flags now */ |
| 1494 | panfrost_emit_for_draw(ctx, true); |
| 1495 | |
| 1496 | struct panfrost_transfer vertex = panfrost_vertex_tiler_job(ctx, false, false); |
| 1497 | struct panfrost_transfer tiler = panfrost_vertex_tiler_job(ctx, true, false); |
| 1498 | |
| 1499 | ctx->u_vertex_jobs[ctx->vertex_job_count] = (struct mali_job_descriptor_header *) vertex.cpu; |
| 1500 | ctx->vertex_jobs[ctx->vertex_job_count++] = vertex.gpu; |
| 1501 | |
| 1502 | ctx->u_tiler_jobs[ctx->tiler_job_count] = (struct mali_job_descriptor_header *) tiler.cpu; |
| 1503 | ctx->tiler_jobs[ctx->tiler_job_count++] = tiler.gpu; |
| 1504 | |
| 1505 | ctx->draw_count++; |
| 1506 | } |
| 1507 | |
| 1508 | /* At the end of the frame, the vertex and tiler jobs are linked together and |
| 1509 | * then the fragment job is plonked at the end. Set value job is first for |
| 1510 | * unknown reasons. */ |
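| | /* Roughly, the chain assembled below looks like:
| | *
| | *   set_value -> vertex_0 -> ... -> vertex_n -> tiler_0 -> ... -> tiler_n
| | *
| | * with the fragment job handled separately at submit time */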
| 1511 | |
| 1512 | static void |
| 1513 | panfrost_link_job_pair(struct mali_job_descriptor_header *first, mali_ptr next) |
| 1514 | { |
| 1515 | if (first->job_descriptor_size) |
| 1516 | first->next_job_64 = (u64) (uintptr_t) next; |
| 1517 | else |
| 1518 | first->next_job_32 = (u32) (uintptr_t) next; |
| 1519 | } |
| 1520 | |
| 1521 | static void |
| 1522 | panfrost_link_jobs(struct panfrost_context *ctx) |
| 1523 | { |
| 1524 | if (ctx->draw_count) { |
| 1525 | /* Generate the set_value_job */ |
| 1526 | panfrost_set_value_job(ctx); |
| 1527 | |
| 1528 | /* Have the first vertex job depend on the set value job */ |
| 1529 | ctx->u_vertex_jobs[0]->job_dependency_index_1 = ctx->u_set_value_job->job_index; |
| 1530 | |
| 1531 | /* SV -> V */ |
| 1532 | panfrost_link_job_pair(ctx->u_set_value_job, ctx->vertex_jobs[0]); |
| 1533 | } |
| 1534 | |
| 1535 | /* V -> V/T (the last vertex job links into the first tiler job) */
| 1536 | for (int i = 0; i < ctx->vertex_job_count; ++i) { |
| 1537 | bool isLast = (i + 1) == ctx->vertex_job_count; |
| 1538 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1539 | panfrost_link_job_pair(ctx->u_vertex_jobs[i], isLast ? ctx->tiler_jobs[0] : ctx->vertex_jobs[i + 1]); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1540 | } |
| 1541 | |
| 1542 | /* T -> T/null */ |
| 1543 | for (int i = 0; i < ctx->tiler_job_count; ++i) { |
| 1544 | bool isLast = (i + 1) == ctx->tiler_job_count; |
| 1545 | panfrost_link_job_pair(ctx->u_tiler_jobs[i], isLast ? 0 : ctx->tiler_jobs[i + 1]); |
| 1546 | } |
| 1547 | } |
| 1548 | |
| 1549 | /* The entire frame is in memory -- send it off to the kernel! */ |
| 1550 | |
| 1551 | static void |
| 1552 | panfrost_submit_frame(struct panfrost_context *ctx, bool flush_immediate) |
| 1553 | { |
| 1554 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 1555 | struct panfrost_screen *screen = pan_screen(gallium->screen); |
| 1556 | |
| 1557 | /* Edge case if screen is cleared and nothing else */ |
| 1558 | bool has_draws = ctx->draw_count > 0; |
| 1559 | |
| 1560 | /* Work around a bizarre lockup (a hardware erratum?) */
| 1561 | if (!has_draws) |
| 1562 | flush_immediate = true; |
| 1563 | |
| 1564 | /* A number of jobs are batched -- this must be linked and cleared */ |
| 1565 | panfrost_link_jobs(ctx); |
| 1566 | |
| 1567 | ctx->draw_count = 0; |
| 1568 | ctx->vertex_job_count = 0; |
| 1569 | ctx->tiler_job_count = 0; |
| 1570 | |
| 1571 | #ifndef DRY_RUN |
| 1572 | |
Alyssa Rosenzweig | d43ec10 | 2019-02-05 05:13:50 +0000 | [diff] [blame] | 1573 | bool is_scanout = panfrost_is_scanout(ctx); |
| 1574 | int fragment_id = screen->driver->submit_vs_fs_job(ctx, has_draws, is_scanout); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1575 | |
| 1576 | /* If visual, we can stall a frame */ |
| 1577 | |
| 1578 | if (panfrost_is_scanout(ctx) && !flush_immediate) |
| 1579 | screen->driver->force_flush_fragment(ctx); |
| 1580 | |
| 1581 | screen->last_fragment_id = fragment_id; |
| 1582 | screen->last_fragment_flushed = false; |
| 1583 | |
| 1584 | /* If readback, flush now (hurts the pipelined performance) */ |
| 1585 | if (panfrost_is_scanout(ctx) && flush_immediate) |
| 1586 | screen->driver->force_flush_fragment(ctx); |
| 1587 | |
Alyssa Rosenzweig | 4c82abb | 2019-02-25 03:31:29 +0000 | [diff] [blame^] | 1588 | if (screen->driver->dump_counters && pan_counters_base) { |
Alyssa Rosenzweig | 4a4726a | 2019-02-18 23:32:05 +0000 | [diff] [blame] | 1589 | screen->driver->dump_counters(screen); |
| 1590 | |
| 1591 | char filename[128]; |
Alyssa Rosenzweig | 4c82abb | 2019-02-25 03:31:29 +0000 | [diff] [blame^] | 1592 | snprintf(filename, sizeof(filename), "%s/frame%d.mdgprf", pan_counters_base, ++performance_counter_number); |
Alyssa Rosenzweig | 4a4726a | 2019-02-18 23:32:05 +0000 | [diff] [blame] | 1593 | FILE *fp = fopen(filename, "wb");
| | 
| | if (fp) {
| 1594 | fwrite(screen->perf_counters.cpu, sizeof(uint32_t), 4096, fp);
| 1595 | fclose(fp);
| | } else {
| | fprintf(stderr, "Couldn't open %s to dump counters\n", filename);
| | }
| 1596 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1597 | |
| 1598 | #endif |
| 1599 | } |
| 1600 | |
| 1601 | bool dont_scanout = false; |
| 1602 | |
| 1603 | void |
| 1604 | panfrost_flush( |
| 1605 | struct pipe_context *pipe, |
| 1606 | struct pipe_fence_handle **fence, |
| 1607 | unsigned flags) |
| 1608 | { |
| 1609 | struct panfrost_context *ctx = pan_context(pipe); |
| 1610 | |
| 1611 | /* If there is nothing drawn, skip the frame */ |
| 1612 | if (!ctx->draw_count && !ctx->frame_cleared) return; |
| 1613 | |
| 1614 | if (!ctx->frame_cleared) { |
| 1615 | /* While there are draws, there was no clear. This is a partial |
| 1616 | * update, which needs to be handled via the "wallpaper" |
| 1617 | * method. We also need to fake a clear, just to get the |
| 1618 | * FRAGMENT job correct. */ |
| 1619 | |
| 1620 | panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil); |
| 1621 | |
| 1622 | panfrost_draw_wallpaper(pipe); |
| 1623 | } |
| 1624 | |
| 1625 | /* Frame clear handled, reset */ |
| 1626 | ctx->frame_cleared = false; |
| 1627 | |
| 1628 | /* Whether to stall the pipeline for immediately correct results */ |
| 1629 | bool flush_immediate = flags & PIPE_FLUSH_END_OF_FRAME; |
| 1630 | |
| 1631 | /* Submit the frame itself */ |
| 1632 | panfrost_submit_frame(ctx, flush_immediate); |
| 1633 | |
| 1634 | /* Prepare for the next frame */ |
| 1635 | panfrost_invalidate_frame(ctx); |
| 1636 | } |
| 1637 | |
| 1638 | #define DEFINE_CASE(c) case PIPE_PRIM_##c: return MALI_##c; |
| 1639 | |
| 1640 | static int |
| 1641 | g2m_draw_mode(enum pipe_prim_type mode) |
| 1642 | { |
| 1643 | switch (mode) { |
| 1644 | DEFINE_CASE(POINTS); |
| 1645 | DEFINE_CASE(LINES); |
| 1646 | DEFINE_CASE(LINE_LOOP); |
| 1647 | DEFINE_CASE(LINE_STRIP); |
| 1648 | DEFINE_CASE(TRIANGLES); |
| 1649 | DEFINE_CASE(TRIANGLE_STRIP); |
| 1650 | DEFINE_CASE(TRIANGLE_FAN); |
| 1651 | DEFINE_CASE(QUADS); |
| 1652 | DEFINE_CASE(QUAD_STRIP); |
| 1653 | DEFINE_CASE(POLYGON); |
| 1654 | |
| 1655 | default: |
| 1656 | printf("Illegal draw mode %d\n", mode); |
| 1657 | assert(0); |
| 1658 | return MALI_LINE_LOOP; |
| 1659 | } |
| 1660 | } |
| 1661 | |
| 1662 | #undef DEFINE_CASE |
| 1663 | |
| 1664 | static unsigned |
| 1665 | panfrost_translate_index_size(unsigned size) |
| 1666 | { |
| 1667 | switch (size) { |
| 1668 | case 1: |
| 1669 | return MALI_DRAW_INDEXED_UINT8; |
| 1670 | |
| 1671 | case 2: |
| 1672 | return MALI_DRAW_INDEXED_UINT16; |
| 1673 | |
| 1674 | case 4: |
| 1675 | return MALI_DRAW_INDEXED_UINT32; |
| 1676 | |
| 1677 | default: |
| 1678 | printf("Unknown index size %d\n", size); |
| 1679 | assert(0); |
| 1680 | return 0; |
| 1681 | } |
| 1682 | } |
| 1683 | |
| 1684 | static const uint8_t * |
| 1685 | panfrost_get_index_buffer_raw(const struct pipe_draw_info *info) |
| 1686 | { |
| 1687 | if (info->has_user_indices) { |
| 1688 | return (const uint8_t *) info->index.user; |
| 1689 | } else { |
| 1690 | struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); |
| 1691 | return (const uint8_t *) rsrc->bo->cpu[0]; |
| 1692 | } |
| 1693 | } |
| 1694 | |
| 1695 | /* Gets a GPU address for the associated index buffer. Only guaranteed to be
| 1696 | * good for the duration of the draw (transient), though it could last longer */
| 1697 | |
| 1698 | static mali_ptr |
| 1699 | panfrost_get_index_buffer_mapped(struct panfrost_context *ctx, const struct pipe_draw_info *info) |
| 1700 | { |
| 1701 | struct panfrost_resource *rsrc = (struct panfrost_resource *) (info->index.resource); |
| 1702 | |
| 1703 | off_t offset = info->start * info->index_size; |
| 1704 | |
| 1705 | if (!info->has_user_indices) { |
| 1706 | /* Only resources can be directly mapped */ |
| 1707 | return rsrc->bo->gpu[0] + offset; |
| 1708 | } else { |
| 1709 | /* Otherwise, we need to upload to transient memory */ |
| 1710 | const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info); |
| 1711 | return panfrost_upload_transient(ctx, ibuf8 + offset, info->count * info->index_size); |
| 1712 | } |
| 1713 | } |
| 1714 | |
| 1715 | static void |
| 1716 | panfrost_draw_vbo( |
| 1717 | struct pipe_context *pipe, |
| 1718 | const struct pipe_draw_info *info); |
| 1719 | |
| 1720 | #define CALCULATE_MIN_MAX_INDEX(T, buffer, start, count) \ |
| 1721 | for (unsigned _idx = (start); _idx < (start + count); ++_idx) { \ |
| 1722 | T idx = buffer[_idx]; \ |
| 1723 | if (idx > max_index) max_index = idx; \ |
| 1724 | if (idx < min_index) min_index = idx; \ |
| 1725 | } |
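| | /*
| | * Hypothetical usage sketch:
| | *
| | *   const uint16_t *ibuf16 = ...;
| | *   CALCULATE_MIN_MAX_INDEX(uint16_t, ibuf16, info->start, info->count);
| | *
| | * scans ibuf16[start .. start + count - 1] and tightens the local
| | * min_index/max_index variables, which the macro assumes are already
| | * declared (and initialised) in the enclosing scope.
| | */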
| 1726 | |
| 1727 | static void |
| 1728 | panfrost_draw_vbo( |
| 1729 | struct pipe_context *pipe, |
| 1730 | const struct pipe_draw_info *info) |
| 1731 | { |
| 1732 | struct panfrost_context *ctx = pan_context(pipe); |
| 1733 | |
| 1734 | ctx->payload_vertex.draw_start = info->start; |
| 1735 | ctx->payload_tiler.draw_start = info->start; |
| 1736 | |
| 1737 | int mode = info->mode; |
| 1738 | |
Alyssa Rosenzweig | 85e2bb5 | 2019-02-08 02:28:12 +0000 | [diff] [blame] | 1739 | /* Fallback for unsupported modes */ |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1740 | |
Alyssa Rosenzweig | 85e2bb5 | 2019-02-08 02:28:12 +0000 | [diff] [blame] | 1741 | if (!(ctx->draw_modes & mode)) { |
| 1742 | if (mode == PIPE_PRIM_QUADS && info->count == 4 && ctx->rasterizer && !ctx->rasterizer->base.flatshade) { |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1743 | mode = PIPE_PRIM_TRIANGLE_FAN; |
| 1744 | } else { |
| 1745 | if (info->count < 4) { |
| 1746 | /* Degenerate case? */ |
| 1747 | return; |
| 1748 | } |
| 1749 | |
| 1750 | util_primconvert_save_rasterizer_state(ctx->primconvert, &ctx->rasterizer->base); |
| 1751 | util_primconvert_draw_vbo(ctx->primconvert, info); |
| 1752 | return; |
| 1753 | } |
| 1754 | } |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1755 | |
| 1756 | ctx->payload_tiler.prefix.draw_mode = g2m_draw_mode(mode); |
| 1757 | |
| 1758 | ctx->vertex_count = info->count; |
| 1759 | |
| 1760 | /* For non-indexed draws, they're the same */ |
| 1761 | unsigned invocation_count = ctx->vertex_count; |
| 1762 | |
| 1763 | /* For point draws, or for larger vertex counts (more than fits in a
| 1764 | * 16-bit unsigned short), the other value is needed, otherwise there
| 1765 | * will be bizarre rendering artefacts. It's not clear what these values mean yet. */
| 1766 | |
| 1767 | ctx->payload_tiler.prefix.unknown_draw &= ~(0x3000 | 0x18000); |
Alyssa Rosenzweig | 85e2bb5 | 2019-02-08 02:28:12 +0000 | [diff] [blame] | 1768 | ctx->payload_tiler.prefix.unknown_draw |= (mode == PIPE_PRIM_POINTS || ctx->vertex_count > 65535) ? 0x3000 : 0x18000; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1769 | |
| 1770 | if (info->index_size) { |
| 1771 | /* Calculate the min/max index used so we can figure out how |
| 1772 | * many times to invoke the vertex shader */ |
| 1773 | |
| 1774 | const uint8_t *ibuf8 = panfrost_get_index_buffer_raw(info); |
| 1775 | |
| 1776 | int min_index = INT_MAX; |
| 1777 | int max_index = 0; |
| 1778 | |
| 1779 | if (info->index_size == 1) { |
| 1780 | CALCULATE_MIN_MAX_INDEX(uint8_t, ibuf8, info->start, info->count); |
| 1781 | } else if (info->index_size == 2) { |
| 1782 | const uint16_t *ibuf16 = (const uint16_t *) ibuf8; |
| 1783 | CALCULATE_MIN_MAX_INDEX(uint16_t, ibuf16, info->start, info->count); |
| 1784 | } else if (info->index_size == 4) { |
| 1785 | const uint32_t *ibuf32 = (const uint32_t *) ibuf8; |
| 1786 | CALCULATE_MIN_MAX_INDEX(uint32_t, ibuf32, info->start, info->count); |
| 1787 | } else { |
| 1788 | assert(0); |
| 1789 | } |
| 1790 | |
| 1791 | /* Make sure we didn't go crazy */ |
| 1792 | assert(min_index <= max_index);
| 1795 | |
| 1796 | /* Use the corresponding values */ |
| 1797 | invocation_count = max_index - min_index + 1; |
| 1798 | ctx->payload_vertex.draw_start = min_index; |
| 1799 | ctx->payload_tiler.draw_start = min_index; |
| 1800 | |
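| | /* (Best guess: negative_start is added to each fetched index so the
| | * range is rebased at zero, matching draw_start = min_index above) */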
| 1801 | ctx->payload_tiler.prefix.negative_start = -min_index; |
| 1802 | ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(info->count); |
| 1803 | |
| 1804 | //assert(!info->restart_index); /* TODO: Research */ |
| 1805 | assert(!info->index_bias); |
| 1806 | //assert(!info->min_index); /* TODO: Use value */ |
| 1807 | |
| 1808 | ctx->payload_tiler.prefix.unknown_draw |= panfrost_translate_index_size(info->index_size); |
| 1809 | ctx->payload_tiler.prefix.indices = panfrost_get_index_buffer_mapped(ctx, info); |
| 1810 | } else { |
| 1811 | /* Index count == vertex count, if no indexing is applied, as |
| 1812 | * if it is internally indexed in the expected order */ |
| 1813 | |
| 1814 | ctx->payload_tiler.prefix.negative_start = 0; |
| 1815 | ctx->payload_tiler.prefix.index_count = MALI_POSITIVE(ctx->vertex_count); |
| 1816 | |
| 1817 | /* Reverse index state */ |
| 1818 | ctx->payload_tiler.prefix.unknown_draw &= ~MALI_DRAW_INDEXED_UINT32; |
| 1819 | ctx->payload_tiler.prefix.indices = (uintptr_t) NULL; |
| 1820 | } |
| 1821 | |
| 1822 | ctx->payload_vertex.prefix.invocation_count = MALI_POSITIVE(invocation_count); |
| 1823 | ctx->payload_tiler.prefix.invocation_count = MALI_POSITIVE(invocation_count); |
| 1824 | |
| 1825 | /* Fire off the draw itself */ |
| 1826 | panfrost_queue_draw(ctx); |
| 1827 | } |
| 1828 | |
| 1829 | /* CSO state */ |
| 1830 | |
| 1831 | static void |
| 1832 | panfrost_generic_cso_delete(struct pipe_context *pctx, void *hwcso) |
| 1833 | { |
| 1834 | free(hwcso); |
| 1835 | } |
| 1836 | |
| 1837 | static void |
| 1838 | panfrost_set_scissor(struct panfrost_context *ctx) |
| 1839 | { |
| 1840 | const struct pipe_scissor_state *ss = &ctx->scissor; |
| 1841 | |
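| | /* Note the trailing "&& 0" below: scissor application is disabled
| | * for now, so the viewport always covers the full framebuffer */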
| 1842 | if (ss && ctx->rasterizer && ctx->rasterizer->base.scissor && 0) { |
| 1843 | ctx->viewport->viewport0[0] = ss->minx; |
| 1844 | ctx->viewport->viewport0[1] = ss->miny; |
| 1845 | ctx->viewport->viewport1[0] = MALI_POSITIVE(ss->maxx); |
| 1846 | ctx->viewport->viewport1[1] = MALI_POSITIVE(ss->maxy); |
| 1847 | } else { |
| 1848 | ctx->viewport->viewport0[0] = 0; |
| 1849 | ctx->viewport->viewport0[1] = 0; |
| 1850 | ctx->viewport->viewport1[0] = MALI_POSITIVE(ctx->pipe_framebuffer.width); |
| 1851 | ctx->viewport->viewport1[1] = MALI_POSITIVE(ctx->pipe_framebuffer.height); |
| 1852 | } |
| 1853 | } |
| 1854 | |
| 1855 | static void * |
| 1856 | panfrost_create_rasterizer_state( |
| 1857 | struct pipe_context *pctx, |
| 1858 | const struct pipe_rasterizer_state *cso) |
| 1859 | { |
| 1860 | struct panfrost_rasterizer *so = CALLOC_STRUCT(panfrost_rasterizer); |
| 1861 | |
| 1862 | so->base = *cso; |
| 1863 | |
| 1864 | /* Bitmask, unknown meaning of the start value */ |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 1865 | so->tiler_gl_enables = is_t6xx ? 0x105 : 0x7; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1866 | |
| 1867 | so->tiler_gl_enables |= MALI_FRONT_FACE( |
| 1868 | cso->front_ccw ? MALI_CCW : MALI_CW); |
| 1869 | |
| 1870 | if (cso->cull_face & PIPE_FACE_FRONT) |
| 1871 | so->tiler_gl_enables |= MALI_CULL_FACE_FRONT; |
| 1872 | |
| 1873 | if (cso->cull_face & PIPE_FACE_BACK) |
| 1874 | so->tiler_gl_enables |= MALI_CULL_FACE_BACK; |
| 1875 | |
| 1876 | return so; |
| 1877 | } |
| 1878 | |
| 1879 | static void |
| 1880 | panfrost_bind_rasterizer_state( |
| 1881 | struct pipe_context *pctx, |
| 1882 | void *hwcso) |
| 1883 | { |
| 1884 | struct panfrost_context *ctx = pan_context(pctx); |
| 1885 | struct pipe_rasterizer_state *cso = hwcso; |
| 1886 | |
| 1887 | /* TODO: Why can't rasterizer be NULL ever? Other drivers are fine.. */ |
| 1888 | if (!hwcso) |
| 1889 | return; |
| 1890 | |
| 1891 | /* If scissor test has changed, we'll need to update that now */ |
| 1892 | bool update_scissor = !ctx->rasterizer || ctx->rasterizer->base.scissor != cso->scissor; |
| 1893 | |
| 1894 | ctx->rasterizer = hwcso; |
| 1895 | |
| 1896 | /* Actualise late changes */ |
| 1897 | if (update_scissor) |
| 1898 | panfrost_set_scissor(ctx); |
| 1899 | |
| 1900 | ctx->dirty |= PAN_DIRTY_RASTERIZER; |
| 1901 | } |
| 1902 | |
| 1903 | static void * |
| 1904 | panfrost_create_vertex_elements_state( |
| 1905 | struct pipe_context *pctx, |
| 1906 | unsigned num_elements, |
| 1907 | const struct pipe_vertex_element *elements) |
| 1908 | { |
| 1909 | struct panfrost_context *ctx = pan_context(pctx); |
| 1910 | struct panfrost_vertex_state *so = CALLOC_STRUCT(panfrost_vertex_state); |
| 1911 | |
| 1912 | so->num_elements = num_elements; |
| 1913 | memcpy(so->pipe, elements, sizeof(*elements) * num_elements); |
| 1914 | |
| 1915 | struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_attr_meta) * num_elements, HEAP_DESCRIPTOR); |
| 1916 | so->hw = (struct mali_attr_meta *) transfer.cpu; |
| 1917 | so->descriptor_ptr = transfer.gpu; |
| 1918 | |
| 1919 | /* Translate the pipe vertex elements into hardware attribute metadata */
| 1920 | |
| 1921 | for (int i = 0; i < num_elements; ++i) { |
| 1922 | so->hw[i].index = elements[i].vertex_buffer_index; |
| 1923 | |
| 1924 | enum pipe_format fmt = elements[i].src_format; |
| 1925 | const struct util_format_description *desc = util_format_description(fmt); |
| 1926 | so->hw[i].unknown1 = 0x2; |
| 1927 | so->hw[i].swizzle = panfrost_get_default_swizzle(desc->nr_channels); |
| 1928 | |
| 1929 | so->hw[i].format = panfrost_find_format(desc); |
| 1930 | |
| 1931 | /* The field itself should probably be shifted over */ |
| 1932 | so->hw[i].src_offset = elements[i].src_offset; |
| 1933 | } |
| 1934 | |
| 1935 | return so; |
| 1936 | } |
| 1937 | |
| 1938 | static void |
| 1939 | panfrost_bind_vertex_elements_state( |
| 1940 | struct pipe_context *pctx, |
| 1941 | void *hwcso) |
| 1942 | { |
| 1943 | struct panfrost_context *ctx = pan_context(pctx); |
| 1944 | |
| 1945 | ctx->vertex = hwcso; |
| 1946 | ctx->dirty |= PAN_DIRTY_VERTEX; |
| 1947 | } |
| 1948 | |
| 1949 | static void |
| 1950 | panfrost_delete_vertex_elements_state(struct pipe_context *pctx, void *hwcso) |
| 1951 | { |
Alyssa Rosenzweig | acc52ff | 2019-02-14 04:00:19 +0000 | [diff] [blame] | 1952 | struct panfrost_vertex_state *so = (struct panfrost_vertex_state *) hwcso; |
| 1953 | unsigned bytes = sizeof(struct mali_attr_meta) * so->num_elements; |
| 1954 | printf("Vertex elements delete leaks descriptor (%d bytes)\n", bytes); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1955 | free(hwcso); |
| 1956 | } |
| 1957 | |
| 1958 | static void * |
| 1959 | panfrost_create_shader_state( |
| 1960 | struct pipe_context *pctx, |
| 1961 | const struct pipe_shader_state *cso) |
| 1962 | { |
| 1963 | struct panfrost_shader_variants *so = CALLOC_STRUCT(panfrost_shader_variants); |
| 1964 | so->base = *cso; |
| 1965 | |
| 1966 | /* Token deep copy to prevent memory corruption */ |
| 1967 | |
| 1968 | if (cso->type == PIPE_SHADER_IR_TGSI) |
| 1969 | so->base.tokens = tgsi_dup_tokens(so->base.tokens); |
| 1970 | |
| 1971 | return so; |
| 1972 | } |
| 1973 | |
| 1974 | static void |
| 1975 | panfrost_delete_shader_state( |
| 1976 | struct pipe_context *pctx, |
| 1977 | void *so) |
| 1978 | { |
Alyssa Rosenzweig | acc52ff | 2019-02-14 04:00:19 +0000 | [diff] [blame] | 1979 | struct panfrost_shader_variants *cso = (struct panfrost_shader_variants *) so; |
| 1980 | |
| 1981 | if (cso->base.type == PIPE_SHADER_IR_TGSI) { |
| 1982 | printf("Deleting TGSI shader leaks duplicated tokens\n"); |
| 1983 | } |
| 1984 | |
| 1985 | unsigned leak = cso->variant_count * sizeof(struct mali_shader_meta); |
| 1986 | printf("Deleting shader state leaks descriptors (%d bytes), and shader bytecode\n", leak); |
| 1987 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 1988 | free(so); |
| 1989 | } |
| 1990 | |
| 1991 | static void * |
| 1992 | panfrost_create_sampler_state( |
| 1993 | struct pipe_context *pctx, |
| 1994 | const struct pipe_sampler_state *cso) |
| 1995 | { |
| 1996 | struct panfrost_sampler_state *so = CALLOC_STRUCT(panfrost_sampler_state); |
| 1997 | so->base = *cso; |
| 1998 | |
| 1999 | /* sampler_state corresponds to mali_sampler_descriptor, which we can generate entirely here */ |
| 2000 | |
| 2001 | struct mali_sampler_descriptor sampler_descriptor = { |
| 2002 | .filter_mode = MALI_TEX_MIN(translate_tex_filter(cso->min_img_filter)) |
| 2003 | | MALI_TEX_MAG(translate_tex_filter(cso->mag_img_filter)) |
| 2004 | | translate_mip_filter(cso->min_mip_filter) |
| 2005 | | 0x20, |
| 2006 | |
| 2007 | .wrap_s = translate_tex_wrap(cso->wrap_s), |
| 2008 | .wrap_t = translate_tex_wrap(cso->wrap_t), |
| 2009 | .wrap_r = translate_tex_wrap(cso->wrap_r), |
| 2010 | .compare_func = panfrost_translate_alt_compare_func(cso->compare_func), |
| 2011 | .border_color = { |
| 2012 | cso->border_color.f[0], |
| 2013 | cso->border_color.f[1], |
| 2014 | cso->border_color.f[2], |
| 2015 | cso->border_color.f[3] |
| 2016 | }, |
| 2017 | .min_lod = FIXED_16(0.0), |
| 2018 | .max_lod = FIXED_16(31.0), |
| 2019 | .unknown2 = 1, |
| 2020 | }; |
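| | /* (FIXED_16 appears to convert the float LOD into the hardware's
| | * 16-bit fixed-point format; a range of 0.0 .. 31.0 is wider than any
| | * real mip chain, effectively leaving the LOD unclamped) */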
| 2021 | |
| 2022 | so->hw = sampler_descriptor; |
| 2023 | |
| 2024 | return so; |
| 2025 | } |
| 2026 | |
| 2027 | static void |
| 2028 | panfrost_bind_sampler_states( |
| 2029 | struct pipe_context *pctx, |
| 2030 | enum pipe_shader_type shader, |
| 2031 | unsigned start_slot, unsigned num_sampler, |
| 2032 | void **sampler) |
| 2033 | { |
| 2034 | assert(start_slot == 0); |
| 2035 | |
| 2036 | struct panfrost_context *ctx = pan_context(pctx); |
| 2037 | |
| 2038 | /* XXX: Should upload, not just copy? */ |
| 2039 | ctx->sampler_count[shader] = num_sampler; |
| 2040 | memcpy(ctx->samplers[shader], sampler, num_sampler * sizeof (void *)); |
| 2041 | |
| 2042 | ctx->dirty |= PAN_DIRTY_SAMPLERS; |
| 2043 | } |
| 2044 | |
| 2045 | static bool |
| 2046 | panfrost_variant_matches(struct panfrost_context *ctx, struct panfrost_shader_state *variant) |
| 2047 | { |
| 2048 | struct pipe_alpha_state *alpha = &ctx->depth_stencil->alpha; |
| 2049 | |
| 2050 | if (alpha->enabled || variant->alpha_state.enabled) { |
| 2051 | /* Make sure enable state is at least the same */ |
| 2052 | if (alpha->enabled != variant->alpha_state.enabled) { |
| 2053 | return false; |
| 2054 | } |
| 2055 | |
| 2056 | /* Check that the contents of the test are the same */ |
| 2057 | bool same_func = alpha->func == variant->alpha_state.func; |
| 2058 | bool same_ref = alpha->ref_value == variant->alpha_state.ref_value; |
| 2059 | |
| 2060 | if (!(same_func && same_ref)) { |
| 2061 | return false; |
| 2062 | } |
| 2063 | } |
| 2064 | /* Otherwise, we're good to go */ |
| 2065 | return true; |
| 2066 | } |
| 2067 | |
| 2068 | static void |
| 2069 | panfrost_bind_fs_state( |
| 2070 | struct pipe_context *pctx, |
| 2071 | void *hwcso) |
| 2072 | { |
| 2073 | struct panfrost_context *ctx = pan_context(pctx); |
| 2074 | |
| 2075 | ctx->fs = hwcso; |
| 2076 | |
| 2077 | if (hwcso) { |
| 2078 | /* Match the appropriate variant */ |
| 2079 | |
| 2080 | signed variant = -1; |
| 2081 | |
| 2082 | struct panfrost_shader_variants *variants = (struct panfrost_shader_variants *) hwcso; |
| 2083 | |
| 2084 | for (unsigned i = 0; i < variants->variant_count; ++i) { |
| 2085 | if (panfrost_variant_matches(ctx, &variants->variants[i])) { |
| 2086 | variant = i; |
| 2087 | break; |
| 2088 | } |
| 2089 | } |
| 2090 | |
| 2091 | if (variant == -1) { |
| 2092 | /* No variant matched, so create a new one */ |
| 2093 | variant = variants->variant_count++; |
| 2094 | assert(variants->variant_count < MAX_SHADER_VARIANTS); |
| 2095 | |
| 2096 | variants->variants[variant].base = hwcso; |
| 2097 | variants->variants[variant].alpha_state = ctx->depth_stencil->alpha; |
| 2098 | |
| 2099 | /* Allocate the mapped descriptor ahead-of-time. TODO: Use for FS as well as VS */ |
| 2100 | struct panfrost_context *ctx = pan_context(pctx); |
| 2101 | struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR); |
| 2102 | |
| 2103 | variants->variants[variant].tripipe = (struct mali_shader_meta *) transfer.cpu; |
| 2104 | variants->variants[variant].tripipe_gpu = transfer.gpu; |
| 2105 | |
| 2106 | } |
| 2107 | |
| 2108 | /* Select this variant */ |
| 2109 | variants->active_variant = variant; |
| 2110 | |
| 2111 | struct panfrost_shader_state *shader_state = &variants->variants[variant]; |
| 2112 | assert(panfrost_variant_matches(ctx, shader_state)); |
| 2113 | |
| 2114 | /* Now we have a variant selected, so compile and go */ |
| 2115 | |
| 2116 | if (!shader_state->compiled) { |
| 2117 | panfrost_shader_compile(ctx, shader_state->tripipe, NULL, JOB_TYPE_TILER, shader_state); |
| 2118 | shader_state->compiled = true; |
| 2119 | } |
| 2120 | } |
| 2121 | |
| 2122 | ctx->dirty |= PAN_DIRTY_FS; |
| 2123 | } |
| 2124 | |
| 2125 | static void |
| 2126 | panfrost_bind_vs_state( |
| 2127 | struct pipe_context *pctx, |
| 2128 | void *hwcso) |
| 2129 | { |
| 2130 | struct panfrost_context *ctx = pan_context(pctx); |
| 2131 | |
| 2132 | ctx->vs = hwcso; |
| 2133 | |
| 2134 | if (hwcso) { |
| 2135 | if (!ctx->vs->variants[0].compiled) { |
| 2136 | ctx->vs->variants[0].base = hwcso; |
| 2137 | |
| 2138 | /* TODO DRY from above */ |
| 2139 | struct panfrost_transfer transfer = panfrost_allocate_chunk(ctx, sizeof(struct mali_shader_meta), HEAP_DESCRIPTOR); |
| 2140 | ctx->vs->variants[0].tripipe = (struct mali_shader_meta *) transfer.cpu; |
| 2141 | ctx->vs->variants[0].tripipe_gpu = transfer.gpu; |
| 2142 | |
| 2143 | panfrost_shader_compile(ctx, ctx->vs->variants[0].tripipe, NULL, JOB_TYPE_VERTEX, &ctx->vs->variants[0]); |
| 2144 | ctx->vs->variants[0].compiled = true; |
| 2145 | } |
| 2146 | } |
| 2147 | |
| 2148 | ctx->dirty |= PAN_DIRTY_VS; |
| 2149 | } |
| 2150 | |
| 2151 | static void |
| 2152 | panfrost_set_vertex_buffers( |
| 2153 | struct pipe_context *pctx, |
| 2154 | unsigned start_slot, |
| 2155 | unsigned num_buffers, |
| 2156 | const struct pipe_vertex_buffer *buffers) |
| 2157 | { |
| 2158 | struct panfrost_context *ctx = pan_context(pctx); |
| 2159 | assert(num_buffers <= PIPE_MAX_ATTRIBS); |
| 2160 | |
| 2161 | /* XXX: Dirty tracking? etc */ |
| 2162 | if (buffers) {
| 2163 | size_t sz = sizeof(buffers[0]) * num_buffers;
| | 
| | /* Free any previously set buffer list before replacing it */
| | free(ctx->vertex_buffers);
| | 
| 2164 | ctx->vertex_buffers = malloc(sz);
| 2165 | ctx->vertex_buffer_count = num_buffers; |
| 2166 | memcpy(ctx->vertex_buffers, buffers, sz); |
| 2167 | } else { |
| 2168 | if (ctx->vertex_buffers) { |
| 2169 | free(ctx->vertex_buffers); |
| 2170 | ctx->vertex_buffers = NULL; |
| 2171 | } |
| 2172 | |
| 2173 | ctx->vertex_buffer_count = 0; |
| 2174 | } |
| 2175 | } |
| 2176 | |
| 2177 | static void |
| 2178 | panfrost_set_constant_buffer( |
| 2179 | struct pipe_context *pctx, |
| 2180 | enum pipe_shader_type shader, uint index, |
| 2181 | const struct pipe_constant_buffer *buf) |
| 2182 | { |
| 2183 | struct panfrost_context *ctx = pan_context(pctx); |
| 2184 | struct panfrost_constant_buffer *pbuf = &ctx->constant_buffer[shader]; |
| 2185 | |
| 2186 | size_t sz = buf ? buf->buffer_size : 0; |
| 2187 | |
| 2188 | /* Free previous buffer */ |
| 2189 | |
| 2190 | pbuf->dirty = true; |
| 2191 | pbuf->size = sz; |
| 2192 | |
| 2193 | if (pbuf->buffer) { |
| 2194 | free(pbuf->buffer); |
| 2195 | pbuf->buffer = NULL; |
| 2196 | } |
| 2197 | |
| 2198 | /* If unbinding, we're done */ |
| 2199 | |
| 2200 | if (!buf) |
| 2201 | return; |
| 2202 | |
| 2203 | /* Multiple constant buffers not yet supported */ |
| 2204 | assert(index == 0); |
| 2205 | |
| 2206 | const uint8_t *cpu; |
| 2207 | |
| 2208 | struct panfrost_resource *rsrc = (struct panfrost_resource *) (buf->buffer); |
| 2209 | |
| 2210 | if (rsrc) { |
| 2211 | cpu = rsrc->bo->cpu[0]; |
| 2212 | } else if (buf->user_buffer) { |
| 2213 | cpu = buf->user_buffer; |
| 2214 | } else { |
| 2215 | printf("No constant buffer?\n"); |
| 2216 | return; |
| 2217 | } |
| 2218 | |
| 2219 | /* Copy the constant buffer into the driver context for later upload */ |
| 2220 | |
| 2221 | pbuf->buffer = malloc(sz); |
| 2222 | memcpy(pbuf->buffer, cpu + buf->buffer_offset, sz); |
| 2223 | } |
| 2224 | |
| 2225 | static void |
| 2226 | panfrost_set_stencil_ref( |
| 2227 | struct pipe_context *pctx, |
| 2228 | const struct pipe_stencil_ref *ref) |
| 2229 | { |
| 2230 | struct panfrost_context *ctx = pan_context(pctx); |
| 2231 | ctx->stencil_ref = *ref; |
| 2232 | |
| 2233 | /* Shader core dirty */ |
| 2234 | ctx->dirty |= PAN_DIRTY_FS; |
| 2235 | } |
| 2236 | |
| 2237 | static struct pipe_sampler_view * |
| 2238 | panfrost_create_sampler_view( |
| 2239 | struct pipe_context *pctx, |
| 2240 | struct pipe_resource *texture, |
| 2241 | const struct pipe_sampler_view *template) |
| 2242 | { |
| 2243 | struct panfrost_sampler_view *so = CALLOC_STRUCT(panfrost_sampler_view); |
| 2244 | int bytes_per_pixel = util_format_get_blocksize(texture->format); |
| 2245 | |
| 2246 | pipe_reference(NULL, &texture->reference); |
| 2247 | |
| 2248 | struct panfrost_resource *prsrc = (struct panfrost_resource *) texture; |
| 2249 | |
| 2250 | so->base = *template; |
| 2251 | so->base.texture = texture; |
| 2252 | so->base.reference.count = 1; |
| 2253 | so->base.context = pctx; |
| 2254 | |
| 2255 | /* sampler_views correspond to texture descriptors, minus the texture |
| 2256 | * (data) itself. So, we serialise the descriptor here and cache it for |
| 2257 | * later. */ |
| 2258 | |
| 2259 | /* TODO: Other types of textures */ |
| 2260 | assert(template->target == PIPE_TEXTURE_2D); |
| 2261 | |
| 2262 | /* Make sure it's something with which we're familiar */ |
| 2263 | assert(bytes_per_pixel >= 1 && bytes_per_pixel <= 4); |
| 2264 | |
| 2265 | /* TODO: Detect from format better */ |
| 2266 | const struct util_format_description *desc = util_format_description(prsrc->base.format); |
| 2267 | |
| 2268 | unsigned char user_swizzle[4] = { |
| 2269 | template->swizzle_r, |
| 2270 | template->swizzle_g, |
| 2271 | template->swizzle_b, |
| 2272 | template->swizzle_a |
| 2273 | }; |
| 2274 | |
| 2275 | enum mali_format format = panfrost_find_format(desc); |
| 2276 | |
| 2277 | struct mali_texture_descriptor texture_descriptor = { |
| 2278 | .width = MALI_POSITIVE(texture->width0), |
| 2279 | .height = MALI_POSITIVE(texture->height0), |
| 2280 | .depth = MALI_POSITIVE(texture->depth0), |
| 2281 | |
| 2282 | /* TODO: Decode */ |
| 2283 | .format = { |
| 2284 | .swizzle = panfrost_translate_swizzle_4(desc->swizzle), |
| 2285 | .format = format, |
| 2286 | |
| 2287 | .usage1 = 0x0, |
| 2288 | .is_not_cubemap = 1, |
| 2289 | |
| 2290 | /* 0x11 - regular texture 2d, uncompressed tiled */ |
| 2291 | /* 0x12 - regular texture 2d, uncompressed linear */ |
| 2292 | /* 0x1c - AFBC compressed (internally tiled, probably) texture 2D */ |
| 2293 | |
| 2294 | .usage2 = prsrc->bo->has_afbc ? 0x1c : (prsrc->bo->tiled ? 0x11 : 0x12), |
| 2295 | }, |
| 2296 | |
| 2297 | .swizzle = panfrost_translate_swizzle_4(user_swizzle) |
| 2298 | }; |
| 2299 | |
| 2300 | /* TODO: Other base levels require adjusting dimensions / level numbers / etc */ |
| 2301 | assert (template->u.tex.first_level == 0); |
| 2302 | |
| 2303 | texture_descriptor.nr_mipmap_levels = template->u.tex.last_level - template->u.tex.first_level; |
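| | /* (nr_mipmap_levels therefore counts levels beyond the base, so 0
| | * means "base level only" -- consistent with the non-mipmapped
| | * workaround applied when the texture descriptors are emitted) */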
| 2304 | |
| 2305 | so->hw = texture_descriptor; |
| 2306 | |
| 2307 | return (struct pipe_sampler_view *) so; |
| 2308 | } |
| 2309 | |
| 2310 | static void |
| 2311 | panfrost_set_sampler_views( |
| 2312 | struct pipe_context *pctx, |
| 2313 | enum pipe_shader_type shader, |
| 2314 | unsigned start_slot, unsigned num_views, |
| 2315 | struct pipe_sampler_view **views) |
| 2316 | { |
| 2317 | struct panfrost_context *ctx = pan_context(pctx); |
| 2318 | |
| 2319 | assert(start_slot == 0); |
| 2320 | |
| 2321 | ctx->sampler_view_count[shader] = num_views; |
| 2322 | memcpy(ctx->sampler_views[shader], views, num_views * sizeof (void *)); |
| 2323 | |
| 2324 | ctx->dirty |= PAN_DIRTY_TEXTURES; |
| 2325 | } |
| 2326 | |
| 2327 | static void |
| 2328 | panfrost_sampler_view_destroy( |
| 2329 | struct pipe_context *pctx, |
| 2330 | struct pipe_sampler_view *view)
| 2331 | {
| 2332 | /* Release the reference taken on the texture at creation time */
| 2333 | pipe_resource_reference(&view->texture, NULL);
| 2334 | 
| 2336 | free(view);
| 2337 | } |
| 2338 | |
| 2339 | static void |
| 2340 | panfrost_set_framebuffer_state(struct pipe_context *pctx, |
| 2341 | const struct pipe_framebuffer_state *fb) |
| 2342 | { |
| 2343 | struct panfrost_context *ctx = pan_context(pctx); |
| 2344 | |
| 2345 | /* Flush when switching away from an FBO */ |
| 2346 | |
| 2347 | if (!panfrost_is_scanout(ctx)) { |
| 2348 | panfrost_flush(pctx, NULL, 0); |
| 2349 | } |
| 2350 | |
| 2351 | ctx->pipe_framebuffer.nr_cbufs = fb->nr_cbufs; |
| 2352 | ctx->pipe_framebuffer.samples = fb->samples; |
| 2353 | ctx->pipe_framebuffer.layers = fb->layers; |
Alyssa Rosenzweig | c70ed4c | 2019-02-15 07:43:43 +0000 | [diff] [blame] | 2354 | ctx->pipe_framebuffer.width = fb->width; |
| 2355 | ctx->pipe_framebuffer.height = fb->height; |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2356 | |
| 2357 | for (int i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { |
| 2358 | struct pipe_surface *cb = i < fb->nr_cbufs ? fb->cbufs[i] : NULL; |
| 2359 | |
| 2360 | /* check if changing cbuf */ |
| 2361 | if (ctx->pipe_framebuffer.cbufs[i] == cb) continue; |
| 2362 | |
| 2363 | if (cb && (i != 0)) { |
| 2364 | printf("XXX: Multiple render targets not supported before t7xx!\n"); |
| 2365 | assert(0); |
| 2366 | } |
| 2367 | |
| 2368 | /* assign new */ |
| 2369 | pipe_surface_reference(&ctx->pipe_framebuffer.cbufs[i], cb); |
| 2370 | |
| 2371 | if (!cb) |
| 2372 | continue; |
| 2373 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 2374 | if (require_sfbd) |
| 2375 | ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); |
| 2376 | else |
| 2377 | ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); |
| 2378 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2379 | panfrost_attach_vt_framebuffer(ctx); |
| 2380 | panfrost_new_frag_framebuffer(ctx); |
| 2381 | panfrost_set_scissor(ctx); |
| 2382 | |
| 2383 | struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.cbufs[i]->texture); |
| 2384 | bool is_scanout = panfrost_is_scanout(ctx); |
| 2385 | |
| 2386 | if (!is_scanout && !tex->bo->has_afbc) { |
| 2387 | /* The blob is aggressive about enabling AFBC. As such, |
| 2388 | * it's pretty much necessary to use it here, since we |
| 2389 | * have no traces of non-compressed FBOs. */
| 2390 | |
| 2391 | panfrost_enable_afbc(ctx, tex, false); |
| 2392 | } |
| 2393 | |
| 2394 | if (!is_scanout && !tex->bo->has_checksum) { |
| 2395 | /* Enable transaction elimination if we can */ |
| 2396 | panfrost_enable_checksum(ctx, tex); |
| 2397 | } |
| 2398 | } |
| 2399 | |
| 2400 | { |
| 2401 | struct pipe_surface *zb = fb->zsbuf; |
| 2402 | |
| 2403 | if (ctx->pipe_framebuffer.zsbuf != zb) { |
| 2404 | pipe_surface_reference(&ctx->pipe_framebuffer.zsbuf, zb); |
| 2405 | |
| 2406 | if (zb) { |
| 2407 | /* FBO has depth */ |
| 2408 | |
Alyssa Rosenzweig | 97aa054 | 2019-02-10 20:06:21 +0000 | [diff] [blame] | 2409 | if (require_sfbd) |
| 2410 | ctx->vt_framebuffer_sfbd = panfrost_emit_sfbd(ctx); |
| 2411 | else |
| 2412 | ctx->vt_framebuffer_mfbd = panfrost_emit_mfbd(ctx); |
| 2413 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2414 | panfrost_attach_vt_framebuffer(ctx); |
| 2415 | panfrost_new_frag_framebuffer(ctx); |
| 2416 | panfrost_set_scissor(ctx); |
| 2417 | |
| 2418 | struct panfrost_resource *tex = ((struct panfrost_resource *) ctx->pipe_framebuffer.zsbuf->texture); |
| 2419 | |
| 2420 | if (!tex->bo->has_afbc && !panfrost_is_scanout(ctx)) |
| 2421 | panfrost_enable_afbc(ctx, tex, true); |
| 2422 | } |
| 2423 | } |
| 2424 | } |
| 2425 | |
| 2426 | /* Force a clear XXX wrong? */ |
| 2427 | if (ctx->last_clear.color) |
| 2428 | panfrost_clear(&ctx->base, ctx->last_clear.buffers, ctx->last_clear.color, ctx->last_clear.depth, ctx->last_clear.stencil); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2429 | } |
| 2430 | |
| 2431 | static void * |
| 2432 | panfrost_create_blend_state(struct pipe_context *pipe, |
| 2433 | const struct pipe_blend_state *blend) |
| 2434 | { |
| 2435 | struct panfrost_context *ctx = pan_context(pipe); |
| 2436 | struct panfrost_blend_state *so = CALLOC_STRUCT(panfrost_blend_state); |
| 2437 | so->base = *blend; |
| 2438 | |
| 2439 | /* TODO: The following features are not yet implemented */ |
| 2440 | assert(!blend->logicop_enable); |
| 2441 | assert(!blend->alpha_to_coverage); |
| 2442 | assert(!blend->alpha_to_one); |
| 2443 | |
| 2444 | /* Compile the blend state, first as fixed-function if we can */ |
| 2445 | |
| 2446 | if (panfrost_make_fixed_blend_mode(&blend->rt[0], &so->equation, blend->rt[0].colormask, &ctx->blend_color)) |
| 2447 | return so; |
| 2448 | |
| 2449 | /* If we can't, compile a blend shader instead */ |
| 2450 | |
| 2451 | panfrost_make_blend_shader(ctx, so, &ctx->blend_color); |
| 2452 | |
| 2453 | return so; |
| 2454 | } |
| 2455 | |
| 2456 | static void |
| 2457 | panfrost_bind_blend_state(struct pipe_context *pipe, |
| 2458 | void *cso) |
| 2459 | { |
| 2460 | struct panfrost_context *ctx = pan_context(pipe); |
| 2461 | struct pipe_blend_state *blend = (struct pipe_blend_state *) cso; |
| 2462 | struct panfrost_blend_state *pblend = (struct panfrost_blend_state *) cso; |
| 2463 | ctx->blend = pblend; |
| 2464 | |
| 2465 | if (!blend) |
| 2466 | return; |
| 2467 | |
| 2468 | SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_NO_DITHER, !blend->dither); |
| 2469 | |
| 2470 | /* TODO: Attach color */ |
| 2471 | |
| 2472 | /* Shader itself is not dirty, but the shader core is */ |
| 2473 | ctx->dirty |= PAN_DIRTY_FS; |
| 2474 | } |
| 2475 | |
| 2476 | static void |
| 2477 | panfrost_delete_blend_state(struct pipe_context *pipe, |
| 2478 | void *blend) |
| 2479 | { |
Alyssa Rosenzweig | acc52ff | 2019-02-14 04:00:19 +0000 | [diff] [blame] | 2480 | struct panfrost_blend_state *so = (struct panfrost_blend_state *) blend; |
| 2481 | |
| 2482 | if (so->has_blend_shader) { |
| 2483 | printf("Deleting blend state leak blend shaders bytecode\n"); |
| 2484 | } |
| 2485 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2486 | free(blend); |
| 2487 | } |
| 2488 | |
| 2489 | static void |
| 2490 | panfrost_set_blend_color(struct pipe_context *pipe, |
| 2491 | const struct pipe_blend_color *blend_color) |
| 2492 | { |
| 2493 | struct panfrost_context *ctx = pan_context(pipe); |
| 2494 | |
| 2495 | /* If blend_color is NULL, we're unbinding, so ctx->blend_color is now undefined -> nothing to do */
| 2496 | |
| 2497 | if (blend_color) { |
| 2498 | ctx->blend_color = *blend_color; |
| 2499 | |
| 2500 | /* The blend mode depends on the blend constant color, due to the |
| 2501 | * fixed/programmable split. So, we're forced to regenerate the blend |
| 2502 | * equation */ |
| 2503 | |
| 2504 | /* TODO: Attach color */ |
| 2505 | } |
| 2506 | } |
| 2507 | |
| 2508 | static void * |
| 2509 | panfrost_create_depth_stencil_state(struct pipe_context *pipe, |
| 2510 | const struct pipe_depth_stencil_alpha_state *depth_stencil) |
| 2511 | { |
| 2512 | return mem_dup(depth_stencil, sizeof(*depth_stencil)); |
| 2513 | } |
| 2514 | |
| 2515 | static void |
| 2516 | panfrost_bind_depth_stencil_state(struct pipe_context *pipe, |
| 2517 | void *cso) |
| 2518 | { |
| 2519 | struct panfrost_context *ctx = pan_context(pipe); |
| 2520 | struct pipe_depth_stencil_alpha_state *depth_stencil = cso; |
| 2521 | ctx->depth_stencil = depth_stencil; |
| 2522 | |
| 2523 | if (!depth_stencil) |
| 2524 | return; |
| 2525 | |
| 2526 | /* Alpha test does not exist in the hardware (it was dropped from ES 3),
| 2527 | * so it's emulated in the fragment shader */
| 2528 | |
| 2529 | if (depth_stencil->alpha.enabled) { |
| 2530 | /* We need to trigger a new shader (maybe) */ |
| 2531 | ctx->base.bind_fs_state(&ctx->base, ctx->fs); |
| 2532 | } |
| 2533 | |
| 2534 | /* Stencil state */ |
| 2535 | SET_BIT(ctx->fragment_shader_core.unknown2_4, MALI_STENCIL_TEST, depth_stencil->stencil[0].enabled); /* XXX: which one? */ |
| 2536 | |
| 2537 | panfrost_make_stencil_state(&depth_stencil->stencil[0], &ctx->fragment_shader_core.stencil_front); |
| 2538 | ctx->fragment_shader_core.stencil_mask_front = depth_stencil->stencil[0].writemask; |
| 2539 | |
| 2540 | panfrost_make_stencil_state(&depth_stencil->stencil[1], &ctx->fragment_shader_core.stencil_back); |
| 2541 | ctx->fragment_shader_core.stencil_mask_back = depth_stencil->stencil[1].writemask; |
| 2542 | |
| 2543 | /* Depth state (TODO: Refactor) */ |
| 2544 | SET_BIT(ctx->fragment_shader_core.unknown2_3, MALI_DEPTH_TEST, depth_stencil->depth.enabled); |
| 2545 | |
| 2546 | int func = depth_stencil->depth.enabled ? depth_stencil->depth.func : PIPE_FUNC_ALWAYS; |
| 2547 | |
| 2548 | ctx->fragment_shader_core.unknown2_3 &= ~MALI_DEPTH_FUNC_MASK; |
| 2549 | ctx->fragment_shader_core.unknown2_3 |= MALI_DEPTH_FUNC(panfrost_translate_compare_func(func)); |
| 2550 | |
| 2551 | /* Bounds test not implemented */ |
| 2552 | assert(!depth_stencil->depth.bounds_test); |
| 2553 | |
| 2554 | ctx->dirty |= PAN_DIRTY_FS; |
| 2555 | } |
| 2556 | |
| 2557 | static void |
| 2558 | panfrost_delete_depth_stencil_state(struct pipe_context *pipe, void *depth) |
| 2559 | { |
| 2560 | free( depth ); |
| 2561 | } |
| 2562 | |
| 2563 | static void |
| 2564 | panfrost_set_sample_mask(struct pipe_context *pipe, |
| 2565 | unsigned sample_mask) |
| 2566 | { |
| 2567 | } |
| 2568 | |
| 2569 | static void |
| 2570 | panfrost_set_clip_state(struct pipe_context *pipe, |
| 2571 | const struct pipe_clip_state *clip) |
| 2572 | { |
| 2573 | //struct panfrost_context *panfrost = pan_context(pipe); |
| 2574 | } |
| 2575 | |
| 2576 | static void |
| 2577 | panfrost_set_viewport_states(struct pipe_context *pipe, |
| 2578 | unsigned start_slot, |
| 2579 | unsigned num_viewports, |
| 2580 | const struct pipe_viewport_state *viewports) |
| 2581 | { |
| 2582 | struct panfrost_context *ctx = pan_context(pipe); |
| 2583 | |
| 2584 | assert(start_slot == 0); |
| 2585 | assert(num_viewports == 1); |
| 2586 | |
| 2587 | ctx->pipe_viewport = *viewports; |
| 2588 | |
| 2589 | #if 0 |
| 2590 | /* TODO: What if not centered? */ |
| 2591 | float w = abs(viewports->scale[0]) * 2.0; |
| 2592 | float h = abs(viewports->scale[1]) * 2.0; |
| 2593 | |
| 2594 | ctx->viewport.viewport1[0] = MALI_POSITIVE((int) w); |
| 2595 | ctx->viewport.viewport1[1] = MALI_POSITIVE((int) h); |
| 2596 | #endif |
| 2597 | } |
| 2598 | |
| 2599 | static void |
| 2600 | panfrost_set_scissor_states(struct pipe_context *pipe, |
| 2601 | unsigned start_slot, |
| 2602 | unsigned num_scissors, |
| 2603 | const struct pipe_scissor_state *scissors) |
| 2604 | { |
| 2605 | struct panfrost_context *ctx = pan_context(pipe); |
| 2606 | |
| 2607 | assert(start_slot == 0); |
| 2608 | assert(num_scissors == 1); |
| 2609 | |
| 2610 | ctx->scissor = *scissors; |
| 2611 | |
| 2612 | panfrost_set_scissor(ctx); |
| 2613 | } |
| 2614 | |
| 2615 | static void |
| 2616 | panfrost_set_polygon_stipple(struct pipe_context *pipe, |
| 2617 | const struct pipe_poly_stipple *stipple) |
| 2618 | { |
| 2619 | //struct panfrost_context *panfrost = pan_context(pipe); |
| 2620 | } |
| 2621 | |
| 2622 | static void |
| 2623 | panfrost_set_active_query_state(struct pipe_context *pipe, |
| 2624 | boolean enable) |
| 2625 | { |
| 2626 | //struct panfrost_context *panfrost = pan_context(pipe); |
| 2627 | } |
| 2628 | |
| 2629 | static void |
| 2630 | panfrost_destroy(struct pipe_context *pipe) |
| 2631 | { |
| 2632 | struct panfrost_context *panfrost = pan_context(pipe); |
| 2633 | |
| 2634 | if (panfrost->blitter) |
| 2635 | util_blitter_destroy(panfrost->blitter); |
| |
| | /* The context itself was allocated with CALLOC_STRUCT in |
| |  * panfrost_create_context, so release it here instead of leaking it */ |
| | FREE(panfrost); |
| 2636 | } |
| 2637 | |
| 2638 | static struct pipe_query * |
| 2639 | panfrost_create_query(struct pipe_context *pipe, |
| 2640 | unsigned type, |
| 2641 | unsigned index) |
| 2642 | { |
| 2643 | struct panfrost_query *q = CALLOC_STRUCT(panfrost_query); |
| 2644 | |
| 2645 | q->type = type; |
| 2646 | q->index = index; |
| 2647 | |
| 2648 | return (struct pipe_query *) q; |
| 2649 | } |
| 2650 | |
| 2651 | static void |
| 2652 | panfrost_destroy_query(struct pipe_context *pipe, struct pipe_query *q) |
| 2653 | { |
| 2654 | FREE(q); |
| 2655 | } |
| 2656 | |
| 2657 | static boolean |
| 2658 | panfrost_begin_query(struct pipe_context *pipe, struct pipe_query *q) |
| 2659 | { |
| 2660 | struct panfrost_context *ctx = pan_context(pipe); |
| 2661 | struct panfrost_query *query = (struct panfrost_query *) q; |
| 2662 | |
| 2663 | switch (query->type) { |
Alyssa Rosenzweig | 5155bcf | 2019-02-14 02:50:30 +0000 | [diff] [blame] | 2664 | case PIPE_QUERY_OCCLUSION_COUNTER: |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2665 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
| 2666 | case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: |
| 2667 | { |
| 2668 | /* Allocate a word for the query results to be stored */ |
| 2669 | query->transfer = panfrost_allocate_chunk(ctx, sizeof(unsigned), HEAP_DESCRIPTOR); |
| 2670 | |
| 2671 | ctx->occlusion_query = query; |
| 2672 | |
| 2673 | break; |
| 2674 | } |
| 2675 | |
| 2676 | default: |
| 2677 | fprintf(stderr, "Skipping query %d\n", query->type); |
| 2678 | break; |
| 2679 | } |
| 2680 | |
| 2681 | return true; |
| 2682 | } |
| 2683 | |
| 2684 | static bool |
| 2685 | panfrost_end_query(struct pipe_context *pipe, struct pipe_query *q) |
| 2686 | { |
| 2687 | struct panfrost_context *ctx = pan_context(pipe); |
| 2688 | ctx->occlusion_query = NULL; |
| 2689 | return true; |
| 2690 | } |
| 2691 | |
| 2692 | static boolean |
| 2693 | panfrost_get_query_result(struct pipe_context *pipe, |
| 2694 | struct pipe_query *q, |
| 2695 | boolean wait, |
| 2696 | union pipe_query_result *vresult) |
| 2697 | { |
| 2698 | /* STUB */ |
| 2699 | struct panfrost_query *query = (struct panfrost_query *) q; |
| 2700 | |
| 2701 | /* We need to flush out the jobs so the counter actually runs. TODO: honour |
| 2702 |  * the wait flag; TODO: redraw the wallpaper afterwards if needed */ |
| 2703 | |
| 2704 | panfrost_flush(pipe, NULL, PIPE_FLUSH_END_OF_FRAME); |
| 2705 | |
| 2706 | switch (query->type) { |
Alyssa Rosenzweig | 5155bcf | 2019-02-14 02:50:30 +0000 | [diff] [blame] | 2707 | case PIPE_QUERY_OCCLUSION_COUNTER: |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2708 | case PIPE_QUERY_OCCLUSION_PREDICATE: |
| 2709 | case PIPE_QUERY_OCCLUSION_PREDICATE_CONSERVATIVE: { |
| 2710 | /* Read back the query results */ |
| 2711 | unsigned *result = (unsigned *) query->transfer.cpu; |
| 2712 | unsigned passed = *result; |
| 2713 | |
Alyssa Rosenzweig | 5155bcf | 2019-02-14 02:50:30 +0000 | [diff] [blame] | 2714 | if (query->type == PIPE_QUERY_OCCLUSION_COUNTER) { |
| 2715 | vresult->u64 = passed; |
| 2716 | } else { |
| 2717 | vresult->b = !!passed; |
| 2718 | } |
| 2719 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2720 | break; |
| 2721 | } |
| 2722 | default: |
| 2723 | fprintf(stderr, "Skipped query get %d\n", query->type); |
| 2724 | break; |
| 2725 | } |
| 2726 | |
| 2727 | return true; |
| 2728 | } |
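| |
| | /* Illustration only: how the query callbacks above are driven through the |
| |  * Gallium context interface. The helper name and the draw in the middle are |
| |  * hypothetical; the entry points and PIPE_QUERY_OCCLUSION_COUNTER are real. */ |
| | #if 0 |
| | static uint64_t |
| | example_count_passing_samples(struct pipe_context *pipe) |
| | { |
| |         union pipe_query_result result = {0}; |
| | |
| |         struct pipe_query *q = |
| |                 pipe->create_query(pipe, PIPE_QUERY_OCCLUSION_COUNTER, 0); |
| | |
| |         pipe->begin_query(pipe, q); |
| |         /* ... issue draw calls here ... */ |
| |         pipe->end_query(pipe, q); |
| | |
| |         /* wait = true; the flush in panfrost_get_query_result makes this |
| |          * block until the GPU has written the counter */ |
| |         pipe->get_query_result(pipe, q, true, &result); |
| |         pipe->destroy_query(pipe, q); |
| | |
| |         return result.u64; |
| | } |
| | #endif |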
| 2729 | |
| 2730 | static void |
| 2731 | panfrost_setup_hardware(struct panfrost_context *ctx) |
| 2732 | { |
| 2733 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 2734 | struct panfrost_screen *screen = pan_screen(gallium->screen); |
| 2735 | |
| 2736 | for (int i = 0; i < ARRAY_SIZE(ctx->transient_pools); ++i) { |
| 2737 | /* Allocate the beginning of the transient pool */ |
| 2738 | int entry_size = (1 << 22); /* 4MB */ |
| 2739 | |
| 2740 | ctx->transient_pools[i].entry_size = entry_size; |
| 2741 | ctx->transient_pools[i].entry_count = 1; |
| 2742 | |
| 2743 | ctx->transient_pools[i].entries[0] = (struct panfrost_memory_entry *) pb_slab_alloc(&screen->slabs, entry_size, HEAP_TRANSIENT); |
| 2744 | } |
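| |
| | /* Each pool therefore starts life as one 4 MiB (1 << 22 byte) slab entry; |
| |  * further entries are presumably chained on as a frame's transient |
| |  * allocations (cmdstream, descriptors) outgrow the first. Having |
| |  * ARRAY_SIZE(ctx->transient_pools) pools suggests they are cycled per frame |
| |  * so one can be built while another is still in flight -- an inference from |
| |  * the structure here, not a documented guarantee. */ |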
| 2745 | |
| 2746 | screen->driver->allocate_slab(screen, &ctx->scratchpad, 64, false, 0, 0, 0); |
Alyssa Rosenzweig | cdca103 | 2019-02-25 02:32:45 +0000 | [diff] [blame] | 2747 | screen->driver->allocate_slab(screen, &ctx->varying_mem, 16384, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_COHERENT_LOCAL, 0, 0); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2748 | screen->driver->allocate_slab(screen, &ctx->shaders, 4096, true, PAN_ALLOCATE_EXECUTE, 0, 0); |
Alyssa Rosenzweig | f44d465 | 2019-02-25 02:31:09 +0000 | [diff] [blame] | 2749 | screen->driver->allocate_slab(screen, &ctx->tiler_heap, 32768, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); |
| 2750 | screen->driver->allocate_slab(screen, &ctx->misc_0, 128*128, false, PAN_ALLOCATE_INVISIBLE | PAN_ALLOCATE_GROWABLE, 1, 128); |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2751 | |
| 2752 | } |
| 2753 | |
| 2754 | /* New context creation, which also does hardware initialisation, since I don't |
| 2755 |  * know a better way to structure this :smirk: */ |
| 2756 | |
| 2757 | struct pipe_context * |
| 2758 | panfrost_create_context(struct pipe_screen *screen, void *priv, unsigned flags) |
| 2759 | { |
| 2760 | struct panfrost_context *ctx = CALLOC_STRUCT(panfrost_context); |
| 2761 | /* CALLOC_STRUCT already returns zeroed memory, so no memset is needed */ |
| 2762 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 2763 | |
| 2764 | gallium->screen = screen; |
| 2765 | |
| 2766 | gallium->destroy = panfrost_destroy; |
| 2767 | |
| 2768 | gallium->set_framebuffer_state = panfrost_set_framebuffer_state; |
| 2769 | |
| 2770 | gallium->flush = panfrost_flush; |
| 2771 | gallium->clear = panfrost_clear; |
| 2772 | gallium->draw_vbo = panfrost_draw_vbo; |
| 2773 | |
| 2774 | gallium->set_vertex_buffers = panfrost_set_vertex_buffers; |
| 2775 | gallium->set_constant_buffer = panfrost_set_constant_buffer; |
| 2776 | |
| 2777 | gallium->set_stencil_ref = panfrost_set_stencil_ref; |
| 2778 | |
| 2779 | gallium->create_sampler_view = panfrost_create_sampler_view; |
| 2780 | gallium->set_sampler_views = panfrost_set_sampler_views; |
| 2781 | gallium->sampler_view_destroy = panfrost_sampler_view_destroy; |
| 2782 | |
| 2783 | gallium->create_rasterizer_state = panfrost_create_rasterizer_state; |
| 2784 | gallium->bind_rasterizer_state = panfrost_bind_rasterizer_state; |
| 2785 | gallium->delete_rasterizer_state = panfrost_generic_cso_delete; |
| 2786 | |
| 2787 | gallium->create_vertex_elements_state = panfrost_create_vertex_elements_state; |
| 2788 | gallium->bind_vertex_elements_state = panfrost_bind_vertex_elements_state; |
| 2789 | gallium->delete_vertex_elements_state = panfrost_delete_vertex_elements_state; |
| 2790 | |
| 2791 | gallium->create_fs_state = panfrost_create_shader_state; |
| 2792 | gallium->delete_fs_state = panfrost_delete_shader_state; |
| 2793 | gallium->bind_fs_state = panfrost_bind_fs_state; |
| 2794 | |
| 2795 | gallium->create_vs_state = panfrost_create_shader_state; |
| 2796 | gallium->delete_vs_state = panfrost_delete_shader_state; |
| 2797 | gallium->bind_vs_state = panfrost_bind_vs_state; |
| 2798 | |
| 2799 | gallium->create_sampler_state = panfrost_create_sampler_state; |
| 2800 | gallium->delete_sampler_state = panfrost_generic_cso_delete; |
| 2801 | gallium->bind_sampler_states = panfrost_bind_sampler_states; |
| 2802 | |
| 2803 | gallium->create_blend_state = panfrost_create_blend_state; |
| 2804 | gallium->bind_blend_state = panfrost_bind_blend_state; |
| 2805 | gallium->delete_blend_state = panfrost_delete_blend_state; |
| 2806 | |
| 2807 | gallium->set_blend_color = panfrost_set_blend_color; |
| 2808 | |
| 2809 | gallium->create_depth_stencil_alpha_state = panfrost_create_depth_stencil_state; |
| 2810 | gallium->bind_depth_stencil_alpha_state = panfrost_bind_depth_stencil_state; |
| 2811 | gallium->delete_depth_stencil_alpha_state = panfrost_delete_depth_stencil_state; |
| 2812 | |
| 2813 | gallium->set_sample_mask = panfrost_set_sample_mask; |
| 2814 | |
| 2815 | gallium->set_clip_state = panfrost_set_clip_state; |
| 2816 | gallium->set_viewport_states = panfrost_set_viewport_states; |
| 2817 | gallium->set_scissor_states = panfrost_set_scissor_states; |
| 2818 | gallium->set_polygon_stipple = panfrost_set_polygon_stipple; |
| 2819 | gallium->set_active_query_state = panfrost_set_active_query_state; |
| 2820 | |
| 2821 | gallium->create_query = panfrost_create_query; |
| 2822 | gallium->destroy_query = panfrost_destroy_query; |
| 2823 | gallium->begin_query = panfrost_begin_query; |
| 2824 | gallium->end_query = panfrost_end_query; |
| 2825 | gallium->get_query_result = panfrost_get_query_result; |
| 2826 | |
| 2827 | panfrost_resource_context_init(gallium); |
| 2828 | |
| 2829 | panfrost_setup_hardware(ctx); |
| 2830 | |
| 2831 | /* XXX: leaks */ |
| 2832 | gallium->stream_uploader = u_upload_create_default(gallium); |
| 2833 | gallium->const_uploader = gallium->stream_uploader; |
| 2834 | assert(gallium->stream_uploader); |
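| |
| | /* The default upload manager gives us a streaming buffer for user vertex |
| |  * arrays and similar transient data; pointing const_uploader at the same |
| |  * manager simply routes constant buffer uploads through that pool too. */ |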
| 2835 | |
Alyssa Rosenzweig | 85e2bb5 | 2019-02-08 02:28:12 +0000 | [diff] [blame] | 2836 | /* Midgard supports ES modes, plus QUADS/QUAD_STRIPS/POLYGON */ |
| 2837 | ctx->draw_modes = (1 << (PIPE_PRIM_POLYGON + 1)) - 1; |
| 2838 | |
| 2839 | ctx->primconvert = util_primconvert_create(gallium, ctx->draw_modes); |
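| |
| | /* Worked out: PIPE_PRIM_POINTS is 0 and PIPE_PRIM_POLYGON is the last of the |
| |  * basic primitive types, so (1 << (PIPE_PRIM_POLYGON + 1)) - 1 sets one bit |
| |  * per primitive from POINTS up to and including POLYGON. Anything outside |
| |  * the mask (adjacency primitives, for example) is lowered by the |
| |  * u_primconvert context created above before it reaches the driver's draw |
| |  * path. */ |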
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2840 | |
| 2841 | ctx->blitter = util_blitter_create(gallium); |
| 2842 | assert(ctx->blitter); |
| 2843 | |
| 2844 | /* Prepare for render! */ |
| 2845 | |
Alyssa Rosenzweig | 7da251f | 2019-02-05 04:32:27 +0000 | [diff] [blame] | 2846 | panfrost_emit_vertex_payload(ctx); |
| 2847 | panfrost_emit_tiler_payload(ctx); |
| 2848 | panfrost_invalidate_frame(ctx); |
| 2849 | panfrost_viewport(ctx, 0.0, 1.0, 0, 0, ctx->pipe_framebuffer.width, ctx->pipe_framebuffer.height); |
| 2850 | panfrost_default_shader_backend(ctx); |
| 2851 | panfrost_generate_space_filler_indices(); |
| 2852 | |
| 2853 | return gallium; |
| 2854 | } |
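| |
| | /* Sketch of how this entry point is expected to be exposed; the field is |
| |  * part of the standard pipe_screen vtable, though the actual wiring lives in |
| |  * pan_screen.c rather than here: |
| |  * |
| |  *     screen->context_create = panfrost_create_context; |
| |  * |
| |  * after which the state tracker calls it once per rendering context. */ |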