Alyssa Rosenzweig | d878753 | 2019-06-07 09:39:31 -0700 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 2019 Collabora |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice (including the next |
| 12 | * paragraph) shall be included in all copies or substantial portions of the |
| 13 | * Software. |
| 14 | * |
| 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 21 | * SOFTWARE. |
| 22 | * |
| 23 | * Authors: |
| 24 | * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> |
| 25 | */ |
| 26 | |
| 27 | #include "pan_resource.h" |
| 28 | #include "util/u_format.h" |
| 29 | |
| 30 | /* Arm FrameBuffer Compression (AFBC) is a lossless compression scheme natively |
| 31 | * implemented in Mali GPUs (as well as many display controllers paired with |
| 32 | * Mali GPUs, etc). Where possible, Panfrost prefers to use AFBC for both |
| 33 | * rendering and texturing. In most cases, this is a performance win due to a |
| 34 | * dramatic reduction in memory bandwidth and improved cache locality compared |
| 35 | * to linear resources. |
| 36 | * |
| 37 | * AFBC divides the framebuffer into 16x16 tiles (other sizes possible, TODO: |
| 38 | * do we need to support this?). So, the width and height each must be aligned |
| 39 | * up to 16 pixels. This is inherently good for performance; note that for a 4 |
| 40 | * byte-per-pixel format like RGBA8888, that means that rows are 16*4=64 byte |
| 41 | * aligned, which is the cache-line size. |
| 42 | * |
| 43 | * For each AFBC-compressed resource, there is a single contiguous |
| 44 | * (CPU/GPU-shared) buffer. This buffer itself is divided into two parts: |
| 45 | * header and body, placed immediately after each other. |
| 46 | * |
| 47 | * The AFBC header contains 16 bytes of metadata per tile. |
| 48 | * |
| 49 | * The AFBC body is the same size as the original linear resource (padded to |
| 50 | * the nearest tile). Although the body comes immediately after the header, it |
| 51 | * must also be cache-line aligned, so there can sometimes be a bit of padding |
| 52 | * between the header and body. |
| 53 | * |
| 54 | * As an example, a 64x64 RGBA framebuffer contains 64/16 = 4 tiles horizontally and |
| 55 | * 4 tiles vertically. There are 4*4=16 tiles in total, each containing 16 |
| 56 | * bytes of metadata, so there is a 16*16=256 byte header. 64x64 is already |
| 57 | * tile aligned, so the body is 64*64 * 4 bytes per pixel = 16384 bytes of |
| 58 | * body. |
| 59 | * |
| 60 | * From userspace, Panfrost needs to be able to calculate these sizes. It |
| 61 | * explicitly does not and cannot know the format of the data contained within |
| 62 | * this header and body. The GPU has native support for AFBC encode/decode. For |
| 63 | * an internal FBO or a framebuffer used for scanout with an AFBC-compatible |
| 64 | * winsys/display-controller, the buffer is maintained AFBC throughout flight, |
| 65 | * and the driver never needs to know the internal data. For edge cases where |
| 66 | * the driver really does need to read/write from the AFBC resource, we |
| 67 | * generate a linear staging buffer and use the GPU to blit AFBC<--->linear. |
| 68 | * TODO: Implement me. */ |
| 69 | |
/* Dimensions of a single AFBC tile, in pixels */
#define AFBC_TILE_WIDTH 16
#define AFBC_TILE_HEIGHT 16

/* Bytes of header metadata stored for every tile */
#define AFBC_HEADER_BYTES_PER_TILE 16

/* Cache-line size in bytes; the header size and the body stride are both
 * rounded up to a multiple of this */
#define AFBC_CACHE_ALIGN 64
| 74 | |
| 75 | /* Is it possible to AFBC compress a particular format? Common formats (and |
| 76 | * YUV) are compressible. Some obscure formats are not and fallback on linear, |
| 77 | * at a performance hit. Also, if you need to disable AFBC entirely in the |
| 78 | * driver for debug/profiling, just always return false here. */ |
| 79 | |
| 80 | bool |
| 81 | panfrost_format_supports_afbc(enum pipe_format format) |
| 82 | { |
| 83 | const struct util_format_description *desc = |
| 84 | util_format_description(format); |
| 85 | |
| 86 | if (util_format_is_rgba8_variant(desc)) |
| 87 | return true; |
| 88 | |
| 89 | /* TODO: AFBC of other formats */ |
| 90 | /* TODO: AFBC of ZS */ |
| 91 | |
| 92 | return false; |
| 93 | } |
| 94 | |
| 95 | /* AFBC is enabled on a per-resource basis (AFBC enabling is theoretically |
| 96 | * indepdent between color buffers and depth/stencil). To enable, we allocate |
| 97 | * the AFBC metadata buffer and mark that it is enabled. We do -not- actually |
| 98 | * edit the fragment job here. This routine should be called ONCE per |
| 99 | * AFBC-compressed buffer, rather than on every frame. */ |
| 100 | |
| 101 | void |
| 102 | panfrost_enable_afbc(struct panfrost_context *ctx, struct panfrost_resource *rsrc, bool ds) |
| 103 | { |
| 104 | struct pipe_context *gallium = (struct pipe_context *) ctx; |
| 105 | struct panfrost_screen *screen = pan_screen(gallium->screen); |
| 106 | |
| 107 | unsigned width = rsrc->base.width0; |
| 108 | unsigned height = rsrc->base.height0; |
| 109 | unsigned bytes_per_pixel = util_format_get_blocksize(rsrc->base.format); |
| 110 | |
| 111 | /* Align to tile */ |
| 112 | unsigned aligned_width = ALIGN(width, AFBC_TILE_WIDTH); |
| 113 | unsigned aligned_height = ALIGN(height, AFBC_TILE_HEIGHT); |
| 114 | |
| 115 | /* Compute size in tiles, rather than pixels */ |
| 116 | unsigned tile_count_x = aligned_width / AFBC_TILE_WIDTH; |
| 117 | unsigned tile_count_y = aligned_height / AFBC_TILE_HEIGHT; |
| 118 | unsigned tile_count = tile_count_x * tile_count_y; |
| 119 | |
| 120 | unsigned header_bytes = tile_count * AFBC_HEADER_BYTES_PER_TILE; |
| 121 | unsigned header_size = ALIGN(header_bytes, AFBC_CACHE_ALIGN); |
| 122 | |
| 123 | /* The stride is a normal stride, but aligned */ |
| 124 | unsigned unaligned_stride = aligned_width * bytes_per_pixel; |
| 125 | unsigned stride = ALIGN(unaligned_stride, AFBC_CACHE_ALIGN); |
| 126 | |
| 127 | /* Compute the entire buffer size */ |
| 128 | unsigned body_size = stride * aligned_height; |
| 129 | unsigned buffer_size = header_size + body_size; |
| 130 | |
| 131 | /* Allocate the AFBC slab itself, large enough to hold the above */ |
| 132 | screen->driver->allocate_slab(screen, &rsrc->bo->afbc_slab, |
| 133 | ALIGN(buffer_size, 4096) / 4096, |
| 134 | true, 0, 0, 0); |
| 135 | |
| 136 | /* Compressed textured reads use a tagged pointer to the metadata */ |
| 137 | rsrc->bo->layout = PAN_AFBC; |
| 138 | rsrc->bo->gpu = rsrc->bo->afbc_slab.gpu | (ds ? 0 : 1); |
| 139 | rsrc->bo->cpu = rsrc->bo->afbc_slab.cpu; |
| 140 | rsrc->bo->gem_handle = rsrc->bo->afbc_slab.gem_handle; |
| 141 | rsrc->bo->afbc_metadata_size = header_size; |
| 142 | } |