Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 1 | /* |
| 2 | * © Copyright 2017-2018 Alyssa Rosenzweig |
| 3 | * © Copyright 2017-2018 Connor Abbott |
| 4 | * © Copyright 2017-2018 Lyude Paul |
Alyssa Rosenzweig | d4575c3 | 2019-06-25 13:30:17 -0700 | [diff] [blame] | 5 | * © Copyright 2019 Collabora, Ltd. |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 6 | * |
| 7 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 8 | * copy of this software and associated documentation files (the "Software"), |
| 9 | * to deal in the Software without restriction, including without limitation |
| 10 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 11 | * and/or sell copies of the Software, and to permit persons to whom the |
| 12 | * Software is furnished to do so, subject to the following conditions: |
| 13 | * |
| 14 | * The above copyright notice and this permission notice (including the next |
| 15 | * paragraph) shall be included in all copies or substantial portions of the |
| 16 | * Software. |
| 17 | * |
| 18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 19 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 20 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 21 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 22 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 23 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 24 | * SOFTWARE. |
| 25 | * |
| 26 | */ |
| 27 | |
| 28 | #ifndef __PANFROST_JOB_H__ |
| 29 | #define __PANFROST_JOB_H__ |
| 30 | |
| 31 | #include <stdint.h> |
Icecream95 | f2f1277 | 2020-01-09 15:13:58 +1300 | [diff] [blame] | 32 | #include <stdbool.h> |
Alyssa Rosenzweig | 64f3c9d | 2020-08-05 16:05:12 -0400 | [diff] [blame] | 33 | #include <inttypes.h> |
| 34 | |
/* Short aliases for the fixed-width unsigned types used by the descriptors
 * in this header. */
typedef uint8_t  u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/* 64-bit value used wherever a descriptor stores a GPU address */
typedef uint64_t mali_ptr;
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 40 | |
/* Value of the 1-bit nondominant_mode field of struct mali_blend_mode */
enum mali_nondominant_mode {
        MALI_BLEND_NON_MIRROR = 0,
        MALI_BLEND_NON_ZERO = 1
};
| 45 | |
/* Value of the 1-bit dominant field of struct mali_blend_mode: selects
 * whether the source or the destination is the dominant operand. */
enum mali_dominant_blend {
        MALI_BLEND_DOM_SOURCE = 0,
        MALI_BLEND_DOM_DESTINATION = 1
};
| 50 | |
/* Value of the 3-bit dominant_factor field of struct mali_blend_mode. The
 * UNK entries are encodings whose meaning has not been identified. */
enum mali_dominant_factor {
        MALI_DOMINANT_UNK0 = 0,
        MALI_DOMINANT_ZERO = 1,
        MALI_DOMINANT_SRC_COLOR = 2,
        MALI_DOMINANT_DST_COLOR = 3,
        MALI_DOMINANT_UNK4 = 4,
        MALI_DOMINANT_SRC_ALPHA = 5,
        MALI_DOMINANT_DST_ALPHA = 6,
        MALI_DOMINANT_CONSTANT = 7,
};
| 61 | |
/* Value of the 2-bit clip_modifier field of struct mali_blend_mode */
enum mali_blend_modifier {
        MALI_BLEND_MOD_UNK0 = 0,
        MALI_BLEND_MOD_NORMAL = 1,
        MALI_BLEND_MOD_SOURCE_ONE = 2,
        MALI_BLEND_MOD_DEST_ONE = 3,
};
| 68 | |
| 69 | struct mali_blend_mode { |
| 70 | enum mali_blend_modifier clip_modifier : 2; |
| 71 | unsigned unused_0 : 1; |
| 72 | unsigned negate_source : 1; |
| 73 | |
| 74 | enum mali_dominant_blend dominant : 1; |
| 75 | |
| 76 | enum mali_nondominant_mode nondominant_mode : 1; |
| 77 | |
| 78 | unsigned unused_1 : 1; |
| 79 | |
| 80 | unsigned negate_dest : 1; |
| 81 | |
| 82 | enum mali_dominant_factor dominant_factor : 3; |
| 83 | unsigned complement_dominant : 1; |
| 84 | } __attribute__((packed)); |
| 85 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 86 | /* Compressed per-pixel formats. Each of these formats expands to one to four |
| 87 | * floating-point or integer numbers, as defined by the OpenGL specification. |
| 88 | * There are various places in OpenGL where the user can specify a compressed |
| 89 | * format in memory, which all use the same 8-bit enum in the various |
| 90 | * descriptors, although different hardware units support different formats. |
| 91 | */ |
| 92 | |
| 93 | /* The top 3 bits specify how the bits of each component are interpreted. */ |
| 94 | |
/* e.g. ETC2_RGB8 */
#define MALI_FORMAT_COMPRESSED (0 << 5)

/* e.g. R11F_G11F_B10F */
#define MALI_FORMAT_SPECIAL (2 << 5)

/* signed normalized, e.g. RGBA8_SNORM */
#define MALI_FORMAT_SNORM (3 << 5)

/* e.g. RGBA8UI */
#define MALI_FORMAT_UINT (4 << 5)

/* e.g. RGBA8 and RGBA32F */
#define MALI_FORMAT_UNORM (5 << 5)

/* e.g. RGBA8I and RGBA16F */
#define MALI_FORMAT_SINT (6 << 5)

/* These formats seem to largely duplicate the others. They're used at least
 * for Bifrost framebuffer output.
 */
#define MALI_FORMAT_SPECIAL2 (7 << 5)

/* Isolates the top 3 bits (the type) of an 8-bit format, for comparison
 * against the MALI_FORMAT_* values above */
#define MALI_EXTRACT_TYPE(fmt) ((fmt) & 0xe0)
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 118 | |
| 119 | /* If the high 3 bits are 3 to 6 these two bits say how many components |
| 120 | * there are. |
| 121 | */ |
/* Encode a channel count (1-4) into bits 3-4 of a format, and decode it back.
 * The argument is parenthesized so that expression arguments (e.g. a ternary)
 * are evaluated as a whole before the subtraction. */
#define MALI_NR_CHANNELS(n) (((n) - 1) << 3)
#define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1)
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 124 | |
| 125 | /* If the high 3 bits are 3 to 6, then the low 3 bits say how big each |
| 126 | * component is, except the special MALI_CHANNEL_FLOAT which overrides what the |
| 127 | * bits mean. |
| 128 | */ |
| 129 | |
#define MALI_CHANNEL_4 2

#define MALI_CHANNEL_8 3

#define MALI_CHANNEL_16 4

#define MALI_CHANNEL_32 5

/* For MALI_FORMAT_SINT it means a half-float (e.g. RG16F). For
 * MALI_FORMAT_UNORM, it means a 32-bit float.
 */
#define MALI_CHANNEL_FLOAT 7

/* Isolates the low 3 bits (the channel size code) of a format. The argument
 * is parenthesized so that an OR-ed expression is masked as a whole. */
#define MALI_EXTRACT_BITS(fmt) ((fmt) & 0x7)
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 143 | |
Alyssa Rosenzweig | 050b934 | 2019-05-04 21:57:01 +0000 | [diff] [blame] | 144 | /* The raw Midgard blend payload can either be an equation or a shader |
| 145 | * address, depending on the context */ |
| 146 | |
| 147 | union midgard_blend { |
| 148 | mali_ptr shader; |
Alyssa Rosenzweig | 3645c78 | 2019-05-18 20:36:00 +0000 | [diff] [blame] | 149 | |
| 150 | struct { |
Alyssa Rosenzweig | bf6d548 | 2020-08-18 18:15:45 -0400 | [diff] [blame] | 151 | struct mali_blend_equation_packed equation; |
Alyssa Rosenzweig | 3645c78 | 2019-05-18 20:36:00 +0000 | [diff] [blame] | 152 | float constant; |
| 153 | }; |
Alyssa Rosenzweig | 050b934 | 2019-05-04 21:57:01 +0000 | [diff] [blame] | 154 | }; |
| 155 | |
Alyssa Rosenzweig | 050b934 | 2019-05-04 21:57:01 +0000 | [diff] [blame] | 156 | struct midgard_blend_rt { |
Alyssa Rosenzweig | 94c9f87 | 2020-08-18 17:06:01 -0400 | [diff] [blame] | 157 | struct mali_blend_flags_packed flags; |
| 158 | u32 zero; |
Alyssa Rosenzweig | 050b934 | 2019-05-04 21:57:01 +0000 | [diff] [blame] | 159 | union midgard_blend blend; |
| 160 | } __attribute__((packed)); |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 161 | |
Alyssa Rosenzweig | 050b934 | 2019-05-04 21:57:01 +0000 | [diff] [blame] | 162 | /* On Bifrost systems (all MRT), each render target gets one of these |
| 163 | * descriptors */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 164 | |
/* Values for the 3-bit shader_type field of struct bifrost_blend_rt */
enum bifrost_shader_type {
        BIFROST_BLEND_F16 = 0,
        BIFROST_BLEND_F32 = 1,
        BIFROST_BLEND_I32 = 2,
        BIFROST_BLEND_U32 = 3,
        BIFROST_BLEND_I16 = 4,
        BIFROST_BLEND_U16 = 5,
};
| 173 | |
/* Upper bound on the number of Bifrost render targets */
#define BIFROST_MAX_RENDER_TARGET_COUNT 8
| 175 | |
Alyssa Rosenzweig | 050b934 | 2019-05-04 21:57:01 +0000 | [diff] [blame] | 176 | struct bifrost_blend_rt { |
| 177 | /* This is likely an analogue of the flags on |
| 178 | * midgard_blend_rt */ |
| 179 | |
Alyssa Rosenzweig | ae70538 | 2019-05-18 20:48:43 +0000 | [diff] [blame] | 180 | u16 flags; // = 0x200 |
| 181 | |
| 182 | /* Single-channel blend constants are encoded in a sort of |
| 183 | * fixed-point. Basically, the float is mapped to a byte, becoming |
| 184 | * a high byte, and then the lower-byte is added for precision. |
| 185 | * For the original float f: |
| 186 | * |
| 187 | * f = (constant_hi / 255) + (constant_lo / 65535) |
| 188 | * |
| 189 | * constant_hi = int(f / 255) |
| 190 | * constant_lo = 65535*f - (65535/255) * constant_hi |
| 191 | */ |
Alyssa Rosenzweig | ae70538 | 2019-05-18 20:48:43 +0000 | [diff] [blame] | 192 | u16 constant; |
| 193 | |
Alyssa Rosenzweig | bf6d548 | 2020-08-18 18:15:45 -0400 | [diff] [blame] | 194 | struct mali_blend_equation_packed equation; |
Tomeu Vizoso | 3c98c45 | 2020-04-24 08:40:51 +0200 | [diff] [blame] | 195 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 196 | /* |
| 197 | * - 0x19 normally |
| 198 | * - 0x3 when this slot is unused (everything else is 0 except the index) |
| 199 | * - 0x11 when this is the fourth slot (and it's used) |
Tomeu Vizoso | 3c98c45 | 2020-04-24 08:40:51 +0200 | [diff] [blame] | 200 | * - 0 when there is a blend shader |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 201 | */ |
| 202 | u16 unk2; |
Tomeu Vizoso | 3c98c45 | 2020-04-24 08:40:51 +0200 | [diff] [blame] | 203 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 204 | /* increments from 0 to 3 */ |
| 205 | u16 index; |
| 206 | |
Tomeu Vizoso | 3c98c45 | 2020-04-24 08:40:51 +0200 | [diff] [blame] | 207 | union { |
| 208 | struct { |
| 209 | /* So far, I've only seen: |
| 210 | * - R001 for 1-component formats |
| 211 | * - RG01 for 2-component formats |
| 212 | * - RGB1 for 3-component formats |
| 213 | * - RGBA for 4-component formats |
| 214 | */ |
| 215 | u32 swizzle : 12; |
| 216 | enum mali_format format : 8; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 217 | |
Tomeu Vizoso | 3c98c45 | 2020-04-24 08:40:51 +0200 | [diff] [blame] | 218 | /* Type of the shader output variable. Note, this can |
| 219 | * be different from the format. |
| 220 | * enum bifrost_shader_type |
| 221 | */ |
| 222 | u32 zero1 : 4; |
| 223 | u32 shader_type : 3; |
| 224 | u32 zero2 : 5; |
| 225 | }; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 226 | |
Tomeu Vizoso | 3c98c45 | 2020-04-24 08:40:51 +0200 | [diff] [blame] | 227 | /* Only the low 32 bits of the blend shader are stored, the |
| 228 | * high 32 bits are implicitly the same as the original shader. |
| 229 | * According to the kernel driver, the program counter for |
| 230 | * shaders is actually only 24 bits, so shaders cannot cross |
| 231 | * the 2^24-byte boundary, and neither can the blend shader. |
| 232 | * The blob handles this by allocating a 2^24 byte pool for |
| 233 | * shaders, and making sure that any blend shaders are stored |
| 234 | * in the same pool as the original shader. The kernel will |
| 235 | * make sure this allocation is aligned to 2^24 bytes. |
| 236 | */ |
| 237 | u32 shader; |
| 238 | }; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 239 | } __attribute__((packed)); |
| 240 | |
/* Possible values for job_descriptor_size */

#define MALI_JOB_32 0
#define MALI_JOB_64 1
| 245 | |
| 246 | struct mali_job_descriptor_header { |
| 247 | u32 exception_status; |
| 248 | u32 first_incomplete_task; |
| 249 | u64 fault_pointer; |
| 250 | u8 job_descriptor_size : 1; |
| 251 | enum mali_job_type job_type : 7; |
| 252 | u8 job_barrier : 1; |
| 253 | u8 unknown_flags : 7; |
| 254 | u16 job_index; |
| 255 | u16 job_dependency_index_1; |
| 256 | u16 job_dependency_index_2; |
Alyssa Rosenzweig | 65e5c19 | 2019-12-27 13:03:22 -0500 | [diff] [blame] | 257 | u64 next_job; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 258 | } __attribute__((packed)); |
| 259 | |
Alyssa Rosenzweig | adf716d | 2019-12-05 09:06:53 -0500 | [diff] [blame] | 260 | /* Details about write_value from panfrost igt tests which use it as a generic |
Alyssa Rosenzweig | 9eae950 | 2019-12-04 08:59:29 -0500 | [diff] [blame] | 261 | * dword write primitive */ |
| 262 | |
/* NOTE(review): presumably a value for
 * mali_payload_write_value.value_descriptor -- confirm against users */
#define MALI_WRITE_VALUE_ZERO 3
Alyssa Rosenzweig | 9eae950 | 2019-12-04 08:59:29 -0500 | [diff] [blame] | 264 | |
Alyssa Rosenzweig | adf716d | 2019-12-05 09:06:53 -0500 | [diff] [blame] | 265 | struct mali_payload_write_value { |
Alyssa Rosenzweig | 9eae950 | 2019-12-04 08:59:29 -0500 | [diff] [blame] | 266 | u64 address; |
| 267 | u32 value_descriptor; |
| 268 | u32 reserved; |
| 269 | u64 immediate; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 270 | } __attribute__((packed)); |
| 271 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 272 | /* |
| 273 | * Mali Attributes |
| 274 | * |
| 275 | * This structure lets the attribute unit compute the address of an attribute |
| 276 | * given the vertex and instance ID. Unfortunately, the way this works is |
| 277 | * rather complicated when instancing is enabled. |
| 278 | * |
| 279 | * To explain this, first we need to explain how compute and vertex threads are |
| 280 | * dispatched. This is a guess (although a pretty firm guess!) since the |
| 281 | * details are mostly hidden from the driver, except for attribute instancing. |
| 282 | * When a quad is dispatched, it receives a single, linear index. However, we |
| 283 | * need to translate that index into a (vertex id, instance id) pair, or a |
| 284 | * (local id x, local id y, local id z) triple for compute shaders (although |
| 285 | * vertex shaders and compute shaders are handled almost identically). |
| 286 | * Focusing on vertex shaders, one option would be to do: |
| 287 | * |
| 288 | * vertex_id = linear_id % num_vertices |
| 289 | * instance_id = linear_id / num_vertices |
| 290 | * |
| 291 | * but this involves a costly division and modulus by an arbitrary number. |
| 292 | * Instead, we could pad num_vertices. We dispatch padded_num_vertices * |
| 293 | * num_instances threads instead of num_vertices * num_instances, which results |
| 294 | * in some "extra" threads with vertex_id >= num_vertices, which we have to |
| 295 | * discard. The more we pad num_vertices, the more "wasted" threads we |
| 296 | * dispatch, but the division is potentially easier. |
| 297 | * |
| 298 | * One straightforward choice is to pad num_vertices to the next power of two, |
| 299 | * which means that the division and modulus are just simple bit shifts and |
| 300 | * masking. But the actual algorithm is a bit more complicated. The thread |
| 301 | * dispatcher has special support for dividing by 3, 5, 7, and 9, in addition |
| 302 | * to dividing by a power of two. This is possibly using the technique |
| 303 | * described in patent US20170010862A1. As a result, padded_num_vertices can be |
| 304 | * 1, 3, 5, 7, or 9 times a power of two. This results in fewer wasted threads, |
| 305 | * since we need less padding. |
| 306 | * |
| 307 | * padded_num_vertices is picked by the hardware. The driver just specifies the |
| 308 | * actual number of vertices. At least for Mali G71, the first few cases are |
| 309 | * given by: |
| 310 | * |
| 311 | * num_vertices | padded_num_vertices |
| 312 | * 3 | 4 |
| 313 | * 4-7 | 8 |
| 314 | * 8-11 | 12 (3 * 4) |
| 315 | * 12-15 | 16 |
| 316 | * 16-19 | 20 (5 * 4) |
| 317 | * |
| 318 | * Note that padded_num_vertices is a multiple of four (presumably because |
| 319 | * threads are dispatched in groups of 4). Also, padded_num_vertices is always |
| 320 | * at least one more than num_vertices, which seems like a quirk of the |
| 321 | * hardware. For larger num_vertices, the hardware uses the following |
| 322 | * algorithm: using the binary representation of num_vertices, we look at the |
| 323 | * most significant set bit as well as the following 3 bits. Let n be the |
| 324 | * number of bits after those 4 bits. Then we set padded_num_vertices according |
| 325 | * to the following table: |
| 326 | * |
| 327 | * high bits | padded_num_vertices |
| 328 | * 1000 | 9 * 2^n |
| 329 | * 1001 | 5 * 2^(n+1) |
| 330 | * 101x | 3 * 2^(n+2) |
| 331 | * 110x | 7 * 2^(n+1) |
| 332 | * 111x | 2^(n+4) |
| 333 | * |
| 334 | * For example, if num_vertices = 70 is passed to glDraw(), its binary |
| 335 | * representation is 1000110, so n = 3 and the high bits are 1000, and |
| 336 | * therefore padded_num_vertices = 9 * 2^3 = 72. |
| 337 | * |
| 338 | * The attribute unit works in terms of the original linear_id. If |
| 339 | * num_instances = 1, then they are the same, and everything is simple. |
| 340 | * However, with instancing things get more complicated. There are four |
| 341 | * possible modes, two of them we can group together: |
| 342 | * |
| 343 | * 1. Use the linear_id directly. Only used when there is no instancing. |
| 344 | * |
| 345 | * 2. Use the linear_id modulo a constant. This is used for per-vertex |
| 346 | * attributes with instancing enabled by making the constant equal |
| 347 | * padded_num_vertices. Because the modulus is always padded_num_vertices, this |
| 348 | * mode only supports a modulus that is a power of 2 times 1, 3, 5, 7, or 9. |
| 349 | * The shift field specifies the power of two, while the extra_flags field |
| 350 | * specifies the odd number. If shift = n and extra_flags = m, then the modulus |
| 351 | * is (2m + 1) * 2^n. As an example, if num_vertices = 70, then as computed |
| 352 | * above, padded_num_vertices = 9 * 2^3, so we should set extra_flags = 4 and |
| 353 | * shift = 3. Note that we must exactly follow the hardware algorithm used to |
| 354 | * get padded_num_vertices in order to correctly implement per-vertex |
| 355 | * attributes. |
| 356 | * |
| 357 | * 3. Divide the linear_id by a constant. In order to correctly implement |
| 358 | * instance divisors, we have to divide linear_id by padded_num_vertices times |
| 359 | * the user-specified divisor. So first we compute padded_num_vertices, again |
| 360 | * following the exact same algorithm that the hardware uses, then multiply it |
| 361 | * by the GL-level divisor to get the hardware-level divisor. This case is |
| 362 | * further divided into two more cases. If the hardware-level divisor is a |
| 363 | * power of two, then we just need to shift. The shift amount is specified by |
| 364 | * the shift field, so that the hardware-level divisor is just 2^shift. |
| 365 | * |
| 366 | * If it isn't a power of two, then we have to divide by an arbitrary integer. |
| 367 | * For that, we use the well-known technique of multiplying by an approximation |
| 368 | * of the inverse. The driver must compute the magic multiplier and shift |
| 369 | * amount, and then the hardware does the multiplication and shift. The |
| 370 | * hardware and driver also use the "round-down" optimization as described in |
| 371 | * http://ridiculousfish.com/files/faster_unsigned_division_by_constants.pdf. |
| 372 | * The hardware further assumes the multiplier is between 2^31 and 2^32, so the |
| 373 | * high bit is implicitly set to 1 even though it is set to 0 by the driver -- |
| 374 | * presumably this simplifies the hardware multiplier a little. The hardware |
| 375 | * first multiplies linear_id by the multiplier and takes the high 32 bits, |
| 376 | * then applies the round-down correction if extra_flags = 1, then finally |
| 377 | * shifts right by the shift field. |
| 378 | * |
| 379 | * There are some differences between ridiculousfish's algorithm and the Mali |
| 380 | * hardware algorithm, which means that the reference code from ridiculousfish |
| 381 | * doesn't always produce the right constants. Mali does not use the pre-shift |
| 382 | * optimization, since that would make a hardware implementation slower (it |
| 383 | * would have to always do the pre-shift, multiply, and post-shift operations). |
| 384 | * It also forces the multiplier to be at least 2^31, which means that the |
| 385 | * exponent is entirely fixed, so there is no trial-and-error. Altogether, |
| 386 | * given the divisor d, the algorithm the driver must follow is: |
| 387 | * |
| 388 | * 1. Set shift = floor(log2(d)). |
| 389 | * 2. Compute m = ceil(2^(shift + 32) / d) and e = 2^(shift + 32) % d. |
| 390 | * 3. If e <= 2^shift, then we need to use the round-down algorithm. Set |
| 391 | * magic_divisor = m - 1 and extra_flags = 1. |
| 392 | * 4. Otherwise, set magic_divisor = m and extra_flags = 0. |
| 393 | */ |
| 394 | |
/* Mask that clears the low 6 bits of a value */
#define FBD_MASK (~0x3f)

/* MFBD, rather than SFBD */
#define MALI_MFBD (0x1)

/* ORed into an MFBD address to specify the fbx section is included */
#define MALI_MFBD_TAG_EXTRA (0x2)
| 402 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 403 | /* On Bifrost, these fields are the same between the vertex and tiler payloads. |
| 404 | * They also seem to be the same between Bifrost and Midgard. They're shared in |
| 405 | * fused payloads. |
| 406 | */ |
| 407 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 408 | struct mali_vertex_tiler_prefix { |
Alyssa Rosenzweig | 02e768e | 2020-08-26 13:04:17 -0400 | [diff] [blame] | 409 | struct mali_invocation_packed invocation; |
Alyssa Rosenzweig | b60d567 | 2020-08-25 16:59:14 -0400 | [diff] [blame] | 410 | struct mali_primitive_packed primitive; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 411 | } __attribute__((packed)); |
| 412 | |
| 413 | /* Point size / line width can either be specified as a 32-bit float (for |
| 414 | * constant size) or as a [machine word size]-bit GPU pointer (for varying size). If a pointer |
| 415 | * is selected, by setting the appropriate MALI_DRAW_VARYING_SIZE bit in the tiler |
| 416 | * payload, the contents of varying_pointer will be interpreted as an array of |
| 417 | * fp16 sizes, one for each vertex. gl_PointSize is therefore implemented by |
| 418 | * creating a special MALI_R16F varying writing to varying_pointer. */ |
| 419 | |
| 420 | union midgard_primitive_size { |
| 421 | float constant; |
Tomeu Vizoso | 5a7688f | 2019-07-11 08:06:41 +0200 | [diff] [blame] | 422 | u64 pointer; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 423 | }; |
| 424 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 425 | struct midgard_payload_vertex_tiler { |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 426 | struct mali_vertex_tiler_prefix prefix; |
Alyssa Rosenzweig | eb261a8 | 2020-08-26 17:10:37 -0400 | [diff] [blame] | 427 | struct mali_draw_packed postfix; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 428 | union midgard_primitive_size primitive_size; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 429 | } __attribute__((packed)); |
| 430 | |
| 431 | struct bifrost_payload_vertex { |
| 432 | struct mali_vertex_tiler_prefix prefix; |
Alyssa Rosenzweig | eb261a8 | 2020-08-26 17:10:37 -0400 | [diff] [blame] | 433 | struct mali_draw_packed postfix; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 434 | } __attribute__((packed)); |
| 435 | |
| 436 | struct bifrost_payload_tiler { |
| 437 | struct mali_vertex_tiler_prefix prefix; |
Alyssa Rosenzweig | 4467e79 | 2020-08-26 13:21:06 -0400 | [diff] [blame] | 438 | union midgard_primitive_size primitive_size; |
| 439 | mali_ptr tiler_meta; |
| 440 | u64 zero1, zero2, zero3, zero4, zero5, zero6; |
Alyssa Rosenzweig | eb261a8 | 2020-08-26 17:10:37 -0400 | [diff] [blame] | 441 | struct mali_draw_packed postfix; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 442 | } __attribute__((packed)); |
| 443 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 444 | /* Purposeful off-by-one in width, height fields. For example, a (64, 64) |
| 445 | * texture is stored as (63, 63) in these fields. This adjusts for that. |
| 446 | * There's an identical pattern in the framebuffer descriptor. Even vertex |
| 447 | * count fields work this way, hence the generic name -- integral fields that |
| 448 | * are strictly positive generally need this adjustment. */ |
| 449 | |
/* Converts a strictly positive quantity to its stored (minus one) form. The
 * argument is parenthesized so expression arguments are evaluated as a whole
 * before the subtraction. */
#define MALI_POSITIVE(dim) ((dim) - 1)
| 451 | |
/* 8192x8192 */
#define MAX_MIP_LEVELS (13)

/* Cubemap bloats everything up */
#define MAX_CUBE_FACES (6)

/* For each pointer, there is an address and optionally also a stride */
#define MAX_ELEMENTS (2)
| 460 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 461 | /* Used for lod encoding. Thanks @urjaman for pointing out these routines can |
| 462 | * be cleaned up a lot. */ |
| 463 | |
/* Decodes a fixed-point value with 8 fractional bits into a float. The
 * argument is parenthesized so that the whole expression is divided, not
 * just its last operand. */
#define DECODE_FIXED_16(x) ((float) ((x) / 256.0))
| 465 | |
/* Encodes a float as signed fixed-point with 8 fractional bits.
 *
 * x is first clamped to [min, 32 - 1/512], where min is -(32 - 1/512) when
 * allow_negative is set and 0 otherwise, then scaled by 256 and truncated
 * toward zero. NaN is encoded as 0.
 */
static inline int16_t
FIXED_16(float x, bool allow_negative)
{
        /* Clamp inputs, accounting for float error */
        const float max_lod = (32.0 - (1.0 / 512.0));
        const float min_lod = allow_negative ? -max_lod : 0.0;

        /* NaN compares false against both bounds, so it would slip through
         * the clamp and reach the float-to-integer conversion, which is
         * undefined for NaN. */
        if (x != x)
                return 0;

        if (x > max_lod)
                x = max_lod;
        else if (x < min_lod)
                x = min_lod;

        /* After clamping, |x * 256| < 8192, so the result fits in int16_t */
        return (int16_t) (x * 256.0);
}
| 477 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 478 | /* From presentations, 16x16 tiles externally. Use shift for fast computation |
| 479 | * of tile numbers. */ |
| 480 | |
#define MALI_TILE_SHIFT 4
#define MALI_TILE_LENGTH (1 << MALI_TILE_SHIFT)

/* Tile coordinates are stored as a compact u32, as only 12 bits are needed for
 * each component. Notice that this provides a theoretical upper bound of (1 <<
 * 12) = 4096 tiles in each direction, addressing a maximum framebuffer of size
 * 65536x65536. Multiplying that together, times another four given that Mali
 * framebuffers are 32-bit ARGB8888, means that this upper bound would take 16
 * gigabytes of RAM just to store the uncompressed framebuffer itself, let
 * alone rendering in real-time to such a buffer.
 *
 * Nice job, guys.*/

/* From mali_kbase_10969_workaround.c */
#define MALI_X_COORD_MASK 0x00000FFF
#define MALI_Y_COORD_MASK 0x0FFF0000

/* Extract parts of a tile coordinate */

#define MALI_TILE_COORD_X(coord) ((coord) & MALI_X_COORD_MASK)
#define MALI_TILE_COORD_Y(coord) (((coord) & MALI_Y_COORD_MASK) >> 16)

/* Helpers to generate tile coordinates based on the boundary coordinates in
 * screen space. So, with the bounds (0, 0) to (128, 128) for the screen, these
 * functions would convert it to the bounding tiles (0, 0) to (7, 7).
 * Intentional "off-by-one"; finding the tile number is a form of fencepost
 * problem.
 *
 * Macro arguments are parenthesized so that expression arguments are
 * evaluated as a whole before the bias is subtracted. */

#define MALI_MAKE_TILE_COORDS(X, Y) ((X) | ((Y) << 16))
#define MALI_BOUND_TO_TILE(B, bias) (((B) - (bias)) >> MALI_TILE_SHIFT)
#define MALI_COORDINATE_TO_TILE(W, H, bias) MALI_MAKE_TILE_COORDS(MALI_BOUND_TO_TILE(W, bias), MALI_BOUND_TO_TILE(H, bias))
#define MALI_COORDINATE_TO_TILE_MIN(W, H) MALI_COORDINATE_TO_TILE(W, H, 0)
#define MALI_COORDINATE_TO_TILE_MAX(W, H) MALI_COORDINATE_TO_TILE(W, H, 1)
| 514 | |
| 515 | struct mali_payload_fragment { |
| 516 | u32 min_tile_coord; |
| 517 | u32 max_tile_coord; |
| 518 | mali_ptr framebuffer; |
| 519 | } __attribute__((packed)); |
| 520 | |
Alyssa Rosenzweig | 3f5cd44 | 2020-02-28 07:17:53 -0500 | [diff] [blame] | 521 | /* Configures multisampling on Bifrost fragment jobs */ |
Alyssa Rosenzweig | 254f40f | 2020-02-05 15:58:28 -0500 | [diff] [blame] | 522 | |
Alyssa Rosenzweig | 3f5cd44 | 2020-02-28 07:17:53 -0500 | [diff] [blame] | 523 | struct bifrost_multisampling { |
| 524 | u64 zero1; |
| 525 | u64 zero2; |
| 526 | mali_ptr sample_locations; |
| 527 | u64 zero4; |
| 528 | } __attribute__((packed)); |
Alyssa Rosenzweig | 254f40f | 2020-02-05 15:58:28 -0500 | [diff] [blame] | 529 | |
/* NOTE(review): presumably a bit of mali_rt_format.flags -- confirm */
#define MALI_MFBD_FORMAT_SRGB (1 << 0)
Alyssa Rosenzweig | d507951 | 2019-06-17 15:53:09 -0700 | [diff] [blame] | 531 | |
Alyssa Rosenzweig | f943047 | 2019-02-24 06:22:23 +0000 | [diff] [blame] | 532 | struct mali_rt_format { /* Packed bit-field record describing a render target format; the declared widths total 64 bits. It is the first member of struct mali_render_target. */ |
| 533 | unsigned unk1 : 32; /* purpose not documented here */ |
| 534 | unsigned unk2 : 3; /* purpose not documented here */ |
| 535 | |
| 536 | unsigned nr_channels : 2; /* MALI_POSITIVE: stored as the channel count minus one */ |
| 537 | |
Tomeu Vizoso | 28902ba | 2020-04-24 11:30:03 +0200 | [diff] [blame] | 538 | unsigned unk3 : 4; /* purpose not documented here */ |
| 539 | unsigned unk4 : 1; /* purpose not documented here */ |
Tomeu Vizoso | 9447a84 | 2019-10-30 12:05:30 +0100 | [diff] [blame] | 540 | enum mali_block_format block : 2; /* enum is declared outside this view */ |
Alyssa Rosenzweig | 99d17fb | 2020-08-11 21:04:01 -0400 | [diff] [blame] | 541 | enum mali_msaa msaa : 2; /* enum is declared outside this view */ |
Alyssa Rosenzweig | 2c47993 | 2020-07-21 18:51:07 -0400 | [diff] [blame] | 542 | unsigned flags : 2; /* presumably takes MALI_MFBD_FORMAT_* bits such as MALI_MFBD_FORMAT_SRGB -- confirm */ |
Alyssa Rosenzweig | f943047 | 2019-02-24 06:22:23 +0000 | [diff] [blame] | 543 | |
| 544 | unsigned swizzle : 12; /* NOTE(review): encoding is not shown here; presumably four 3-bit channel selectors -- confirm */ |
| 545 | |
Alyssa Rosenzweig | b78e04c | 2019-08-14 16:01:38 -0700 | [diff] [blame] | 546 | unsigned zero : 3; /* presumably must be written as zero (inferred from the name) */ |
| 547 | |
| 548 | /* Disables MFBD preload. When this bit is set, the render target will |
| 549 | * be cleared every frame. When this bit is clear, the hardware will |
| 550 | * automatically wallpaper the render target back from main memory. |
| 551 | * Unfortunately, MFBD preload is very broken on Midgard, so in |
| 552 | * practice, this is a chicken bit that should always be set. |
| 553 | * Discovered by accident, as all good chicken bits are. */ |
| 554 | |
| 555 | unsigned no_preload : 1; /* final bit of the 64 declared bits */ |
Alyssa Rosenzweig | f943047 | 2019-02-24 06:22:23 +0000 | [diff] [blame] | 556 | } __attribute__((packed)); |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 557 | |
Icecream95 | 9ac106d | 2020-06-02 14:13:03 +1200 | [diff] [blame] | 558 | /* Flags for afbc.flags (in struct mali_render_target) and ds_afbc.flags (in struct mali_framebuffer_extra) */ |
| 559 | |
| 560 | #define MALI_AFBC_FLAGS 0x10009 /* bits 0, 3 and 16 set; the meaning of the individual bits is not documented here */ |
| 561 | |
| 562 | /* Lossless RGB and RGBA colorspace transform */ |
| 563 | #define MALI_AFBC_YTR (1 << 17) /* bit 17, i.e. 0x20000 */ |
| 564 | |
Alyssa Rosenzweig | 6d9ee3e | 2020-02-10 08:51:37 -0500 | [diff] [blame] | 565 | struct mali_render_target { /* Packed descriptor of one render target; an array of these trails struct mali_framebuffer, after the optional struct mali_framebuffer_extra. */ |
Alyssa Rosenzweig | f943047 | 2019-02-24 06:22:23 +0000 | [diff] [blame] | 566 | struct mali_rt_format format; /* 64 bits of packed format bit-fields */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 567 | |
| 568 | u64 zero1; /* presumably must be written as zero (inferred from the name) */ |
| 569 | |
Alyssa Rosenzweig | c9b6233 | 2019-08-20 11:06:07 -0700 | [diff] [blame] | 570 | struct { |
| 571 | /* Stuff related to ARM Framebuffer Compression. When AFBC is enabled, |
| 572 | * there is an extra metadata buffer that contains 16 bytes per tile. |
| 573 | * The framebuffer needs to be the same size as before, since we don't |
| 574 | * know ahead of time how much space it will take up. The |
| 575 | * framebuffer_stride is set to 0, since the data isn't stored linearly |
| 576 | * anymore. |
| 577 | * |
| 578 | * When AFBC is disabled, these fields are zero. |
| 579 | */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 580 | |
Alyssa Rosenzweig | c9b6233 | 2019-08-20 11:06:07 -0700 | [diff] [blame] | 581 | mali_ptr metadata; /* address of the AFBC metadata buffer described above */ |
| 582 | u32 stride; // stride in units of tiles |
Icecream95 | 9ac106d | 2020-06-02 14:13:03 +1200 | [diff] [blame] | 583 | u32 flags; // takes the MALI_AFBC_* flag bits; the 0x20000 originally noted here equals MALI_AFBC_YTR |
Alyssa Rosenzweig | c9b6233 | 2019-08-20 11:06:07 -0700 | [diff] [blame] | 584 | } afbc; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 585 | |
| 586 | mali_ptr framebuffer; /* presumably the address of the color buffer itself -- confirm */ |
| 587 | |
| 588 | u32 zero2 : 4; /* 4 bits declared ahead of the 28-bit stride within the same u32; presumably zero */ |
Alyssa Rosenzweig | 3720458 | 2020-06-30 16:21:18 -0400 | [diff] [blame] | 589 | u32 framebuffer_stride : 28; // in units of bytes, row to next |
| 590 | u32 layer_stride; /* For multisample rendering */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 591 | |
| 592 | u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware |
| 593 | u32 clear_color_2; // always equal, but unclear function? |
| 594 | u32 clear_color_3; // always equal, but unclear function? |
| 595 | u32 clear_color_4; // always equal, but unclear function? |
| 596 | } __attribute__((packed)); |
| 597 | |
Alyssa Rosenzweig | 6d9ee3e | 2020-02-10 08:51:37 -0500 | [diff] [blame] | 598 | /* An optional part of mali_framebuffer. It comes between the main structure |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 599 | * and the array of render targets. It must be included if any of these are |
| 600 | * enabled: |
| 601 | * |
| 602 | * - Transaction Elimination |
| 603 | * - Depth/stencil |
| 604 | * - TODO: Anything else? |
| 605 | */ |
| 606 | |
Alyssa Rosenzweig | 6bd9c4d | 2020-01-10 13:12:35 -0500 | [diff] [blame] | 607 | /* Bits for the flags_hi field of struct mali_framebuffer_extra */ |
Alyssa Rosenzweig | e061bf0 | 2020-07-15 11:57:35 -0400 | [diff] [blame] | 608 | #define MALI_EXTRA_PRESENT (0x1) /* bit 0 of flags_hi */ |
Alyssa Rosenzweig | 587ad37 | 2019-03-09 00:45:23 +0000 | [diff] [blame] | 609 | |
Alyssa Rosenzweig | 6bd9c4d | 2020-01-10 13:12:35 -0500 | [diff] [blame] | 610 | /* Bits for the flags_lo field of struct mali_framebuffer_extra */ |
Alyssa Rosenzweig | 587ad37 | 2019-03-09 00:45:23 +0000 | [diff] [blame] | 611 | #define MALI_EXTRA_ZS (0x4) /* bit 2 of flags_lo; presumably marks a depth/stencil attachment -- confirm */ |
| 612 | |
Alyssa Rosenzweig | 6d9ee3e | 2020-02-10 08:51:37 -0500 | [diff] [blame] | 613 | struct mali_framebuffer_extra { /* Packed optional section placed between struct mali_framebuffer and the render target array; carries checksum and depth/stencil state. */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 614 | mali_ptr checksum; /* address of the per-tile checksum buffer */ |
| 615 | /* Each tile has an 8 byte checksum, so the stride is "width in tiles * 8" */ |
| 616 | u32 checksum_stride; |
| 617 | |
Alyssa Rosenzweig | 6bd9c4d | 2020-01-10 13:12:35 -0500 | [diff] [blame] | 618 | unsigned flags_lo : 4; /* takes the flags_lo bits, e.g. MALI_EXTRA_ZS; the four bit-fields from here to flags_hi total 32 bits */ |
| 619 | enum mali_block_format zs_block : 2; /* enum is declared outside this view */ |
Alyssa Rosenzweig | e061bf0 | 2020-07-15 11:57:35 -0400 | [diff] [blame] | 620 | |
| 621 | /* Number of samples in Z/S attachment, MALI_POSITIVE. So zero for |
| 622 | * 1-sample (non-MSAA), 0x3 for MSAA 4x, etc */ |
| 623 | unsigned zs_samples : 4; |
| 624 | unsigned flags_hi : 22; /* takes the flags_hi bits, e.g. MALI_EXTRA_PRESENT */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 625 | |
| 626 | union { /* two alternative layouts, ds_afbc and ds_linear, sharing the same storage */ |
| 627 | /* Note: AFBC is only allowed for 24/8 combined depth/stencil. */ |
| 628 | struct { |
| 629 | mali_ptr depth_stencil_afbc_metadata; |
| 630 | u32 depth_stencil_afbc_stride; // in units of tiles |
Icecream95 | 9ac106d | 2020-06-02 14:13:03 +1200 | [diff] [blame] | 631 | u32 flags; /* takes the MALI_AFBC_* flag bits */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 632 | |
| 633 | mali_ptr depth_stencil; |
| 634 | |
| 635 | u64 padding; /* presumably pads ds_afbc to the size of ds_linear -- confirm */ |
| 636 | } ds_afbc; |
| 637 | |
| 638 | struct { |
| 639 | /* Depth becomes depth/stencil in case of combined D/S */ |
| 640 | mali_ptr depth; |
| 641 | u32 depth_stride_zero : 4; /* 4 bits declared ahead of the 28-bit stride in the same u32; presumably zero */ |
| 642 | u32 depth_stride : 28; |
Alyssa Rosenzweig | 5e38d95 | 2020-07-03 11:27:48 -0400 | [diff] [blame] | 643 | u32 depth_layer_stride; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 644 | |
| 645 | mali_ptr stencil; |
| 646 | u32 stencil_stride_zero : 4; /* 4 bits declared ahead of the 28-bit stride in the same u32; presumably zero */ |
| 647 | u32 stencil_stride : 28; |
Alyssa Rosenzweig | 5e38d95 | 2020-07-03 11:27:48 -0400 | [diff] [blame] | 648 | u32 stencil_layer_stride; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 649 | } ds_linear; |
| 650 | }; |
| 651 | |
| 652 | |
Alyssa Rosenzweig | 81a3191 | 2020-04-06 19:45:30 -0400 | [diff] [blame] | 653 | u32 clear_color_1; /* NOTE(review): role of the two clear words in this section is not documented here */ |
| 654 | u32 clear_color_2; |
| 655 | u64 zero3; /* presumably must be written as zero (inferred from the name) */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 656 | } __attribute__((packed)); |
| 657 | |
Alyssa Rosenzweig | ac68946 | 2019-06-14 11:14:01 -0700 | [diff] [blame] | 658 | /* Flags for the 24-bit mfbd_flags field of struct mali_framebuffer */ |
Alyssa Rosenzweig | e57ea53 | 2019-03-09 00:12:07 +0000 | [diff] [blame] | 659 | |
| 660 | /* Enables writing depth results back to main memory (rather than keeping them |
| 661 | * on-chip in the tile buffer and then discarding) */ |
| 662 | |
| 663 | #define MALI_MFBD_DEPTH_WRITE (1 << 10) /* bit 10, i.e. 0x400 */ |
| 664 | |
Alyssa Rosenzweig | 6d9ee3e | 2020-02-10 08:51:37 -0500 | [diff] [blame] | 665 | /* The MFBD contains the extra mali_framebuffer_extra section */ |
Alyssa Rosenzweig | e57ea53 | 2019-03-09 00:12:07 +0000 | [diff] [blame] | 666 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 667 | #define MALI_MFBD_EXTRA (1 << 13) /* bit 13, i.e. 0x2000 */ |
| 668 | |
Alyssa Rosenzweig | 6d9ee3e | 2020-02-10 08:51:37 -0500 | [diff] [blame] | 669 | struct mali_framebuffer { /* Packed framebuffer descriptor (the MFBD that the MALI_MFBD_* flags refer to); optionally followed by struct mali_framebuffer_extra and then by the render target array. */ |
Alyssa Rosenzweig | 3f5cd44 | 2020-02-28 07:17:53 -0500 | [diff] [blame] | 670 | union { /* two views of the leading bytes, which end at offset 0x20 per the marker below */ |
Boris Brezillon | 3a06fc3 | 2020-09-03 09:18:09 +0200 | [diff] [blame] | 671 | struct mali_local_storage_packed shared_memory; /* type is declared outside this view */ |
Alyssa Rosenzweig | 3f5cd44 | 2020-02-28 07:17:53 -0500 | [diff] [blame] | 672 | struct bifrost_multisampling msaa; /* presumably the Bifrost interpretation of these bytes -- confirm */ |
| 673 | }; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 674 | |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 675 | /* 0x20 */ |
| 676 | u16 width1, height1; /* NOTE(review): two width/height pairs exist; how they differ, and whether they use MALI_POSITIVE, is not shown here */ |
| 677 | u32 zero3; /* presumably must be written as zero (inferred from the name) */ |
| 678 | u16 width2, height2; |
| 679 | u32 unk1 : 19; // = 0x01000 |
Icecream95 | 3ec252a | 2020-07-14 12:05:47 +1200 | [diff] [blame] | 680 | u32 rt_count_1 : 3; // render target count minus one (use MALI_POSITIVE) |
| 681 | u32 unk2 : 2; // = 0 |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 682 | u32 rt_count_2 : 3; // render target count, no off-by-one |
| 683 | u32 zero4 : 5; /* completes the 32 bits that begin at unk1 */ |
| 684 | /* 0x30 */ |
| 685 | u32 clear_stencil : 8; |
Alyssa Rosenzweig | ac68946 | 2019-06-14 11:14:01 -0700 | [diff] [blame] | 686 | u32 mfbd_flags : 24; // = 0x100; the MALI_MFBD_* flags belong to this field |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 687 | float clear_depth; |
Alyssa Rosenzweig | 85e745f | 2019-06-12 09:33:06 -0700 | [diff] [blame] | 688 | |
Tomeu Vizoso | 46e4246 | 2020-04-08 15:58:42 +0200 | [diff] [blame] | 689 | union { /* two alternative tiler layouts sharing the same storage */ |
Boris Brezillon | e855698 | 2020-09-05 18:16:37 +0200 | [diff] [blame] | 690 | struct { /* Midgard form, going by the type names */ |
| 691 | struct mali_midgard_tiler_packed tiler; |
| 692 | struct mali_midgard_tiler_weights_packed tiler_weights; |
| 693 | }; |
Tomeu Vizoso | 46e4246 | 2020-04-08 15:58:42 +0200 | [diff] [blame] | 694 | struct { /* presumably the Bifrost form: a pointer plus filler words -- confirm */ |
| 695 | mali_ptr tiler_meta; |
| 696 | u32 zeros[16]; /* sixteen u32 words; presumably zero-filled */ |
| 697 | }; |
| 698 | }; |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 699 | |
Alyssa Rosenzweig | 6d9ee3e | 2020-02-10 08:51:37 -0500 | [diff] [blame] | 700 | /* optional: struct mali_framebuffer_extra extra */ |
| 701 | /* struct mali_render_target rts[] */ |
Alyssa Rosenzweig | 61d3ae6 | 2019-01-29 05:46:07 +0000 | [diff] [blame] | 702 | } __attribute__((packed)); |
| 703 | |
| 704 | #endif /* __PANFROST_JOB_H__ */ |