Blame - src/panfrost/include/panfrost-job.h - platform/external/mesa3d

blob: 879023eacb417f2af2040a44f09ca23667570406 [file] [log] [blame]

Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	1	/*
				2	* © Copyright 2017-2018 Alyssa Rosenzweig
				3	* © Copyright 2017-2018 Connor Abbott
				4	* © Copyright 2017-2018 Lyude Paul
Alyssa Rosenzweig	d4575c3	2019-06-25 13:30:17 -0700	[diff] [blame]	5	* © Copyright 2019 Collabora, Ltd.
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	6	*
				7	* Permission is hereby granted, free of charge, to any person obtaining a
				8	* copy of this software and associated documentation files (the "Software"),
				9	* to deal in the Software without restriction, including without limitation
				10	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				11	* and/or sell copies of the Software, and to permit persons to whom the
				12	* Software is furnished to do so, subject to the following conditions:
				13	*
				14	* The above copyright notice and this permission notice (including the next
				15	* paragraph) shall be included in all copies or substantial portions of the
				16	* Software.
				17	*
				18	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				19	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				20	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				21	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				22	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				23	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				24	* SOFTWARE.
				25	*
				26	*/
				27
				28	#ifndef __PANFROST_JOB_H__
				29	#define __PANFROST_JOB_H__
				30
				31	#include <stdint.h>
Icecream95	f2f1277	2020-01-09 15:13:58 +1300	[diff] [blame]	32	#include <stdbool.h>
Alyssa Rosenzweig	64f3c9d	2020-08-05 16:05:12 -0400	[diff] [blame]	33	#include <inttypes.h>
				34
				35	typedef uint8_t u8;
				36	typedef uint16_t u16;
				37	typedef uint32_t u32;
				38	typedef uint64_t u64;
				39	typedef uint64_t mali_ptr;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	40
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	41	/* Applies to tiler_gl_enables */
				42
Alyssa Rosenzweig	2d22b53	2019-02-14 02:44:03 +0000	[diff] [blame]	43	#define MALI_OCCLUSION_QUERY (1 << 3)
				44	#define MALI_OCCLUSION_PRECISE (1 << 4)
				45
Alyssa Rosenzweig	2adf35e	2019-05-23 03:01:32 +0000	[diff] [blame]	46	/* Set for a glFrontFace(GL_CCW) in a Y=0=TOP coordinate system (like Gallium).
				47	* In OpenGL, this would correspond to glFrontFace(GL_CW). Mesa and the blob
				48	* disagree about how to do viewport flipping, so the blob actually sets this
				49	* for GL_CW but then has a negative viewport stride */
Alyssa Rosenzweig	44971b8	2019-10-27 19:46:50 -0400	[diff] [blame]	50
Alyssa Rosenzweig	2adf35e	2019-05-23 03:01:32 +0000	[diff] [blame]	51	#define MALI_FRONT_CCW_TOP (1 << 5)
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	52
Alyssa Rosenzweig	2d22b53	2019-02-14 02:44:03 +0000	[diff] [blame]	53	#define MALI_CULL_FACE_FRONT (1 << 6)
				54	#define MALI_CULL_FACE_BACK (1 << 7)
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	55
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	56	enum mali_nondominant_mode {
				57	MALI_BLEND_NON_MIRROR = 0,
				58	MALI_BLEND_NON_ZERO = 1
				59	};
				60
				61	enum mali_dominant_blend {
				62	MALI_BLEND_DOM_SOURCE = 0,
				63	MALI_BLEND_DOM_DESTINATION = 1
				64	};
				65
				66	enum mali_dominant_factor {
				67	MALI_DOMINANT_UNK0 = 0,
				68	MALI_DOMINANT_ZERO = 1,
				69	MALI_DOMINANT_SRC_COLOR = 2,
				70	MALI_DOMINANT_DST_COLOR = 3,
				71	MALI_DOMINANT_UNK4 = 4,
				72	MALI_DOMINANT_SRC_ALPHA = 5,
				73	MALI_DOMINANT_DST_ALPHA = 6,
				74	MALI_DOMINANT_CONSTANT = 7,
				75	};
				76
				77	enum mali_blend_modifier {
				78	MALI_BLEND_MOD_UNK0 = 0,
				79	MALI_BLEND_MOD_NORMAL = 1,
				80	MALI_BLEND_MOD_SOURCE_ONE = 2,
				81	MALI_BLEND_MOD_DEST_ONE = 3,
				82	};
				83
				84	struct mali_blend_mode {
				85	enum mali_blend_modifier clip_modifier : 2;
				86	unsigned unused_0 : 1;
				87	unsigned negate_source : 1;
				88
				89	enum mali_dominant_blend dominant : 1;
				90
				91	enum mali_nondominant_mode nondominant_mode : 1;
				92
				93	unsigned unused_1 : 1;
				94
				95	unsigned negate_dest : 1;
				96
				97	enum mali_dominant_factor dominant_factor : 3;
				98	unsigned complement_dominant : 1;
				99	} __attribute__((packed));
				100
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	101	/* Compressed per-pixel formats. Each of these formats expands to one to four
				102	* floating-point or integer numbers, as defined by the OpenGL specification.
				103	* There are various places in OpenGL where the user can specify a compressed
				104	* format in memory, which all use the same 8-bit enum in the various
				105	* descriptors, although different hardware units support different formats.
				106	*/
				107
				108	/* The top 3 bits specify how the bits of each component are interpreted. */
				109
Icecream95	960fe9d	2020-01-11 20:00:38 +1300	[diff] [blame]	110	/* e.g. ETC2_RGB8 */
				111	#define MALI_FORMAT_COMPRESSED (0 << 5)
				112
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	113	/* e.g. R11F_G11F_B10F */
				114	#define MALI_FORMAT_SPECIAL (2 << 5)
				115
				116	/* signed normalized, e.g. RGBA8_SNORM */
				117	#define MALI_FORMAT_SNORM (3 << 5)
				118
				119	/* e.g. RGBA8UI */
				120	#define MALI_FORMAT_UINT (4 << 5)
				121
				122	/* e.g. RGBA8 and RGBA32F */
				123	#define MALI_FORMAT_UNORM (5 << 5)
				124
				125	/* e.g. RGBA8I and RGBA16F */
				126	#define MALI_FORMAT_SINT (6 << 5)
				127
				128	/* These formats seem to largely duplicate the others. They're used at least
				129	* for Bifrost framebuffer output.
				130	*/
				131	#define MALI_FORMAT_SPECIAL2 (7 << 5)
Alyssa Rosenzweig	24c3b95	2020-06-10 15:35:41 -0400	[diff] [blame]	132	#define MALI_EXTRACT_TYPE(fmt) ((fmt) & 0xe0)
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	133
				134	/* If the high 3 bits are 3 to 6 these two bits say how many components
				135	* there are.
				136	*/
				137	#define MALI_NR_CHANNELS(n) ((n - 1) << 3)
Alyssa Rosenzweig	8462ca0	2020-06-10 15:47:45 -0400	[diff] [blame]	138	#define MALI_EXTRACT_CHANNELS(fmt) ((((fmt) >> 3) & 3) + 1)
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	139
				140	/* If the high 3 bits are 3 to 6, then the low 3 bits say how big each
				141	* component is, except the special MALI_CHANNEL_FLOAT which overrides what the
				142	* bits mean.
				143	*/
				144
Alyssa Rosenzweig	60270c8	2019-02-24 06:28:39 +0000	[diff] [blame]	145	#define MALI_CHANNEL_4 2
				146
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	147	#define MALI_CHANNEL_8 3
				148
				149	#define MALI_CHANNEL_16 4
				150
				151	#define MALI_CHANNEL_32 5
				152
				153	/* For MALI_FORMAT_SINT it means a half-float (e.g. RG16F). For
				154	* MALI_FORMAT_UNORM, it means a 32-bit float.
				155	*/
				156	#define MALI_CHANNEL_FLOAT 7
/* Extract the low 3 bits of a format word (the per-component size code, one of
 * the MALI_CHANNEL_* values). The argument is parenthesized so an expression
 * such as (a | b) is masked as a whole. */
#define MALI_EXTRACT_BITS(fmt) ((fmt) & 0x7)
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	158
Alyssa Rosenzweig	050b934	2019-05-04 21:57:01 +0000	[diff] [blame]	159	/* The raw Midgard blend payload can either be an equation or a shader
				160	* address, depending on the context */
				161
				162	union midgard_blend {
				163	mali_ptr shader;
Alyssa Rosenzweig	3645c78	2019-05-18 20:36:00 +0000	[diff] [blame]	164
				165	struct {
Alyssa Rosenzweig	bf6d548	2020-08-18 18:15:45 -0400	[diff] [blame]	166	struct mali_blend_equation_packed equation;
Alyssa Rosenzweig	3645c78	2019-05-18 20:36:00 +0000	[diff] [blame]	167	float constant;
				168	};
Alyssa Rosenzweig	050b934	2019-05-04 21:57:01 +0000	[diff] [blame]	169	};
				170
Alyssa Rosenzweig	050b934	2019-05-04 21:57:01 +0000	[diff] [blame]	171	struct midgard_blend_rt {
Alyssa Rosenzweig	94c9f87	2020-08-18 17:06:01 -0400	[diff] [blame]	172	struct mali_blend_flags_packed flags;
				173	u32 zero;
Alyssa Rosenzweig	050b934	2019-05-04 21:57:01 +0000	[diff] [blame]	174	union midgard_blend blend;
				175	} __attribute__((packed));
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	176
Alyssa Rosenzweig	050b934	2019-05-04 21:57:01 +0000	[diff] [blame]	177	/* On Bifrost systems (all MRT), each render target gets one of these
				178	* descriptors */
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	179
Alyssa Rosenzweig	33b13b9	2020-04-23 19:25:44 -0400	[diff] [blame]	180	enum bifrost_shader_type {
				181	BIFROST_BLEND_F16 = 0,
				182	BIFROST_BLEND_F32 = 1,
				183	BIFROST_BLEND_I32 = 2,
				184	BIFROST_BLEND_U32 = 3,
				185	BIFROST_BLEND_I16 = 4,
				186	BIFROST_BLEND_U16 = 5,
				187	};
				188
Tomeu Vizoso	3c98c45	2020-04-24 08:40:51 +0200	[diff] [blame]	189	#define BIFROST_MAX_RENDER_TARGET_COUNT 8
				190
Alyssa Rosenzweig	050b934	2019-05-04 21:57:01 +0000	[diff] [blame]	191	struct bifrost_blend_rt {
				192	/* This is likely an analogue of the flags on
				193	* midgard_blend_rt */
				194
Alyssa Rosenzweig	ae70538	2019-05-18 20:48:43 +0000	[diff] [blame]	195	u16 flags; // = 0x200
				196
				197	/* Single-channel blend constants are encoded in a sort of
				198	* fixed-point. Basically, the float is mapped to a byte, becoming
				199	* a high byte, and then the lower-byte is added for precision.
				200	* For the original float f:
				201	*
				202	* f = (constant_hi / 255) + (constant_lo / 65535)
				203	*
				204	* constant_hi = int(f * 255)
				205	* constant_lo = 65535 * f - (65535 / 255) * constant_hi
				206	*/
Alyssa Rosenzweig	ae70538	2019-05-18 20:48:43 +0000	[diff] [blame]	207	u16 constant;
				208
Alyssa Rosenzweig	bf6d548	2020-08-18 18:15:45 -0400	[diff] [blame]	209	struct mali_blend_equation_packed equation;
Tomeu Vizoso	3c98c45	2020-04-24 08:40:51 +0200	[diff] [blame]	210
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	211	/*
				212	* - 0x19 normally
				213	* - 0x3 when this slot is unused (everything else is 0 except the index)
				214	* - 0x11 when this is the fourth slot (and it's used)
Tomeu Vizoso	3c98c45	2020-04-24 08:40:51 +0200	[diff] [blame]	215	* - 0 when there is a blend shader
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	216	*/
				217	u16 unk2;
Tomeu Vizoso	3c98c45	2020-04-24 08:40:51 +0200	[diff] [blame]	218
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	219	/* increments from 0 to 3 */
				220	u16 index;
				221
Tomeu Vizoso	3c98c45	2020-04-24 08:40:51 +0200	[diff] [blame]	222	union {
				223	struct {
				224	/* So far, I've only seen:
				225	* - R001 for 1-component formats
				226	* - RG01 for 2-component formats
				227	* - RGB1 for 3-component formats
				228	* - RGBA for 4-component formats
				229	*/
				230	u32 swizzle : 12;
				231	enum mali_format format : 8;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	232
Tomeu Vizoso	3c98c45	2020-04-24 08:40:51 +0200	[diff] [blame]	233	/* Type of the shader output variable. Note, this can
				234	* be different from the format.
				235	* enum bifrost_shader_type
				236	*/
				237	u32 zero1 : 4;
				238	u32 shader_type : 3;
				239	u32 zero2 : 5;
				240	};
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	241
Tomeu Vizoso	3c98c45	2020-04-24 08:40:51 +0200	[diff] [blame]	242	/* Only the low 32 bits of the blend shader are stored, the
				243	* high 32 bits are implicitly the same as the original shader.
				244	* According to the kernel driver, the program counter for
				245	* shaders is actually only 24 bits, so shaders cannot cross
				246	* the 2^24-byte boundary, and neither can the blend shader.
				247	* The blob handles this by allocating a 2^24 byte pool for
				248	* shaders, and making sure that any blend shaders are stored
				249	* in the same pool as the original shader. The kernel will
				250	* make sure this allocation is aligned to 2^24 bytes.
				251	*/
				252	u32 shader;
				253	};
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	254	} __attribute__((packed));
				255
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	256	/* Possible values for job_descriptor_size */
				257
				258	#define MALI_JOB_32 0
				259	#define MALI_JOB_64 1
				260
				261	struct mali_job_descriptor_header {
				262	u32 exception_status;
				263	u32 first_incomplete_task;
				264	u64 fault_pointer;
				265	u8 job_descriptor_size : 1;
				266	enum mali_job_type job_type : 7;
				267	u8 job_barrier : 1;
				268	u8 unknown_flags : 7;
				269	u16 job_index;
				270	u16 job_dependency_index_1;
				271	u16 job_dependency_index_2;
Alyssa Rosenzweig	65e5c19	2019-12-27 13:03:22 -0500	[diff] [blame]	272	u64 next_job;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	273	} __attribute__((packed));
				274
Alyssa Rosenzweig	adf716d	2019-12-05 09:06:53 -0500	[diff] [blame]	275	/* Details about write_value from panfrost igt tests which use it as a generic
Alyssa Rosenzweig	9eae950	2019-12-04 08:59:29 -0500	[diff] [blame]	276	* dword write primitive */
				277
Alyssa Rosenzweig	adf716d	2019-12-05 09:06:53 -0500	[diff] [blame]	278	#define MALI_WRITE_VALUE_ZERO 3
Alyssa Rosenzweig	9eae950	2019-12-04 08:59:29 -0500	[diff] [blame]	279
Alyssa Rosenzweig	adf716d	2019-12-05 09:06:53 -0500	[diff] [blame]	280	struct mali_payload_write_value {
Alyssa Rosenzweig	9eae950	2019-12-04 08:59:29 -0500	[diff] [blame]	281	u64 address;
				282	u32 value_descriptor;
				283	u32 reserved;
				284	u64 immediate;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	285	} __attribute__((packed));
				286
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	287	/*
				288	* Mali Attributes
				289	*
				290	* This structure lets the attribute unit compute the address of an attribute
				291	* given the vertex and instance ID. Unfortunately, the way this works is
				292	* rather complicated when instancing is enabled.
				293	*
				294	* To explain this, first we need to explain how compute and vertex threads are
				295	* dispatched. This is a guess (although a pretty firm guess!) since the
				296	* details are mostly hidden from the driver, except for attribute instancing.
				297	* When a quad is dispatched, it receives a single, linear index. However, we
				298	* need to translate that index into a (vertex id, instance id) pair, or a
				299	* (local id x, local id y, local id z) triple for compute shaders (although
				300	* vertex shaders and compute shaders are handled almost identically).
				301	* Focusing on vertex shaders, one option would be to do:
				302	*
				303	* vertex_id = linear_id % num_vertices
				304	* instance_id = linear_id / num_vertices
				305	*
				306	* but this involves a costly division and modulus by an arbitrary number.
				307	* Instead, we could pad num_vertices. We dispatch padded_num_vertices *
				308	* num_instances threads instead of num_vertices * num_instances, which results
				309	* in some "extra" threads with vertex_id >= num_vertices, which we have to
				310	* discard. The more we pad num_vertices, the more "wasted" threads we
				311	* dispatch, but the division is potentially easier.
				312	*
				313	* One straightforward choice is to pad num_vertices to the next power of two,
				314	* which means that the division and modulus are just simple bit shifts and
				315	* masking. But the actual algorithm is a bit more complicated. The thread
				316	* dispatcher has special support for dividing by 3, 5, 7, and 9, in addition
				317	* to dividing by a power of two. This is possibly using the technique
				318	* described in patent US20170010862A1. As a result, padded_num_vertices can be
				319	* 1, 3, 5, 7, or 9 times a power of two. This results in less wasted threads,
				320	* since we need less padding.
				321	*
				322	* padded_num_vertices is picked by the hardware. The driver just specifies the
				323	* actual number of vertices. At least for Mali G71, the first few cases are
				324	* given by:
				325	*
				326	* num_vertices | padded_num_vertices
				327	* 3 | 4
				328	* 4-7 | 8
				329	* 8-11 | 12 (3 * 4)
				330	* 12-15 | 16
				331	* 16-19 | 20 (5 * 4)
				332	*
				333	* Note that padded_num_vertices is a multiple of four (presumably because
				334	* threads are dispatched in groups of 4). Also, padded_num_vertices is always
				335	* at least one more than num_vertices, which seems like a quirk of the
				336	* hardware. For larger num_vertices, the hardware uses the following
				337	* algorithm: using the binary representation of num_vertices, we look at the
				338	* most significant set bit as well as the following 3 bits. Let n be the
				339	* number of bits after those 4 bits. Then we set padded_num_vertices according
				340	* to the following table:
				341	*
				342	* high bits | padded_num_vertices
				343	* 1000 | 9 * 2^n
				344	* 1001 | 5 * 2^(n+1)
				345	* 101x | 3 * 2^(n+2)
				346	* 110x | 7 * 2^(n+1)
				347	* 111x | 2^(n+4)
				348	*
				349	* For example, if num_vertices = 70 is passed to glDraw(), its binary
				350	* representation is 1000110, so n = 3 and the high bits are 1000, and
				351	* therefore padded_num_vertices = 9 * 2^3 = 72.
				352	*
				353	* The attribute unit works in terms of the original linear_id. If
				354	* num_instances = 1, then they are the same, and everything is simple.
				355	* However, with instancing things get more complicated. There are four
				356	* possible modes, two of them we can group together:
				357	*
				358	* 1. Use the linear_id directly. Only used when there is no instancing.
				359	*
				360	* 2. Use the linear_id modulo a constant. This is used for per-vertex
				361	* attributes with instancing enabled by making the constant equal
				362	* padded_num_vertices. Because the modulus is always padded_num_vertices, this
				363	* mode only supports a modulus that is a power of 2 times 1, 3, 5, 7, or 9.
				364	* The shift field specifies the power of two, while the extra_flags field
				365	* specifies the odd number. If shift = n and extra_flags = m, then the modulus
				366	* is (2m + 1) * 2^n. As an example, if num_vertices = 70, then as computed
				367	* above, padded_num_vertices = 9 * 2^3, so we should set extra_flags = 4 and
				368	* shift = 3. Note that we must exactly follow the hardware algorithm used to
				369	* get padded_num_vertices in order to correctly implement per-vertex
				370	* attributes.
				371	*
				372	* 3. Divide the linear_id by a constant. In order to correctly implement
				373	* instance divisors, we have to divide linear_id by padded_num_vertices times
				374	* the user-specified divisor. So first we compute padded_num_vertices, again
				375	* following the exact same algorithm that the hardware uses, then multiply it
				376	* by the GL-level divisor to get the hardware-level divisor. This case is
				377	* further divided into two more cases. If the hardware-level divisor is a
				378	* power of two, then we just need to shift. The shift amount is specified by
				379	* the shift field, so that the hardware-level divisor is just 2^shift.
				380	*
				381	* If it isn't a power of two, then we have to divide by an arbitrary integer.
				382	* For that, we use the well-known technique of multiplying by an approximation
				383	* of the inverse. The driver must compute the magic multiplier and shift
				384	* amount, and then the hardware does the multiplication and shift. The
				385	* hardware and driver also use the "round-down" optimization as described in
				386	* http://ridiculousfish.com/files/faster_unsigned_division_by_constants.pdf.
				387	* The hardware further assumes the multiplier is between 2^31 and 2^32, so the
				388	* high bit is implicitly set to 1 even though it is set to 0 by the driver --
				389	* presumably this simplifies the hardware multiplier a little. The hardware
				390	* first multiplies linear_id by the multiplier and takes the high 32 bits,
				391	* then applies the round-down correction if extra_flags = 1, then finally
				392	* shifts right by the shift field.
				393	*
				394	* There are some differences between ridiculousfish's algorithm and the Mali
				395	* hardware algorithm, which means that the reference code from ridiculousfish
				396	* doesn't always produce the right constants. Mali does not use the pre-shift
				397	* optimization, since that would make a hardware implementation slower (it
				398	* would have to always do the pre-shift, multiply, and post-shift operations).
				399	* It also forces the multiplier to be at least 2^31, which means that the
				400	* exponent is entirely fixed, so there is no trial-and-error. Altogether,
				401	* given the divisor d, the algorithm the driver must follow is:
				402	*
				403	* 1. Set shift = floor(log2(d)).
				404	* 2. Compute m = ceil(2^(shift + 32) / d) and e = 2^(shift + 32) % d.
				405	* 3. If e <= 2^shift, then we need to use the round-down algorithm. Set
				406	* magic_divisor = m - 1 and extra_flags = 1.
				407	* 4. Otherwise, set magic_divisor = m and extra_flags = 0.
				408	*/
				409
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	410	#define FBD_MASK (~0x3f)
				411
Alyssa Rosenzweig	8959364	2019-12-16 12:05:45 -0500	[diff] [blame]	412	/* MFBD, rather than SFBD */
				413	#define MALI_MFBD (0x1)
				414
Alyssa Rosenzweig	f06e8f7	2019-08-21 12:06:50 -0700	[diff] [blame]	415	/* ORed into an MFBD address to specify the fbx section is included */
				416	#define MALI_MFBD_TAG_EXTRA (0x2)
				417
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	418	/* On Bifrost, these fields are the same between the vertex and tiler payloads.
				419	* They also seem to be the same between Bifrost and Midgard. They're shared in
				420	* fused payloads.
				421	*/
				422
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	423	struct mali_vertex_tiler_prefix {
Alyssa Rosenzweig	02e768e	2020-08-26 13:04:17 -0400	[diff] [blame^]	424	struct mali_invocation_packed invocation;
Alyssa Rosenzweig	b60d567	2020-08-25 16:59:14 -0400	[diff] [blame]	425	struct mali_primitive_packed primitive;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	426	} __attribute__((packed));
				427
				428	/* Point size / line width can either be specified as a 32-bit float (for
				429	* constant size) or as a 64-bit GPU pointer (for varying size). If a pointer
				430	* is selected, by setting the appropriate MALI_DRAW_VARYING_SIZE bit in the tiler
				431	* payload, the contents of varying_pointer will be interpreted as an array of
				432	* fp16 sizes, one for each vertex. gl_PointSize is therefore implemented by
				433	* creating a special MALI_R16F varying writing to varying_pointer. */
				434
				435	union midgard_primitive_size {
				436	float constant;
Tomeu Vizoso	5a7688f	2019-07-11 08:06:41 +0200	[diff] [blame]	437	u64 pointer;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	438	};
				439
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	440	struct bifrost_tiler_heap_meta {
				441	u32 zero;
				442	u32 heap_size;
				443	/* note: these are just guesses! */
				444	mali_ptr tiler_heap_start;
				445	mali_ptr tiler_heap_free;
				446	mali_ptr tiler_heap_end;
				447
				448	/* hierarchy weights? but they're still 0 after the job has run... */
Tomeu Vizoso	0a0b670	2020-04-09 09:39:17 +0200	[diff] [blame]	449	u32 zeros[10];
				450	u32 unk1;
				451	u32 unk7e007e;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	452	} __attribute__((packed));
				453
				454	struct bifrost_tiler_meta {
Tomeu Vizoso	7104e28	2020-04-27 17:09:39 +0200	[diff] [blame]	455	u32 tiler_heap_next_start; /* To be written by the GPU */
				456	u32 used_hierarchy_mask; /* To be written by the GPU */
Tomeu Vizoso	0a0b670	2020-04-09 09:39:17 +0200	[diff] [blame]	457	u16 hierarchy_mask; /* Five values observed: 0xa, 0x14, 0x28, 0x50, 0xa0 */
Alyssa Rosenzweig	7f26bb3	2019-06-13 10:25:32 -0700	[diff] [blame]	458	u16 flags;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	459	u16 width;
				460	u16 height;
Tomeu Vizoso	7104e28	2020-04-27 17:09:39 +0200	[diff] [blame]	461	u64 zero0;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	462	mali_ptr tiler_heap_meta;
				463	/* TODO what is this used for? */
				464	u64 zeros[20];
				465	} __attribute__((packed));
				466
				467	struct bifrost_tiler_only {
				468	/* 0x20 */
				469	union midgard_primitive_size primitive_size;
				470
				471	mali_ptr tiler_meta;
				472
				473	u64 zero1, zero2, zero3, zero4, zero5, zero6;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	474	} __attribute__((packed));
				475
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	476	struct mali_vertex_tiler_postfix {
Tomeu Vizoso	7b10d4e	2020-04-08 10:55:28 +0200	[diff] [blame]	477	u16 gl_enables; // 0x6 on Midgard, 0x2 on Bifrost
Alyssa Rosenzweig	b010a6d	2020-04-06 20:31:32 -0400	[diff] [blame]	478
				479	/* Both zero for non-instanced draws. For instanced draws, a
				480	* decomposition of padded_num_vertices. See the comments about the
				481	* corresponding fields in mali_attr for context. */
				482
				483	unsigned instance_shift : 5;
				484	unsigned instance_odd : 3;
				485
				486	u8 zero4;
				487
				488	/* Offset for first vertex in buffer */
				489	u32 offset_start;
				490
				491	u64 zero5;
				492
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	493	/* Zero for vertex jobs. Pointer to the position (gl_Position) varying
				494	* output from the vertex shader for tiler jobs.
				495	*/
				496
Tomeu Vizoso	5a7688f	2019-07-11 08:06:41 +0200	[diff] [blame]	497	u64 position_varying;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	498
				499	/* An array of mali_uniform_buffer_meta's. The size is given by the
				500	* shader_meta.
				501	*/
Tomeu Vizoso	5a7688f	2019-07-11 08:06:41 +0200	[diff] [blame]	502	u64 uniform_buffers;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	503
Alyssa Rosenzweig	497977b	2020-03-09 13:51:39 -0400	[diff] [blame]	504	/* On Bifrost, this is a pointer to an array of bifrost_texture_descriptor.
				505	* On Midgard, this is a pointer to an array of pointers to the texture
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	506	* descriptors, number of pointers bounded by number of textures. The
				507	* indirection is needed to accommodate varying numbers and sizes of
				508	* texture descriptors */
Alyssa Rosenzweig	497977b	2020-03-09 13:51:39 -0400	[diff] [blame]	509	u64 textures;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	510
				511	/* For OpenGL, from what I've seen, this is intimately connected to
				512	* texture_meta. cwabbott says this is not the case under Vulkan, hence
				513	* why this field is separate (Midgard is Vulkan capable). Pointer to
				514	* array of sampler descriptors (which are uniform in size) */
Tomeu Vizoso	5a7688f	2019-07-11 08:06:41 +0200	[diff] [blame]	515	u64 sampler_descriptor;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	516
Tomeu Vizoso	5a7688f	2019-07-11 08:06:41 +0200	[diff] [blame]	517	u64 uniforms;
Alyssa Rosenzweig	fa14cdf	2019-10-27 19:46:21 -0400	[diff] [blame]	518	u64 shader;
Tomeu Vizoso	5a7688f	2019-07-11 08:06:41 +0200	[diff] [blame]	519	u64 attributes; /* struct attribute_buffer[] */
				520	u64 attribute_meta; /* attribute_meta[] */
				521	u64 varyings; /* struct attr */
				522	u64 varying_meta; /* pointer */
				523	u64 viewport;
				524	u64 occlusion_counter; /* A single bit as far as I can tell */
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	525
Alyssa Rosenzweig	6dc1055	2020-02-10 08:47:09 -0500	[diff] [blame]	526	/* On Bifrost, this points directly to a mali_shared_memory structure.
				527	* On Midgard, this points to a framebuffer (either SFBD or MFBD as
				528	* tagged), which embeds a mali_shared_memory structure */
				529	mali_ptr shared_memory;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	530	} __attribute__((packed));
				531
				532	struct midgard_payload_vertex_tiler {
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	533	struct mali_vertex_tiler_prefix prefix;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	534	struct mali_vertex_tiler_postfix postfix;
				535
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	536	union midgard_primitive_size primitive_size;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	537	} __attribute__((packed));
				538
				539	struct bifrost_payload_vertex {
				540	struct mali_vertex_tiler_prefix prefix;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	541	struct mali_vertex_tiler_postfix postfix;
				542	} __attribute__((packed));
				543
				544	struct bifrost_payload_tiler {
				545	struct mali_vertex_tiler_prefix prefix;
				546	struct bifrost_tiler_only tiler;
				547	struct mali_vertex_tiler_postfix postfix;
				548	} __attribute__((packed));
				549
				550	struct bifrost_payload_fused {
				551	struct mali_vertex_tiler_prefix prefix;
				552	struct bifrost_tiler_only tiler;
				553	struct mali_vertex_tiler_postfix tiler_postfix;
Alyssa Rosenzweig	6b2457e	2019-05-18 21:04:33 +0000	[diff] [blame]	554	u64 padding; /* zero */
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	555	struct mali_vertex_tiler_postfix vertex_postfix;
				556	} __attribute__((packed));
				557
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	558	/* Purposeful off-by-one in width, height fields. For example, a (64, 64)
				559	* texture is stored as (63, 63) in these fields. This adjusts for that.
				560	* There's an identical pattern in the framebuffer descriptor. Even vertex
				561	* count fields work this way, hence the generic name -- integral fields that
				562	* are strictly positive generally need this adjustment. */
				563
				564	#define MALI_POSITIVE(dim) (dim - 1)
				565
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	566	/* 8192x8192 */
				567	#define MAX_MIP_LEVELS (13)
				568
				569	/* Cubemap bloats everything up */
Alyssa Rosenzweig	83c02a5	2019-06-17 14:26:08 -0700	[diff] [blame]	570	#define MAX_CUBE_FACES (6)
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	571
Alyssa Rosenzweig	416fc3b	2019-06-07 14:25:28 -0700	[diff] [blame]	572	/* For each pointer, there is an address and optionally also a stride */
				573	#define MAX_ELEMENTS (2)
				574
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	575	/* Used for lod encoding. Thanks @urjaman for pointing out these routines can
				576	* be cleaned up a lot. */
				577
				578	#define DECODE_FIXED_16(x) ((float) (x / 256.0))
				579
/* Encode a float as signed fixed point with 8 fractional bits: the value is
 * clamped, scaled by 256 and truncated toward zero.
 *
 * x: value to encode.
 * allow_negative: if false, negative inputs are clamped to zero; if true, the
 * permitted range is symmetric about zero.
 *
 * Returns the encoded value, which lies in [-8191, 8191] (or [0, 8191] when
 * negatives are not allowed). NaN encodes as 0. */

static inline int16_t
FIXED_16(float x, bool allow_negative)
{
        /* Upper bound backed off by 1/512 (half a fixed-point step) to
         * account for float error, so the scaled value truncates to 8191 */
        const float max_lod = (32.0 - (1.0 / 512.0));
        const float min_lod = allow_negative ? -max_lod : 0.0;

        /* NaN compares false against both bounds and would otherwise reach
         * the integer conversion below, which is undefined for NaN. Only NaN
         * is unequal to itself. */
        if (x != x)
                x = 0.0f;

        if (x > max_lod)
                x = max_lod;
        else if (x < min_lod)
                x = min_lod;

        /* In range after clamping, so the conversion is well defined */
        return (int16_t) (x * 256.0);
}
				591
/* From presentations, 16x16 tiles externally. Use shift for fast computation
 * of tile numbers. */

#define MALI_TILE_SHIFT 4
#define MALI_TILE_LENGTH (1 << MALI_TILE_SHIFT)

/* Tile coordinates are stored as a compact u32, as only 12 bits are needed
 * for each component. Notice that this provides a theoretical upper bound of
 * (1 << 12) = 4096 tiles in each direction, addressing a maximum framebuffer
 * of size 65536x65536. Multiplying that together, times another four given
 * that Mali framebuffers are 32-bit ARGB8888, means that this upper bound
 * would take 16 gigabytes of RAM just to store the uncompressed framebuffer
 * itself, let alone rendering in real-time to such a buffer.
 *
 * Nice job, guys. */

/* From mali_kbase_10969_workaround.c */
#define MALI_X_COORD_MASK 0x00000FFF
#define MALI_Y_COORD_MASK 0x0FFF0000

/* Extract parts of a tile coordinate: X lives in bits 0-11, Y in bits 16-27 */

#define MALI_TILE_COORD_X(coord) ((coord) & MALI_X_COORD_MASK)
#define MALI_TILE_COORD_Y(coord) (((coord) & MALI_Y_COORD_MASK) >> 16)

/* Helpers to generate tile coordinates based on the boundary coordinates in
 * screen space. So, with the bounds (0, 0) to (128, 128) for the screen, these
 * macros would convert it to the bounding tiles (0, 0) to (7, 7).
 * Intentional "off-by-one"; finding the tile number is a form of fencepost
 * problem: the _MAX variant subtracts a bias of 1 before dividing by the tile
 * length, the _MIN variant subtracts nothing.
 *
 * All macro arguments are parenthesized so that compound expressions are
 * evaluated as a whole before the subtraction and shift. */

#define MALI_MAKE_TILE_COORDS(X, Y) ((X) | ((Y) << 16))
#define MALI_BOUND_TO_TILE(B, bias) (((B) - (bias)) >> MALI_TILE_SHIFT)
#define MALI_COORDINATE_TO_TILE(W, H, bias) \
        MALI_MAKE_TILE_COORDS(MALI_BOUND_TO_TILE(W, bias), MALI_BOUND_TO_TILE(H, bias))
#define MALI_COORDINATE_TO_TILE_MIN(W, H) MALI_COORDINATE_TO_TILE(W, H, 0)
#define MALI_COORDINATE_TO_TILE_MAX(W, H) MALI_COORDINATE_TO_TILE(W, H, 1)
				628
				629	struct mali_payload_fragment {
				630	u32 min_tile_coord;
				631	u32 max_tile_coord;
				632	mali_ptr framebuffer;
				633	} __attribute__((packed));
				634
/* Single Framebuffer Descriptor */

/* Flags apply to format. With just MSAA_A and MSAA_B, the framebuffer is
 * configured for 4x. With MSAA_8, it is configured for 8x.
 *
 * NOTE(review): MSAA_A and MSAA_B currently name the same bit; confirm
 * against hardware traces whether MSAA_B should be a distinct bit. */

#define MALI_SFBD_FORMAT_MSAA_8 (1 << 3)
#define MALI_SFBD_FORMAT_MSAA_A (1 << 4)
#define MALI_SFBD_FORMAT_MSAA_B (1 << 4)
#define MALI_SFBD_FORMAT_SRGB   (1 << 5)

/* Fast/slow based on whether all three buffers are cleared at once */

#define MALI_CLEAR_FAST         (1 << 18)
#define MALI_CLEAR_SLOW         (1 << 28)

/* Bit 31 must be produced from an unsigned constant: shifting a signed 1 into
 * the sign bit of int is undefined behaviour, and the resulting negative value
 * would sign-extend when widened. */
#define MALI_CLEAR_SLOW_STENCIL (1u << 31)
				650
/* Configures hierarchical tiling on Midgard for both SFBD/MFBD (embedded
 * within the larger framebuffer descriptor). Analogous to
 * bifrost_tiler_heap_meta and bifrost_tiler_meta. */

/* Low nine bits select hierarchy levels. See pan_tiler.c for derivation */
#define MALI_HIERARCHY_MASK ((1 << 9) - 1)

/* Flag disabling the tiler for clear-only jobs, with hierarchical tiling */
#define MALI_TILER_DISABLED (1 << 12)

/* Flag selecting userspace-generated polygon list, for clear-only jobs without
 * hierarchical tiling. */
#define MALI_TILER_USER 0xFFF

/* Absent any geometry, the minimum size of the polygon list header */
#define MALI_TILER_MINIMUM_HEADER_SIZE 0x200
				668
Alyssa Rosenzweig	31fc52a	2019-07-10 07:22:19 -0700	[diff] [blame]	669	struct midgard_tiler_descriptor {
				670	/* Size of the entire polygon list; see pan_tiler.c for the
				671	* computation. It's based on hierarchical tiling */
				672
				673	u32 polygon_list_size;
				674
				675	/* Name known from the replay workaround in the kernel. What exactly is
				676	* flagged here is less known. We do that (tiler_hierarchy_mask & 0x1ff)
				677	* specifies a mask of hierarchy weights, which explains some of the
				678	* performance mysteries around setting it. We also see the bottom bit
Alyssa Rosenzweig	897110a	2019-08-19 14:47:50 -0700	[diff] [blame]	679	* of tiler_flags set in the kernel, but no comment why.
				680	*
				681	* hierarchy_mask can have the TILER_DISABLED flag */
Alyssa Rosenzweig	31fc52a	2019-07-10 07:22:19 -0700	[diff] [blame]	682
				683	u16 hierarchy_mask;
				684	u16 flags;
				685
				686	/* See mali_tiler.c for an explanation */
				687	mali_ptr polygon_list;
				688	mali_ptr polygon_list_body;
				689
				690	/* Names based on we see symmetry with replay jobs which name these
				691	* explicitly */
				692
				693	mali_ptr heap_start; /* tiler heap_free_address */
				694	mali_ptr heap_end;
				695
				696	/* Hierarchy weights. We know these are weights based on the kernel,
				697	* but I've never seen them be anything other than zero */
				698	u32 weights[8];
				699	};
				700
Tomeu Vizoso	9447a84	2019-10-30 12:05:30 +0100	[diff] [blame]	701	struct mali_sfbd_format {
				702	/* 0x1 */
				703	unsigned unk1 : 6;
				704
				705	/* mali_channel_swizzle */
				706	unsigned swizzle : 12;
				707
				708	/* MALI_POSITIVE */
				709	unsigned nr_channels : 2;
				710
				711	/* 0x4 */
				712	unsigned unk2 : 6;
				713
				714	enum mali_block_format block : 2;
				715
				716	/* 0xb */
				717	unsigned unk3 : 4;
				718	};
				719
Alyssa Rosenzweig	254f40f	2020-02-05 15:58:28 -0500	[diff] [blame]	720	/* Shared structure at the start of framebuffer descriptors, or used bare for
				721	* compute jobs, configuring stack and shared memory */
				722
				723	struct mali_shared_memory {
				724	u32 stack_shift : 4;
				725	u32 unk0 : 28;
				726
				727	/* Configuration for shared memory for compute shaders.
				728	* shared_workgroup_count is logarithmic and may be computed for a
				729	* compute shader using shared memory as:
				730	*
				731	* shared_workgroup_count = MAX2(ceil(log2(count_x)) + ... + ceil(log2(count_z), 10)
				732	*
				733	* For compute shaders that don't use shared memory, or non-compute
				734	* shaders, this is set to ~0
				735	*/
				736
				737	u32 shared_workgroup_count : 5;
				738	u32 shared_unk1 : 3;
				739	u32 shared_shift : 4;
				740	u32 shared_zero : 20;
				741
Alyssa Rosenzweig	6c63727	2019-12-09 08:41:07 -0500	[diff] [blame]	742	mali_ptr scratchpad;
				743
Alyssa Rosenzweig	254f40f	2020-02-05 15:58:28 -0500	[diff] [blame]	744	/* For compute shaders, the RAM backing of workgroup-shared memory. For
				745	* fragment shaders on Bifrost, apparently multisampling locations */
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	746
Alyssa Rosenzweig	254f40f	2020-02-05 15:58:28 -0500	[diff] [blame]	747	mali_ptr shared_memory;
				748	mali_ptr unknown1;
				749	} __attribute__((packed));
				750
Alyssa Rosenzweig	3f5cd44	2020-02-28 07:17:53 -0500	[diff] [blame]	751	/* Configures multisampling on Bifrost fragment jobs */
Alyssa Rosenzweig	254f40f	2020-02-05 15:58:28 -0500	[diff] [blame]	752
Alyssa Rosenzweig	3f5cd44	2020-02-28 07:17:53 -0500	[diff] [blame]	753	struct bifrost_multisampling {
				754	u64 zero1;
				755	u64 zero2;
				756	mali_ptr sample_locations;
				757	u64 zero4;
				758	} __attribute__((packed));
Alyssa Rosenzweig	254f40f	2020-02-05 15:58:28 -0500	[diff] [blame]	759
				760	struct mali_single_framebuffer {
				761	struct mali_shared_memory shared_memory;
Tomeu Vizoso	9447a84	2019-10-30 12:05:30 +0100	[diff] [blame]	762	struct mali_sfbd_format format;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	763
				764	u32 clear_flags;
				765	u32 zero2;
				766
				767	/* Purposeful off-by-one in these fields should be accounted for by the
				768	* MALI_DIMENSION macro */
				769
				770	u16 width;
				771	u16 height;
				772
Tomeu Vizoso	23fe7cd	2019-07-12 12:38:50 +0200	[diff] [blame]	773	u32 zero3[4];
				774	mali_ptr checksum;
				775	u32 checksum_stride;
				776	u32 zero5;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	777
				778	/* By default, the framebuffer is upside down from OpenGL's
				779	* perspective. Set framebuffer to the end and negate the stride to
				780	* flip in the Y direction */
				781
				782	mali_ptr framebuffer;
				783	int32_t stride;
				784
				785	u32 zero4;
				786
				787	/* Depth and stencil buffers are interleaved, it appears, as they are
				788	* set to the same address in captures. Both fields set to zero if the
				789	* buffer is not being cleared. Depending on GL_ENABLE magic, you might
				790	* get a zero enable despite the buffer being present; that still is
				791	* disabled. */
				792
				793	mali_ptr depth_buffer; // not SAME_VA
Tomeu Vizoso	9447a84	2019-10-30 12:05:30 +0100	[diff] [blame]	794	u32 depth_stride_zero : 4;
				795	u32 depth_stride : 28;
				796	u32 zero7;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	797
				798	mali_ptr stencil_buffer; // not SAME_VA
Tomeu Vizoso	9447a84	2019-10-30 12:05:30 +0100	[diff] [blame]	799	u32 stencil_stride_zero : 4;
				800	u32 stencil_stride : 28;
				801	u32 zero8;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	802
				803	u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware
				804	u32 clear_color_2; // always equal, but unclear function?
				805	u32 clear_color_3; // always equal, but unclear function?
				806	u32 clear_color_4; // always equal, but unclear function?
				807
				808	/* Set to zero if not cleared */
				809
				810	float clear_depth_1; // float32, ditto
				811	float clear_depth_2; // float32, ditto
				812	float clear_depth_3; // float32, ditto
				813	float clear_depth_4; // float32, ditto
				814
				815	u32 clear_stencil; // Exactly as it appears in OpenGL
				816
				817	u32 zero6[7];
				818
Alyssa Rosenzweig	31fc52a	2019-07-10 07:22:19 -0700	[diff] [blame]	819	struct midgard_tiler_descriptor tiler;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	820
				821	/* More below this, maybe */
				822	} __attribute__((packed));
				823
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	824
Alyssa Rosenzweig	2c47993	2020-07-21 18:51:07 -0400	[diff] [blame]	825	#define MALI_MFBD_FORMAT_SRGB (1 << 0)
Alyssa Rosenzweig	d507951	2019-06-17 15:53:09 -0700	[diff] [blame]	826
Alyssa Rosenzweig	f943047	2019-02-24 06:22:23 +0000	[diff] [blame]	827	struct mali_rt_format {
				828	unsigned unk1 : 32;
				829	unsigned unk2 : 3;
				830
				831	unsigned nr_channels : 2; /* MALI_POSITIVE */
				832
Tomeu Vizoso	28902ba	2020-04-24 11:30:03 +0200	[diff] [blame]	833	unsigned unk3 : 4;
				834	unsigned unk4 : 1;
Tomeu Vizoso	9447a84	2019-10-30 12:05:30 +0100	[diff] [blame]	835	enum mali_block_format block : 2;
Alyssa Rosenzweig	99d17fb	2020-08-11 21:04:01 -0400	[diff] [blame]	836	enum mali_msaa msaa : 2;
Alyssa Rosenzweig	2c47993	2020-07-21 18:51:07 -0400	[diff] [blame]	837	unsigned flags : 2;
Alyssa Rosenzweig	f943047	2019-02-24 06:22:23 +0000	[diff] [blame]	838
				839	unsigned swizzle : 12;
				840
Alyssa Rosenzweig	b78e04c	2019-08-14 16:01:38 -0700	[diff] [blame]	841	unsigned zero : 3;
				842
				843	/* Disables MFBD preload. When this bit is set, the render target will
				844	* be cleared every frame. When this bit is clear, the hardware will
				845	* automatically wallpaper the render target back from main memory.
				846	* Unfortunately, MFBD preload is very broken on Midgard, so in
				847	* practice, this is a chicken bit that should always be set.
				848	* Discovered by accident, as all good chicken bits are. */
				849
				850	unsigned no_preload : 1;
Alyssa Rosenzweig	f943047	2019-02-24 06:22:23 +0000	[diff] [blame]	851	} __attribute__((packed));
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	852
/* Values for the afbc.flags and ds_afbc.flags fields */

#define MALI_AFBC_FLAGS 0x10009

/* Enables the lossless RGB and RGBA colorspace transform */
#define MALI_AFBC_YTR (1 << 17)
				859
Alyssa Rosenzweig	6d9ee3e	2020-02-10 08:51:37 -0500	[diff] [blame]	860	struct mali_render_target {
Alyssa Rosenzweig	f943047	2019-02-24 06:22:23 +0000	[diff] [blame]	861	struct mali_rt_format format;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	862
				863	u64 zero1;
				864
Alyssa Rosenzweig	c9b6233	2019-08-20 11:06:07 -0700	[diff] [blame]	865	struct {
				866	/* Stuff related to ARM Framebuffer Compression. When AFBC is enabled,
				867	* there is an extra metadata buffer that contains 16 bytes per tile.
				868	* The framebuffer needs to be the same size as before, since we don't
				869	* know ahead of time how much space it will take up. The
				870	* framebuffer_stride is set to 0, since the data isn't stored linearly
				871	* anymore.
				872	*
				873	* When AFBC is disabled, these fields are zero.
				874	*/
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	875
Alyssa Rosenzweig	c9b6233	2019-08-20 11:06:07 -0700	[diff] [blame]	876	mali_ptr metadata;
				877	u32 stride; // stride in units of tiles
Icecream95	9ac106d	2020-06-02 14:13:03 +1200	[diff] [blame]	878	u32 flags; // = 0x20000
Alyssa Rosenzweig	c9b6233	2019-08-20 11:06:07 -0700	[diff] [blame]	879	} afbc;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	880
				881	mali_ptr framebuffer;
				882
				883	u32 zero2 : 4;
Alyssa Rosenzweig	3720458	2020-06-30 16:21:18 -0400	[diff] [blame]	884	u32 framebuffer_stride : 28; // in units of bytes, row to next
				885	u32 layer_stride; /* For multisample rendering */
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	886
				887	u32 clear_color_1; // RGBA8888 from glClear, actually used by hardware
				888	u32 clear_color_2; // always equal, but unclear function?
				889	u32 clear_color_3; // always equal, but unclear function?
				890	u32 clear_color_4; // always equal, but unclear function?
				891	} __attribute__((packed));
				892
Alyssa Rosenzweig	6d9ee3e	2020-02-10 08:51:37 -0500	[diff] [blame]	893	/* An optional part of mali_framebuffer. It comes between the main structure
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	894	* and the array of render targets. It must be included if any of these are
				895	* enabled:
				896	*
				897	* - Transaction Elimination
				898	* - Depth/stencil
				899	* - TODO: Anything else?
				900	*/
				901
Alyssa Rosenzweig	6bd9c4d	2020-01-10 13:12:35 -0500	[diff] [blame]	902	/* flags_hi */
Alyssa Rosenzweig	e061bf0	2020-07-15 11:57:35 -0400	[diff] [blame]	903	#define MALI_EXTRA_PRESENT (0x1)
Alyssa Rosenzweig	587ad37	2019-03-09 00:45:23 +0000	[diff] [blame]	904
Alyssa Rosenzweig	6bd9c4d	2020-01-10 13:12:35 -0500	[diff] [blame]	905	/* flags_lo */
Alyssa Rosenzweig	587ad37	2019-03-09 00:45:23 +0000	[diff] [blame]	906	#define MALI_EXTRA_ZS (0x4)
				907
Alyssa Rosenzweig	6d9ee3e	2020-02-10 08:51:37 -0500	[diff] [blame]	908	struct mali_framebuffer_extra {
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	909	mali_ptr checksum;
				910	/* Each tile has an 8 byte checksum, so the stride is "width in tiles * 8" */
				911	u32 checksum_stride;
				912
Alyssa Rosenzweig	6bd9c4d	2020-01-10 13:12:35 -0500	[diff] [blame]	913	unsigned flags_lo : 4;
				914	enum mali_block_format zs_block : 2;
Alyssa Rosenzweig	e061bf0	2020-07-15 11:57:35 -0400	[diff] [blame]	915
				916	/* Number of samples in Z/S attachment, MALI_POSITIVE. So zero for
				917	* 1-sample (non-MSAA), 0x3 for MSAA 4x, etc */
				918	unsigned zs_samples : 4;
				919	unsigned flags_hi : 22;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	920
				921	union {
				922	/* Note: AFBC is only allowed for 24/8 combined depth/stencil. */
				923	struct {
				924	mali_ptr depth_stencil_afbc_metadata;
				925	u32 depth_stencil_afbc_stride; // in units of tiles
Icecream95	9ac106d	2020-06-02 14:13:03 +1200	[diff] [blame]	926	u32 flags;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	927
				928	mali_ptr depth_stencil;
				929
				930	u64 padding;
				931	} ds_afbc;
				932
				933	struct {
				934	/* Depth becomes depth/stencil in case of combined D/S */
				935	mali_ptr depth;
				936	u32 depth_stride_zero : 4;
				937	u32 depth_stride : 28;
Alyssa Rosenzweig	5e38d95	2020-07-03 11:27:48 -0400	[diff] [blame]	938	u32 depth_layer_stride;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	939
				940	mali_ptr stencil;
				941	u32 stencil_stride_zero : 4;
				942	u32 stencil_stride : 28;
Alyssa Rosenzweig	5e38d95	2020-07-03 11:27:48 -0400	[diff] [blame]	943	u32 stencil_layer_stride;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	944	} ds_linear;
				945	};
				946
				947
Alyssa Rosenzweig	81a3191	2020-04-06 19:45:30 -0400	[diff] [blame]	948	u32 clear_color_1;
				949	u32 clear_color_2;
				950	u64 zero3;
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	951	} __attribute__((packed));
				952
/* Flags for mfbd_flags */

/* Write depth results back to main memory, instead of keeping them on-chip in
 * the tile buffer and then discarding them */

#define MALI_MFBD_DEPTH_WRITE (1 << 10)

/* The MFBD is followed by the extra mali_framebuffer_extra section */

#define MALI_MFBD_EXTRA (1 << 13)
				963
Alyssa Rosenzweig	6d9ee3e	2020-02-10 08:51:37 -0500	[diff] [blame]	964	struct mali_framebuffer {
Alyssa Rosenzweig	3f5cd44	2020-02-28 07:17:53 -0500	[diff] [blame]	965	union {
				966	struct mali_shared_memory shared_memory;
				967	struct bifrost_multisampling msaa;
				968	};
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	969
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	970	/* 0x20 */
				971	u16 width1, height1;
				972	u32 zero3;
				973	u16 width2, height2;
				974	u32 unk1 : 19; // = 0x01000
Icecream95	3ec252a	2020-07-14 12:05:47 +1200	[diff] [blame]	975	u32 rt_count_1 : 3; // off-by-one (use MALI_POSITIVE)
				976	u32 unk2 : 2; // = 0
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	977	u32 rt_count_2 : 3; // no off-by-one
				978	u32 zero4 : 5;
				979	/* 0x30 */
				980	u32 clear_stencil : 8;
Alyssa Rosenzweig	ac68946	2019-06-14 11:14:01 -0700	[diff] [blame]	981	u32 mfbd_flags : 24; // = 0x100
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	982	float clear_depth;
Alyssa Rosenzweig	85e745f	2019-06-12 09:33:06 -0700	[diff] [blame]	983
Tomeu Vizoso	46e4246	2020-04-08 15:58:42 +0200	[diff] [blame]	984	union {
				985	struct midgard_tiler_descriptor tiler;
				986	struct {
				987	mali_ptr tiler_meta;
				988	u32 zeros[16];
				989	};
				990	};
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	991
Alyssa Rosenzweig	6d9ee3e	2020-02-10 08:51:37 -0500	[diff] [blame]	992	/* optional: struct mali_framebuffer_extra extra */
				993	/* struct mali_render_target rts[] */
Alyssa Rosenzweig	61d3ae6	2019-01-29 05:46:07 +0000	[diff] [blame]	994	} __attribute__((packed));
				995
				996	#endif /* __PANFROST_JOB_H__ */