blob: 0ee66840c91a0defc7230f0fabc421efae2e3c29 [file] [log] [blame]
/*
* Copyright © 2011 Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "brw_context.h"
#include "brw_state.h"
#include "brw_blorp.h"
#include "vbo/vbo.h"
#include "brw_draw.h"
static void
gen6_blorp_emit_input_varying_data(struct brw_context *brw,
const struct brw_blorp_params *params,
unsigned *offset,
unsigned *size)
{
const unsigned vec4_size_in_bytes = 4 * sizeof(float);
const unsigned max_num_varyings =
DIV_ROUND_UP(sizeof(params->wm_inputs), vec4_size_in_bytes);
const unsigned num_varyings = params->wm_prog_data->num_varying_inputs;
*size = num_varyings * vec4_size_in_bytes;
const float *const inputs_src = (const float *)&params->wm_inputs;
float *inputs = (float *)brw_state_batch(brw, AUB_TRACE_VERTEX_BUFFER,
*size, 32, offset);
/* Walk over the attribute slots, determine if the attribute is used by
* the program and when necessary copy the values from the input storage to
* the vertex data buffer.
*/
for (unsigned i = 0; i < max_num_varyings; i++) {
const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;
if (!(params->wm_prog_data->inputs_read & BITFIELD64_BIT(attr)))
continue;
memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
inputs += 4;
}
}
static void
gen6_blorp_emit_vertex_data(struct brw_context *brw,
const struct brw_blorp_params *params)
{
uint32_t vertex_offset;
uint32_t const_data_offset = 0;
unsigned const_data_size = 0;
/* Setup VBO for the rectangle primitive..
*
* A rectangle primitive (3DPRIM_RECTLIST) consists of only three
* vertices. The vertices reside in screen space with DirectX coordinates
* (that is, (0, 0) is the upper left corner).
*
* v2 ------ implied
* | |
* | |
* v0 ----- v1
*
* Since the VS is disabled, the clipper loads each VUE directly from
* the URB. This is controlled by the 3DSTATE_VERTEX_BUFFERS and
* 3DSTATE_VERTEX_ELEMENTS packets below. The VUE contents are as follows:
* dw0: Reserved, MBZ.
* dw1: Render Target Array Index. The HiZ op does not use indexed
* vertices, so set the dword to 0.
* dw2: Viewport Index. The HiZ op disables viewport mapping and
* scissoring, so set the dword to 0.
* dw3: Point Width: The HiZ op does not emit the POINTLIST primitive, so
* set the dword to 0.
* dw4: Vertex Position X.
* dw5: Vertex Position Y.
* dw6: Vertex Position Z.
* dw7: Vertex Position W.
*
* dw8: Flat vertex input 0
* dw9: Flat vertex input 1
* ...
* dwn: Flat vertex input n - 8
*
* For details, see the Sandybridge PRM, Volume 2, Part 1, Section 1.5.1
* "Vertex URB Entry (VUE) Formats".
*
* Only vertex position X and Y are going to be variable, Z is fixed to
* zero and W to one. Header words dw0-3 are all zero. There is no need to
* include the fixed values in the vertex buffer. Vertex fetcher can be
* instructed to fill vertex elements with constant values of one and zero
* instead of reading them from the buffer.
* Flat inputs are program constants that are not interpolated. Moreover
* their values will be the same between vertices.
*
* See the vertex element setup below.
*/
const float vertices[] = {
/* v0 */ (float)params->x0, (float)params->y1,
/* v1 */ (float)params->x1, (float)params->y1,
/* v2 */ (float)params->x0, (float)params->y0,
};
float *const vertex_data = (float *)brw_state_batch(
brw, AUB_TRACE_VERTEX_BUFFER,
sizeof(vertices), 32,
&vertex_offset);
memcpy(vertex_data, vertices, sizeof(vertices));
if (params->wm_prog_data && params->wm_prog_data->num_varying_inputs)
gen6_blorp_emit_input_varying_data(brw, params,
&const_data_offset,
&const_data_size);
/* 3DSTATE_VERTEX_BUFFERS */
const int num_buffers = 1 + (const_data_size > 0);
const int batch_length = 1 + 4 * num_buffers;
BEGIN_BATCH(batch_length);
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
const unsigned blorp_num_vue_elems = 2;
const unsigned stride = blorp_num_vue_elems * sizeof(float);
EMIT_VERTEX_BUFFER_STATE(brw, 0 /* buffer_nr */, brw->batch.bo,
vertex_offset, vertex_offset + sizeof(vertices),
stride, 0 /* steprate */);
if (const_data_size) {
/* Tell vertex fetcher not to advance the pointer in the buffer when
* moving to the next vertex. This will effectively provide the same
* data for all the vertices. For flat inputs only the data provided
* for the first provoking vertex actually matters.
*/
const unsigned stride_zero = 0;
EMIT_VERTEX_BUFFER_STATE(brw, 1 /* buffer_nr */, brw->batch.bo,
const_data_offset,
const_data_offset + const_data_size,
stride_zero, 0 /* step_rate */);
}
ADVANCE_BATCH();
}
void
gen6_blorp_emit_vertices(struct brw_context *brw,
const struct brw_blorp_params *params)
{
gen6_blorp_emit_vertex_data(brw, params);
const unsigned num_varyings =
params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
const unsigned num_elements = 2 + num_varyings;
const int batch_length = 1 + 2 * num_elements;
BEGIN_BATCH(batch_length);
/* 3DSTATE_VERTEX_ELEMENTS
*
* Fetch dwords 0 - 7 from each VUE. See the comments above where
* the vertex_bo is filled with data. First element contains dwords
* for the VUE header, second the actual position values and the
* remaining contain the flat inputs.
*/
{
OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (batch_length - 2));
/* Element 0 */
OUT_BATCH(GEN6_VE0_VALID |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
0 << BRW_VE0_SRC_OFFSET_SHIFT);
OUT_BATCH(BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT |
BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT |
BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT);
/* Element 1 */
OUT_BATCH(GEN6_VE0_VALID |
BRW_SURFACEFORMAT_R32G32_FLOAT << BRW_VE0_FORMAT_SHIFT |
0 << BRW_VE0_SRC_OFFSET_SHIFT);
OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT |
BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT);
}
for (unsigned i = 0; i < num_varyings; ++i) {
/* Element 2 + i */
OUT_BATCH(1 << GEN6_VE0_INDEX_SHIFT |
GEN6_VE0_VALID |
BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT |
(i * 4 * sizeof(float)) << BRW_VE0_SRC_OFFSET_SHIFT);
OUT_BATCH(BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_2_SHIFT |
BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_3_SHIFT);
}
ADVANCE_BATCH();
}
/* 3DSTATE_URB
*
* Assign the entire URB to the VS. Even though the VS disabled, URB space
* is still needed because the clipper loads the VUE's from the URB. From
* the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE,
* Dword 1.15:0 "VS Number of URB Entries":
* This field is always used (even if VS Function Enable is DISABLED).
*
* The warning below appears in the PRM (Section 3DSTATE_URB), but we can
* safely ignore it because this batch contains only one draw call.
* Because of URB corruption caused by allocating a previous GS unit
* URB entry to the VS unit, software is required to send a “GS NULL
* Fence” (Send URB fence with VS URB size == 1 and GS URB size == 0)
* plus a dummy DRAW call before any case where VS will be taking over
* GS URB space.
*/
static void
gen6_blorp_emit_urb_config(struct brw_context *brw,
const struct brw_blorp_params *params)
{
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_URB << 16 | (3 - 2));
OUT_BATCH(brw->urb.max_vs_entries << GEN6_URB_VS_ENTRIES_SHIFT);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* BLEND_STATE */
uint32_t
gen6_blorp_emit_blend_state(struct brw_context *brw,
const struct brw_blorp_params *params)
{
uint32_t cc_blend_state_offset;
assume(params->num_draw_buffers);
const unsigned size = params->num_draw_buffers *
sizeof(struct gen6_blend_state);
struct gen6_blend_state *blend = (struct gen6_blend_state *)
brw_state_batch(brw, AUB_TRACE_BLEND_STATE, size, 64,
&cc_blend_state_offset);
memset(blend, 0, size);
for (unsigned i = 0; i < params->num_draw_buffers; ++i) {
blend[i].blend1.pre_blend_clamp_enable = 1;
blend[i].blend1.post_blend_clamp_enable = 1;
blend[i].blend1.clamp_range = BRW_RENDERTARGET_CLAMPRANGE_FORMAT;
blend[i].blend1.write_disable_r = params->color_write_disable[0];
blend[i].blend1.write_disable_g = params->color_write_disable[1];
blend[i].blend1.write_disable_b = params->color_write_disable[2];
blend[i].blend1.write_disable_a = params->color_write_disable[3];
}
return cc_blend_state_offset;
}
/* CC_STATE */
uint32_t
gen6_blorp_emit_cc_state(struct brw_context *brw)
{
uint32_t cc_state_offset;
struct gen6_color_calc_state *cc = (struct gen6_color_calc_state *)
brw_state_batch(brw, AUB_TRACE_CC_STATE,
sizeof(gen6_color_calc_state), 64,
&cc_state_offset);
memset(cc, 0, sizeof(*cc));
return cc_state_offset;
}
/**
* \param out_offset is relative to
* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
*/
uint32_t
gen6_blorp_emit_depth_stencil_state(struct brw_context *brw,
const struct brw_blorp_params *params)
{
uint32_t depthstencil_offset;
struct gen6_depth_stencil_state *state;
state = (struct gen6_depth_stencil_state *)
brw_state_batch(brw, AUB_TRACE_DEPTH_STENCIL_STATE,
sizeof(*state), 64,
&depthstencil_offset);
memset(state, 0, sizeof(*state));
/* See the following sections of the Sandy Bridge PRM, Volume 1, Part2:
* - 7.5.3.1 Depth Buffer Clear
* - 7.5.3.2 Depth Buffer Resolve
* - 7.5.3.3 Hierarchical Depth Buffer Resolve
*/
state->ds2.depth_write_enable = 1;
if (params->hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE) {
state->ds2.depth_test_enable = 1;
state->ds2.depth_test_func = BRW_COMPAREFUNCTION_NEVER;
}
return depthstencil_offset;
}
/* 3DSTATE_CC_STATE_POINTERS
*
* The pointer offsets are relative to
* CMD_STATE_BASE_ADDRESS.DynamicStateBaseAddress.
*
* The HiZ op doesn't use BLEND_STATE or COLOR_CALC_STATE.
*/
static void
gen6_blorp_emit_cc_state_pointers(struct brw_context *brw,
const struct brw_blorp_params *params,
uint32_t cc_blend_state_offset,
uint32_t depthstencil_offset,
uint32_t cc_state_offset)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CC_STATE_POINTERS << 16 | (4 - 2));
OUT_BATCH(cc_blend_state_offset | 1); /* BLEND_STATE offset */
OUT_BATCH(depthstencil_offset | 1); /* DEPTH_STENCIL_STATE offset */
OUT_BATCH(cc_state_offset | 1); /* COLOR_CALC_STATE offset */
ADVANCE_BATCH();
}
/* BINDING_TABLE. See brw_wm_binding_table(). */
uint32_t
gen6_blorp_emit_binding_table(struct brw_context *brw,
uint32_t wm_surf_offset_renderbuffer,
uint32_t wm_surf_offset_texture)
{
uint32_t wm_bind_bo_offset;
uint32_t *bind = (uint32_t *)
brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
sizeof(uint32_t) *
BRW_BLORP_NUM_BINDING_TABLE_ENTRIES,
32, /* alignment */
&wm_bind_bo_offset);
bind[BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX] =
wm_surf_offset_renderbuffer;
bind[BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX] = wm_surf_offset_texture;
return wm_bind_bo_offset;
}
/**
* SAMPLER_STATE. See brw_update_sampler_state().
*/
uint32_t
gen6_blorp_emit_sampler_state(struct brw_context *brw,
unsigned tex_filter, unsigned max_lod,
bool non_normalized_coords)
{
uint32_t sampler_offset;
uint32_t *sampler_state = (uint32_t *)
brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset);
unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_R_MIN |
BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
/* XXX: I don't think that using firstLevel, lastLevel works,
* because we always setup the surface state as if firstLevel ==
* level zero. Probably have to subtract firstLevel from each of
* these:
*/
brw_emit_sampler_state(brw,
sampler_state,
sampler_offset,
tex_filter, /* min filter */
tex_filter, /* mag filter */
BRW_MIPFILTER_NONE,
BRW_ANISORATIO_2,
address_rounding,
BRW_TEXCOORDMODE_CLAMP,
BRW_TEXCOORDMODE_CLAMP,
BRW_TEXCOORDMODE_CLAMP,
0, /* min LOD */
max_lod,
0, /* LOD bias */
0, /* shadow function */
non_normalized_coords,
0); /* border color offset - unused */
return sampler_offset;
}
/**
* 3DSTATE_SAMPLER_STATE_POINTERS. See upload_sampler_state_pointers().
*/
static void
gen6_blorp_emit_sampler_state_pointers(struct brw_context *brw,
uint32_t sampler_offset)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS << 16 |
VS_SAMPLER_STATE_CHANGE |
GS_SAMPLER_STATE_CHANGE |
PS_SAMPLER_STATE_CHANGE |
(4 - 2));
OUT_BATCH(0); /* VS */
OUT_BATCH(0); /* GS */
OUT_BATCH(sampler_offset);
ADVANCE_BATCH();
}
/* 3DSTATE_VS
*
* Disable vertex shader.
*/
void
gen6_blorp_emit_vs_disable(struct brw_context *brw,
const struct brw_blorp_params *params)
{
/* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
* 3DSTATE_VS, Dword 5.0 "VS Function Enable":
*
* [DevSNB] A pipeline flush must be programmed prior to a
* 3DSTATE_VS command that causes the VS Function Enable to
* toggle. Pipeline flush can be executed by sending a PIPE_CONTROL
* command with CS stall bit set and a post sync operation.
*
* We've already done one at the start of the BLORP operation.
*/
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_VS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(6);
OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_GS
*
* Disable the geometry shader.
*/
void
gen6_blorp_emit_gs_disable(struct brw_context *brw,
const struct brw_blorp_params *params)
{
/* Disable all the constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_GS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
brw->gs.enabled = false;
}
/* 3DSTATE_CLIP
*
* Disable the clipper.
*
* The BLORP op emits a rectangle primitive, which requires clipping to
* be disabled. From page 10 of the Sandy Bridge PRM Volume 2 Part 1
* Section 1.3 "3D Primitives Overview":
* RECTLIST:
* Either the CLIP unit should be DISABLED, or the CLIP unit's Clip
* Mode should be set to a value other than CLIPMODE_NORMAL.
*
* Also disable perspective divide. This doesn't change the clipper's
* output, but does spare a few electrons.
*/
void
gen6_blorp_emit_clip_disable(struct brw_context *brw)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(GEN6_CLIP_PERSPECTIVE_DIVIDE_DISABLE);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_SF
*
* Disable ViewportTransformEnable (dw2.1)
*
* From the SandyBridge PRM, Volume 2, Part 1, Section 1.3, "3D
* Primitives Overview":
* RECTLIST: Viewport Mapping must be DISABLED (as is typical with the
* use of screen- space coordinates).
*
* A solid rectangle must be rendered, so set FrontFaceFillMode (dw2.4:3)
* and BackFaceFillMode (dw2.5:6) to SOLID(0).
*
* From the Sandy Bridge PRM, Volume 2, Part 1, Section
* 6.4.1.1 3DSTATE_SF, Field FrontFaceFillMode:
* SOLID: Any triangle or rectangle object found to be front-facing
* is rendered as a solid object. This setting is required when
* (rendering rectangle (RECTLIST) objects.
*/
static void
gen6_blorp_emit_sf_config(struct brw_context *brw,
const struct brw_blorp_params *params)
{
const unsigned num_varyings =
params->wm_prog_data ? params->wm_prog_data->num_varying_inputs : 0;
const unsigned urb_read_length =
brw_blorp_get_urb_length(params->wm_prog_data);
BEGIN_BATCH(20);
OUT_BATCH(_3DSTATE_SF << 16 | (20 - 2));
OUT_BATCH(num_varyings << GEN6_SF_NUM_OUTPUTS_SHIFT |
urb_read_length << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
BRW_SF_URB_ENTRY_READ_OFFSET <<
GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
OUT_BATCH(0); /* dw2 */
OUT_BATCH(params->dst.surf.samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
for (int i = 0; i < 13; ++i)
OUT_BATCH(0);
OUT_BATCH(params->wm_prog_data ? params->wm_prog_data->flat_inputs : 0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/**
* Enable or disable thread dispatch and set the HiZ op appropriately.
*/
static void
gen6_blorp_emit_wm_config(struct brw_context *brw,
const struct brw_blorp_params *params)
{
const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
uint32_t dw2, dw4, dw5, dw6, ksp0, ksp2;
/* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
* nonzero to prevent the GPU from hanging. While the documentation doesn't
* mention this explicitly, it notes that the valid range for the field is
* [1,39] = [2,40] threads, which excludes zero.
*
* To be safe (and to minimize extraneous code) we go ahead and fully
* configure the WM state whether or not there is a WM program.
*/
dw2 = dw4 = dw5 = dw6 = ksp0 = ksp2 = 0;
switch (params->hiz_op) {
case GEN6_HIZ_OP_DEPTH_CLEAR:
dw4 |= GEN6_WM_DEPTH_CLEAR;
break;
case GEN6_HIZ_OP_DEPTH_RESOLVE:
dw4 |= GEN6_WM_DEPTH_RESOLVE;
break;
case GEN6_HIZ_OP_HIZ_RESOLVE:
dw4 |= GEN6_WM_HIERARCHICAL_DEPTH_RESOLVE;
break;
case GEN6_HIZ_OP_NONE:
break;
default:
unreachable("not reached");
}
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT;
dw6 |= 0 << GEN6_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */
dw6 |= (params->wm_prog_data ? prog_data->num_varying_inputs : 0) <<
GEN6_WM_NUM_SF_OUTPUTS_SHIFT;
if (params->wm_prog_data) {
dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
dw4 |= prog_data->first_curbe_grf_0 << GEN6_WM_DISPATCH_START_GRF_SHIFT_0;
dw4 |= prog_data->first_curbe_grf_2 << GEN6_WM_DISPATCH_START_GRF_SHIFT_2;
ksp0 = params->wm_prog_kernel;
ksp2 = params->wm_prog_kernel + params->wm_prog_data->ksp_offset_2;
if (params->wm_prog_data->dispatch_8)
dw5 |= GEN6_WM_8_DISPATCH_ENABLE;
if (params->wm_prog_data->dispatch_16)
dw5 |= GEN6_WM_16_DISPATCH_ENABLE;
}
if (params->src.mt) {
dw5 |= GEN6_WM_KILL_ENABLE; /* TODO: temporarily smash on */
dw2 |= 1 << GEN6_WM_SAMPLER_COUNT_SHIFT; /* Up to 4 samplers */
}
if (params->dst.surf.samples > 1) {
dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
if (prog_data && prog_data->persample_msaa_dispatch)
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
else
dw6 |= GEN6_WM_MSDISPMODE_PERPIXEL;
} else {
dw6 |= GEN6_WM_MSRAST_OFF_PIXEL;
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
}
BEGIN_BATCH(9);
OUT_BATCH(_3DSTATE_WM << 16 | (9 - 2));
OUT_BATCH(ksp0);
OUT_BATCH(dw2);
OUT_BATCH(0); /* No scratch needed */
OUT_BATCH(dw4);
OUT_BATCH(dw5);
OUT_BATCH(dw6);
OUT_BATCH(0); /* kernel 1 pointer */
OUT_BATCH(ksp2);
ADVANCE_BATCH();
}
static void
gen6_blorp_emit_constant_ps_disable(struct brw_context *brw,
const struct brw_blorp_params *params)
{
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(_3DSTATE_CONSTANT_PS << 16 | (5 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/**
* 3DSTATE_BINDING_TABLE_POINTERS
*/
static void
gen6_blorp_emit_binding_table_pointers(struct brw_context *brw,
uint32_t wm_bind_bo_offset)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
GEN6_BINDING_TABLE_MODIFY_PS |
(4 - 2));
OUT_BATCH(0); /* vs -- ignored */
OUT_BATCH(0); /* gs -- ignored */
OUT_BATCH(wm_bind_bo_offset); /* wm/ps */
ADVANCE_BATCH();
}
static void
gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
const struct brw_blorp_params *params)
{
uint32_t surfwidth, surfheight;
uint32_t surftype;
unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
GLenum gl_target = params->depth.mt->target;
unsigned int lod;
switch (gl_target) {
case GL_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_CUBE_MAP:
/* The PRM claims that we should use BRW_SURFACE_CUBE for this
* situation, but experiments show that gl_Layer doesn't work when we do
* this. So we use BRW_SURFACE_2D, since for rendering purposes this is
* equivalent.
*/
surftype = BRW_SURFACE_2D;
depth *= 6;
break;
default:
surftype = translate_tex_target(gl_target);
break;
}
const unsigned min_array_element = params->depth.layer;
lod = params->depth.level - params->depth.mt->first_level;
if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) {
/* HIZ ops for lod 0 may set the width & height a little
* larger to allow the fast depth clear to fit the hardware
* alignment requirements. (8x4)
*/
surfwidth = params->depth.width;
surfheight = params->depth.height;
} else {
surfwidth = params->depth.mt->logical_width0;
surfheight = params->depth.mt->logical_height0;
}
/* 3DSTATE_DEPTH_BUFFER */
{
brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
/* 3DSTATE_DEPTH_BUFFER dw0 */
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
/* 3DSTATE_DEPTH_BUFFER dw1 */
OUT_BATCH((params->depth.mt->pitch - 1) |
params->depth_format << 18 |
1 << 21 | /* separate stencil enable */
1 << 22 | /* hiz enable */
BRW_TILEWALK_YMAJOR << 26 |
1 << 27 | /* y-tiled */
surftype << 29);
/* 3DSTATE_DEPTH_BUFFER dw2 */
OUT_RELOC(params->depth.mt->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
/* 3DSTATE_DEPTH_BUFFER dw3 */
OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
(surfwidth - 1) << 6 |
(surfheight - 1) << 19 |
lod << 2);
/* 3DSTATE_DEPTH_BUFFER dw4 */
OUT_BATCH((depth - 1) << 21 |
min_array_element << 10 |
(depth - 1) << 1);
/* 3DSTATE_DEPTH_BUFFER dw5 */
OUT_BATCH(0);
/* 3DSTATE_DEPTH_BUFFER dw6 */
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_HIER_DEPTH_BUFFER */
{
struct intel_mipmap_tree *hiz_mt = params->depth.mt->hiz_buf->mt;
uint32_t offset = 0;
if (hiz_mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
offset = intel_miptree_get_aligned_offset(hiz_mt,
hiz_mt->level[lod].level_x,
hiz_mt->level[lod].level_y,
false);
}
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
OUT_BATCH(hiz_mt->pitch - 1);
OUT_RELOC(hiz_mt->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
ADVANCE_BATCH();
}
/* 3DSTATE_STENCIL_BUFFER */
{
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
}
static void
gen6_blorp_emit_depth_disable(struct brw_context *brw,
const struct brw_blorp_params *params)
{
brw_emit_depth_stall_flushes(brw);
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
(BRW_SURFACE_NULL << 29));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
BEGIN_BATCH(3);
OUT_BATCH(_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_CLEAR_PARAMS
*
* From the Sandybridge PRM, Volume 2, Part 1, Section 3DSTATE_CLEAR_PARAMS:
* [DevSNB] 3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE
* packet when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
*/
static void
gen6_blorp_emit_clear_params(struct brw_context *brw,
const struct brw_blorp_params *params)
{
BEGIN_BATCH(2);
OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
GEN5_DEPTH_CLEAR_VALID |
(2 - 2));
OUT_BATCH(params->depth.mt ? params->depth.mt->depth_clear_value : 0);
ADVANCE_BATCH();
}
/* 3DSTATE_DRAWING_RECTANGLE */
void
gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
const struct brw_blorp_params *params)
{
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(((MAX2(params->x1, params->x0) - 1) & 0xffff) |
((MAX2(params->y1, params->y0) - 1) << 16));
OUT_BATCH(0);
ADVANCE_BATCH();
}
/* 3DSTATE_VIEWPORT_STATE_POINTERS */
static void
gen6_blorp_emit_viewport_state(struct brw_context *brw,
const struct brw_blorp_params *params)
{
struct brw_cc_viewport *ccv;
uint32_t cc_vp_offset;
ccv = (struct brw_cc_viewport *)brw_state_batch(brw, AUB_TRACE_CC_VP_STATE,
sizeof(*ccv), 32,
&cc_vp_offset);
ccv->min_depth = 0.0;
ccv->max_depth = 1.0;
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_VIEWPORT_STATE_POINTERS << 16 | (4 - 2) |
GEN6_CC_VIEWPORT_MODIFY);
OUT_BATCH(0); /* clip VP */
OUT_BATCH(0); /* SF VP */
OUT_BATCH(cc_vp_offset);
ADVANCE_BATCH();
}
/* 3DPRIMITIVE */
static void
gen6_blorp_emit_primitive(struct brw_context *brw,
const struct brw_blorp_params *params)
{
BEGIN_BATCH(6);
OUT_BATCH(CMD_3D_PRIM << 16 | (6 - 2) |
_3DPRIM_RECTLIST << GEN4_3DPRIM_TOPOLOGY_TYPE_SHIFT |
GEN4_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
OUT_BATCH(3); /* vertex count per instance */
OUT_BATCH(0);
OUT_BATCH(params->num_layers); /* instance count */
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
}
/**
* \brief Execute a blit or render pass operation.
*
* To execute the operation, this function manually constructs and emits a
* batch to draw a rectangle primitive. The batchbuffer is flushed before
* constructing and after emitting the batch.
*
* This function alters no GL state.
*/
void
gen6_blorp_exec(struct brw_context *brw,
const struct brw_blorp_params *params)
{
uint32_t cc_blend_state_offset = 0;
uint32_t cc_state_offset = 0;
uint32_t depthstencil_offset;
uint32_t wm_bind_bo_offset = 0;
/* Emit workaround flushes when we switch from drawing to blorping. */
brw_emit_post_sync_nonzero_flush(brw);
brw_upload_state_base_address(brw);
gen6_emit_3dstate_multisample(brw, params->dst.surf.samples);
gen6_emit_3dstate_sample_mask(brw,
params->dst.surf.samples > 1 ?
(1 << params->dst.surf.samples) - 1 : 1);
gen6_blorp_emit_vertices(brw, params);
gen6_blorp_emit_urb_config(brw, params);
if (params->wm_prog_data) {
cc_blend_state_offset = gen6_blorp_emit_blend_state(brw, params);
cc_state_offset = gen6_blorp_emit_cc_state(brw);
}
depthstencil_offset = gen6_blorp_emit_depth_stencil_state(brw, params);
gen6_blorp_emit_cc_state_pointers(brw, params, cc_blend_state_offset,
depthstencil_offset, cc_state_offset);
if (params->wm_prog_data) {
uint32_t wm_surf_offset_renderbuffer;
uint32_t wm_surf_offset_texture = 0;
intel_miptree_used_for_rendering(params->dst.mt);
wm_surf_offset_renderbuffer =
brw_blorp_emit_surface_state(brw, &params->dst,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER, true);
if (params->src.mt) {
wm_surf_offset_texture =
brw_blorp_emit_surface_state(brw, &params->src,
I915_GEM_DOMAIN_SAMPLER, 0, false);
}
wm_bind_bo_offset =
gen6_blorp_emit_binding_table(brw,
wm_surf_offset_renderbuffer,
wm_surf_offset_texture);
}
if (params->src.mt) {
const uint32_t sampler_offset =
gen6_blorp_emit_sampler_state(brw, BRW_MAPFILTER_LINEAR, 0, true);
gen6_blorp_emit_sampler_state_pointers(brw, sampler_offset);
}
gen6_blorp_emit_vs_disable(brw, params);
gen6_blorp_emit_gs_disable(brw, params);
gen6_blorp_emit_clip_disable(brw);
gen6_blorp_emit_sf_config(brw, params);
gen6_blorp_emit_constant_ps_disable(brw, params);
gen6_blorp_emit_wm_config(brw, params);
if (params->wm_prog_data)
gen6_blorp_emit_binding_table_pointers(brw, wm_bind_bo_offset);
gen6_blorp_emit_viewport_state(brw, params);
if (params->depth.mt)
gen6_blorp_emit_depth_stencil_config(brw, params);
else
gen6_blorp_emit_depth_disable(brw, params);
gen6_blorp_emit_clear_params(brw, params);
gen6_blorp_emit_drawing_rectangle(brw, params);
gen6_blorp_emit_primitive(brw, params);
}