intel: fill in dynamic hardware states
diff --git a/icd/intel/state.c b/icd/intel/state.c
index 5924388..3f66c56 100644
--- a/icd/intel/state.c
+++ b/icd/intel/state.c
@@ -22,9 +22,444 @@
* DEALINGS IN THE SOFTWARE.
*/
+#include <math.h>
+#include "genhw/genhw.h"
#include "dev.h"
#include "state.h"
+/*
+ * Map an XGL_COMPARE_FUNC value onto the corresponding
+ * GEN6_COMPAREFUNCTION_* hardware encoding.
+ *
+ * An unrecognized value trips an assertion; when assertions are compiled
+ * out, GEN6_COMPAREFUNCTION_NEVER is returned instead.
+ */
+static int translate_compare_func(XGL_COMPARE_FUNC func)
+{
+    int hw_func;
+
+    switch (func) {
+    case XGL_COMPARE_NEVER:
+        hw_func = GEN6_COMPAREFUNCTION_NEVER;
+        break;
+    case XGL_COMPARE_LESS:
+        hw_func = GEN6_COMPAREFUNCTION_LESS;
+        break;
+    case XGL_COMPARE_EQUAL:
+        hw_func = GEN6_COMPAREFUNCTION_EQUAL;
+        break;
+    case XGL_COMPARE_LESS_EQUAL:
+        hw_func = GEN6_COMPAREFUNCTION_LEQUAL;
+        break;
+    case XGL_COMPARE_GREATER:
+        hw_func = GEN6_COMPAREFUNCTION_GREATER;
+        break;
+    case XGL_COMPARE_NOT_EQUAL:
+        hw_func = GEN6_COMPAREFUNCTION_NOTEQUAL;
+        break;
+    case XGL_COMPARE_GREATER_EQUAL:
+        hw_func = GEN6_COMPAREFUNCTION_GEQUAL;
+        break;
+    case XGL_COMPARE_ALWAYS:
+        hw_func = GEN6_COMPAREFUNCTION_ALWAYS;
+        break;
+    default:
+        assert(!"unknown compare_func");
+        hw_func = GEN6_COMPAREFUNCTION_NEVER;
+        break;
+    }
+
+    return hw_func;
+}
+
+/*
+ * Map an XGL_STENCIL_OP value onto the corresponding GEN6_STENCILOP_*
+ * hardware encoding.
+ *
+ * The XGL "clamp" variants map to the saturating hardware ops (INCRSAT /
+ * DECRSAT) and the "wrap" variants to the plain INCR / DECR ops.  An
+ * unrecognized value trips an assertion; when assertions are compiled out,
+ * GEN6_STENCILOP_KEEP is returned instead.
+ */
+static int translate_stencil_op(XGL_STENCIL_OP op)
+{
+    int hw_op;
+
+    switch (op) {
+    case XGL_STENCIL_OP_KEEP:
+        hw_op = GEN6_STENCILOP_KEEP;
+        break;
+    case XGL_STENCIL_OP_ZERO:
+        hw_op = GEN6_STENCILOP_ZERO;
+        break;
+    case XGL_STENCIL_OP_REPLACE:
+        hw_op = GEN6_STENCILOP_REPLACE;
+        break;
+    case XGL_STENCIL_OP_INC_CLAMP:
+        hw_op = GEN6_STENCILOP_INCRSAT;
+        break;
+    case XGL_STENCIL_OP_DEC_CLAMP:
+        hw_op = GEN6_STENCILOP_DECRSAT;
+        break;
+    case XGL_STENCIL_OP_INVERT:
+        hw_op = GEN6_STENCILOP_INVERT;
+        break;
+    case XGL_STENCIL_OP_INC_WRAP:
+        hw_op = GEN6_STENCILOP_INCR;
+        break;
+    case XGL_STENCIL_OP_DEC_WRAP:
+        hw_op = GEN6_STENCILOP_DECR;
+        break;
+    default:
+        assert(!"unknown stencil op");
+        hw_op = GEN6_STENCILOP_KEEP;
+        break;
+    }
+
+    return hw_op;
+}
+
+/*
+ * Map an XGL_BLEND_FUNC value onto the corresponding
+ * GEN6_BLENDFUNCTION_* hardware encoding.
+ *
+ * An unrecognized value trips an assertion; when assertions are compiled
+ * out, GEN6_BLENDFUNCTION_ADD is returned instead.
+ */
+static int translate_blend_func(XGL_BLEND_FUNC func)
+{
+    int hw_func;
+
+    switch (func) {
+    case XGL_BLEND_FUNC_ADD:
+        hw_func = GEN6_BLENDFUNCTION_ADD;
+        break;
+    case XGL_BLEND_FUNC_SUBTRACT:
+        hw_func = GEN6_BLENDFUNCTION_SUBTRACT;
+        break;
+    case XGL_BLEND_FUNC_REVERSE_SUBTRACT:
+        hw_func = GEN6_BLENDFUNCTION_REVERSE_SUBTRACT;
+        break;
+    case XGL_BLEND_FUNC_MIN:
+        hw_func = GEN6_BLENDFUNCTION_MIN;
+        break;
+    case XGL_BLEND_FUNC_MAX:
+        hw_func = GEN6_BLENDFUNCTION_MAX;
+        break;
+    default:
+        assert(!"unknown blend func");
+        hw_func = GEN6_BLENDFUNCTION_ADD;
+        break;
+    }
+
+    return hw_func;
+}
+
+/*
+ * Map an XGL_BLEND factor onto the corresponding GEN6_BLENDFACTOR_*
+ * hardware encoding.
+ *
+ * An unrecognized value trips an assertion; when assertions are compiled
+ * out, GEN6_BLENDFACTOR_ONE is returned instead.
+ */
+static int translate_blend(XGL_BLEND blend)
+{
+    int hw_factor;
+
+    switch (blend) {
+    case XGL_BLEND_ZERO:
+        hw_factor = GEN6_BLENDFACTOR_ZERO;
+        break;
+    case XGL_BLEND_ONE:
+        hw_factor = GEN6_BLENDFACTOR_ONE;
+        break;
+    case XGL_BLEND_SRC_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_SRC_COLOR;
+        break;
+    case XGL_BLEND_ONE_MINUS_SRC_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_INV_SRC_COLOR;
+        break;
+    case XGL_BLEND_DEST_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_DST_COLOR;
+        break;
+    case XGL_BLEND_ONE_MINUS_DEST_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_INV_DST_COLOR;
+        break;
+    case XGL_BLEND_SRC_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA;
+        break;
+    case XGL_BLEND_ONE_MINUS_SRC_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_INV_SRC_ALPHA;
+        break;
+    case XGL_BLEND_DEST_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_DST_ALPHA;
+        break;
+    case XGL_BLEND_ONE_MINUS_DEST_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_INV_DST_ALPHA;
+        break;
+    case XGL_BLEND_CONSTANT_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_CONST_COLOR;
+        break;
+    case XGL_BLEND_ONE_MINUS_CONSTANT_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_INV_CONST_COLOR;
+        break;
+    case XGL_BLEND_CONSTANT_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_CONST_ALPHA;
+        break;
+    case XGL_BLEND_ONE_MINUS_CONSTANT_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_INV_CONST_ALPHA;
+        break;
+    case XGL_BLEND_SRC_ALPHA_SATURATE:
+        hw_factor = GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE;
+        break;
+    case XGL_BLEND_SRC1_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_SRC1_COLOR;
+        break;
+    case XGL_BLEND_ONE_MINUS_SRC1_COLOR:
+        hw_factor = GEN6_BLENDFACTOR_INV_SRC1_COLOR;
+        break;
+    case XGL_BLEND_SRC1_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_SRC1_ALPHA;
+        break;
+    case XGL_BLEND_ONE_MINUS_SRC1_ALPHA:
+        hw_factor = GEN6_BLENDFACTOR_INV_SRC1_ALPHA;
+        break;
+    default:
+        assert(!"unknown blend factor");
+        hw_factor = GEN6_BLENDFACTOR_ONE;
+        break;
+    }
+
+    return hw_factor;
+}
+
+/*
+ * Translate the rasterizer create info into the command bits kept in
+ * \p state.
+ *
+ * Fill-mode and winding bits are OR'ed into cmd_sf_fill, cull-mode bits
+ * into cmd_sf_cull and cmd_clip_cull; bits already present in those fields
+ * are kept.  The three depth-offset fields are overwritten with the raw
+ * IEEE bit patterns produced by u_fui().
+ */
+static void
+raster_state_init(struct intel_raster_state *state,
+                  const struct intel_gpu *gpu,
+                  const XGL_RASTER_STATE_CREATE_INFO *info)
+{
+    float offset_const;
+
+    /*
+     * Fill mode; anything unrecognized is rasterized as solid.
+     * NOTE(review): the XFL_ prefix is reproduced as found; presumably it
+     * matches the spelling in the API header -- confirm.
+     */
+    if (info->fillMode == XFL_FILL_POINTS) {
+        state->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_POINT |
+                              GEN7_SF_DW1_BACKFACE_POINT;
+    } else if (info->fillMode == XGL_FILL_WIREFRAME) {
+        state->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_WIREFRAME |
+                              GEN7_SF_DW1_BACKFACE_WIREFRAME;
+    } else {
+        state->cmd_sf_fill |= GEN7_SF_DW1_FRONTFACE_SOLID |
+                              GEN7_SF_DW1_BACKFACE_SOLID;
+    }
+
+    /* winding is flagged only for CCW front faces */
+    if (info->frontFace == XGL_FRONT_FACE_CCW) {
+        state->cmd_sf_fill |= GEN7_SF_DW1_FRONTWINDING_CCW;
+        state->cmd_clip_cull |= GEN7_CLIP_DW1_FRONTWINDING_CCW;
+    }
+
+    /* cull mode; anything unrecognized disables culling */
+    if (info->cullMode == XGL_CULL_FRONT) {
+        state->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_FRONT;
+        state->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_FRONT;
+    } else if (info->cullMode == XGL_CULL_BACK) {
+        state->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_BACK;
+        state->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_BACK;
+    } else if (info->cullMode == XGL_CULL_FRONT_AND_BACK) {
+        state->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_BOTH;
+        state->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_BOTH;
+    } else {
+        state->cmd_sf_cull |= GEN7_SF_DW2_CULLMODE_NONE;
+        state->cmd_clip_cull |= GEN7_CLIP_DW1_CULLMODE_NONE;
+    }
+
+    /* only GEN7+ needs cull mode in 3DSTATE_CLIP */
+    if (intel_gpu_gen(gpu) == INTEL_GEN(6))
+        state->cmd_clip_cull = 0;
+
+    /* XXX scale info->depthBias back into NDC */
+    offset_const = (float) info->depthBias * 2.0f;
+
+    state->cmd_depth_offset_const = u_fui(offset_const);
+    state->cmd_depth_offset_clamp = u_fui(info->depthBiasClamp);
+    state->cmd_depth_offset_scale = u_fui(info->slopeScaledDepthBias);
+}
+
+/*
+ * Compute the guardband rectangle for a viewport centered at
+ * (center_x, center_y).
+ *
+ * The guardband is a square of side 8K.  Its center is moved, if needed, so
+ * that the whole square stays inside the per-generation screen-space extent
+ * [-max_extent, max_extent - 1].  The resulting bounds are returned through
+ * min_gbx/max_gbx/min_gby/max_gby.
+ */
+static void
+viewport_get_guardband(const struct intel_gpu *gpu,
+                       int center_x, int center_y,
+                       int *min_gbx, int *max_gbx,
+                       int *min_gby, int *max_gby)
+{
+    /*
+     * From the Sandy Bridge PRM, volume 2 part 1, page 234:
+     *
+     *     "Per-Device Guardband Extents
+     *
+     *       - Supported X,Y ScreenSpace "Guardband" Extent: [-16K,16K-1]
+     *       - Maximum Post-Clamp Delta (X or Y): 16K"
+     *
+     *     "In addition, in order to be correctly rendered, objects must have a
+     *      screenspace bounding box not exceeding 8K in the X or Y direction.
+     *      This additional restriction must also be comprehended by software,
+     *      i.e., enforced by use of clipping."
+     *
+     * From the Ivy Bridge PRM, volume 2 part 1, page 248:
+     *
+     *     "Per-Device Guardband Extents
+     *
+     *       - Supported X,Y ScreenSpace "Guardband" Extent: [-32K,32K-1]
+     *       - Maximum Post-Clamp Delta (X or Y): N/A"
+     *
+     *     "In addition, in order to be correctly rendered, objects must have a
+     *      screenspace bounding box not exceeding 8K in the X or Y direction.
+     *      This additional restriction must also be comprehended by software,
+     *      i.e., enforced by use of clipping."
+     *
+     * Combined, the bounding box of any object can not exceed 8K in both
+     * width and height.
+     *
+     * Below we set the guardband as a square of length 8K, centered at where
+     * the viewport is.  This makes sure all objects passing the GB test are
+     * valid to the renderer, and those failing the XY clipping have a
+     * better chance of passing the GB test.
+     */
+    const int max_extent = (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 32768 : 16384;
+    const int half_len = 8192 / 2;
+
+    /*
+     * Valid range for the center so that [center - half_len,
+     * center + half_len] lies within [-max_extent, max_extent - 1].  The
+     * comparisons are done on the center itself so that an extreme center
+     * cannot overflow while being tested.
+     */
+    const int min_center = -max_extent + half_len;
+    const int max_center = max_extent - 1 - half_len;
+
+    if (center_x < min_center)
+        center_x = min_center;
+    else if (center_x > max_center)
+        center_x = max_center;
+
+    if (center_y < min_center)
+        center_y = min_center;
+    else if (center_y > max_center)
+        center_y = max_center;
+
+    *min_gbx = center_x - half_len;
+    *max_gbx = center_x + half_len;
+    *min_gby = center_y - half_len;
+    *max_gby = center_y + half_len;
+}
+
+/*
+ * Build the viewport/scissor hardware state for every viewport in \p info.
+ *
+ * state->cmd is allocated here (state->size dwords) and laid out as
+ * consecutive arrays, one element per viewport:
+ *
+ *   GEN7+: SF+CLIP viewports (16 dwords each: 8 SF, 4 CLIP, 4 padding),
+ *          CC viewports (2 dwords each), scissor rects (2 dwords each)
+ *   GEN6:  SF viewports (8 dwords each), CLIP viewports (4 dwords each),
+ *          CC viewports (2 dwords each), scissor rects (2 dwords each)
+ *
+ * All floating-point values are stored as their raw bit patterns via
+ * u_fui().
+ *
+ * Returns XGL_SUCCESS, or XGL_ERROR_OUT_OF_MEMORY when the buffer cannot be
+ * allocated.  On success the caller owns state->cmd.
+ */
+static XGL_RESULT
+viewport_state_init(struct intel_viewport_state *state,
+                    const struct intel_gpu *gpu,
+                    const XGL_VIEWPORT_STATE_CREATE_INFO *info)
+{
+    const int is_gen7 = (intel_gpu_gen(gpu) >= INTEL_GEN(7));
+    const XGL_UINT sf_stride = (is_gen7) ? 16 : 8;
+    const XGL_UINT clip_stride = (is_gen7) ? 16 : 4;
+    const XGL_UINT count = info->viewportCount;
+    uint32_t *sf_viewport, *clip_viewport, *cc_viewport, *scissor_rect;
+    XGL_UINT i;
+
+    INTEL_GPU_ASSERT(gpu, 6, 7.5);
+
+    state->scissor_enable = info->scissorEnable;
+
+    if (is_gen7)
+        state->size = (16 + 2 + 2) * count;
+    else
+        state->size = (8 + 4 + 2 + 2) * count;
+
+    state->cmd = icd_alloc(sizeof(uint32_t) * state->size,
+            0, XGL_SYSTEM_ALLOC_INTERNAL);
+    if (!state->cmd)
+        return XGL_ERROR_OUT_OF_MEMORY;
+
+    /*
+     * Section bases must account for the number of viewports; otherwise the
+     * CLIP/CC/SCISSOR sections would overlap the SF entries of the second
+     * and later viewports.
+     */
+    sf_viewport = state->cmd;
+    if (is_gen7) {
+        /* CLIP is interleaved with SF, 8 dwords into each element */
+        clip_viewport = sf_viewport + 8;
+        cc_viewport = sf_viewport + 16 * count;
+    } else {
+        clip_viewport = sf_viewport + 8 * count;
+        cc_viewport = clip_viewport + 4 * count;
+    }
+    scissor_rect = cc_viewport + 2 * count;
+
+    for (i = 0; i < count; i++) {
+        const XGL_VIEWPORT *viewport = &info->viewports[i];
+        const XGL_RECT *scissor = &info->scissors[i];
+        uint32_t *dw = NULL;
+        float translate[3], scale[3];
+        int min_gbx, max_gbx, min_gby, max_gby;
+
+        scale[0] = viewport->width / 2.0f;
+        scale[1] = viewport->height / 2.0f;
+        scale[2] = (viewport->maxDepth - viewport->minDepth) / 2.0f;
+        translate[0] = viewport->originX + scale[0];
+        translate[1] = viewport->originY + scale[1];
+        translate[2] = (viewport->minDepth + viewport->maxDepth) / 2.0f;
+
+        viewport_get_guardband(gpu, (int) translate[0], (int) translate[1],
+                &min_gbx, &max_gbx, &min_gby, &max_gby);
+
+        /* SF_VIEWPORT */
+        dw = sf_viewport;
+        dw[0] = u_fui(scale[0]);
+        dw[1] = u_fui(scale[1]);
+        dw[2] = u_fui(scale[2]);
+        dw[3] = u_fui(translate[0]);
+        dw[4] = u_fui(translate[1]);
+        dw[5] = u_fui(translate[2]);
+        dw[6] = 0;
+        dw[7] = 0;
+        sf_viewport += sf_stride;
+
+        /*
+         * CLIP_VIEWPORT: guardband in NDC.  These are floats and must be
+         * stored as bit patterns, not converted to integer values.
+         */
+        dw = clip_viewport;
+        dw[0] = u_fui(((float) min_gbx - translate[0]) / fabsf(scale[0]));
+        dw[1] = u_fui(((float) max_gbx - translate[0]) / fabsf(scale[0]));
+        dw[2] = u_fui(((float) min_gby - translate[1]) / fabsf(scale[1]));
+        dw[3] = u_fui(((float) max_gby - translate[1]) / fabsf(scale[1]));
+        if (is_gen7) {
+            /* do not leave the padding dwords of the element undefined */
+            dw[4] = 0;
+            dw[5] = 0;
+            dw[6] = 0;
+            dw[7] = 0;
+        }
+        clip_viewport += clip_stride;
+
+        /* CC_VIEWPORT */
+        dw = cc_viewport;
+        dw[0] = u_fui(viewport->minDepth);
+        dw[1] = u_fui(viewport->maxDepth);
+        cc_viewport += 2;
+
+        /* SCISSOR_RECT */
+        dw = scissor_rect;
+        if (scissor->extent.width && scissor->extent.height) {
+            dw[0] = (scissor->offset.y & 0xffff) << 16 |
+                    (scissor->offset.x & 0xffff);
+            dw[1] =
+                ((scissor->offset.y + scissor->extent.height - 1) & 0xffff) << 16 |
+                ((scissor->offset.x + scissor->extent.width - 1) & 0xffff);
+        } else {
+            /* empty rectangle: min (1, 1) is greater than max (0, 0) */
+            dw[0] = 1 << 16 | 1;
+            dw[1] = 0;
+        }
+        scissor_rect += 2;
+    }
+
+    return XGL_SUCCESS;
+}
+
+/*
+ * Build 3DSTATE_MULTISAMPLE followed by 3DSTATE_SAMPLE_MASK in state->cmd.
+ *
+ * The requested sample count is rounded to what gets programmed: <= 1 uses
+ * 1x, up to 4 (or anything larger on GEN6) uses 4x, and the rest uses 8x.
+ * The sample mask is limited to the requested number of samples, with at
+ * least one bit kept and never more bits than the programmed sample count.
+ */
+static void
+msaa_state_init(struct intel_msaa_state *state,
+                const struct intel_gpu *gpu,
+                const XGL_MSAA_STATE_CREATE_INFO *info)
+{
+    /* taken from Mesa */
+    static const uint32_t brw_multisample_positions_1x_2x = 0x0088cc44;
+    static const uint32_t brw_multisample_positions_4x = 0xae2ae662;
+    static const uint32_t brw_multisample_positions_8x[] = { 0xdbb39d79, 0x3ff55117 };
+    uint32_t cmd, cmd_len;
+    uint32_t hw_samples, mask_bits;
+    uint32_t *dw = state->cmd;
+
+    INTEL_GPU_ASSERT(gpu, 6, 7.5);
+    STATIC_ASSERT(ARRAY_SIZE(state->cmd) >= 6);
+
+    /* 3DSTATE_MULTISAMPLE */
+    cmd = GEN_RENDER_CMD(3D, 3DSTATE_MULTISAMPLE);
+    cmd_len = (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 4 : 3;
+
+    dw[0] = cmd | (cmd_len - 2);
+
+    /* the 4-dword form has a second positions dword used only by 8x */
+    if (cmd_len > 3)
+        dw[3] = 0;
+
+    if (info->samples <= 1) {
+        hw_samples = 1;
+        dw[1] = GEN6_MULTISAMPLE_DW1_NUMSAMPLES_1;
+        dw[2] = brw_multisample_positions_1x_2x;
+    } else if (info->samples <= 4 || intel_gpu_gen(gpu) == INTEL_GEN(6)) {
+        hw_samples = 4;
+        dw[1] = GEN6_MULTISAMPLE_DW1_NUMSAMPLES_4;
+        dw[2] = brw_multisample_positions_4x;
+    } else {
+        hw_samples = 8;
+        dw[1] = GEN7_MULTISAMPLE_DW1_NUMSAMPLES_8;
+        dw[2] = brw_multisample_positions_8x[0];
+        dw[3] = brw_multisample_positions_8x[1];
+    }
+
+    dw += cmd_len;
+
+    /* 3DSTATE_SAMPLE_MASK */
+    cmd = GEN_RENDER_CMD(3D, 3DSTATE_SAMPLE_MASK);
+    cmd_len = 2;
+
+    /*
+     * A sample count of 0 is programmed as 1x above, so it must keep one
+     * mask bit; otherwise every sample would be masked off.  Clamping to
+     * hw_samples also keeps the shift count small and well-defined.
+     */
+    mask_bits = (info->samples < 1) ? 1 : (uint32_t) info->samples;
+    if (mask_bits > hw_samples)
+        mask_bits = hw_samples;
+
+    dw[0] = cmd | (cmd_len - 2);
+    dw[1] = info->sampleMask & ((1u << mask_bits) - 1);
+}
+
+/*
+ * Build the per-attachment blend state (two dwords per attachment) in
+ * state->cmd and copy the blend constant into state->cmd_blend_color.
+ *
+ * The first dword holds the blend enable bit and the translated blend
+ * functions/factors; it is 0 for attachments with blending disabled.  The
+ * independent-alpha bit (bit 30) is set only when the alpha equation
+ * differs from the color equation.  The second dword is the same for every
+ * attachment.
+ */
+static void
+blend_state_init(struct intel_blend_state *state,
+                 const struct intel_gpu *gpu,
+                 const XGL_COLOR_BLEND_STATE_CREATE_INFO *info)
+{
+    XGL_UINT i;
+
+    INTEL_GPU_ASSERT(gpu, 6, 7.5);
+
+    for (i = 0; i < ARRAY_SIZE(info->attachment); i++) {
+        const XGL_COLOR_ATTACHMENT_BLEND_STATE *att = &info->attachment[i];
+        uint32_t *dw = &state->cmd[2 * i];
+        /* always written below so a disabled attachment is well-defined */
+        uint32_t dw0 = 0;
+
+        if (att->blendEnable) {
+            /* unsigned arithmetic: shifting a signed 1 into bit 31 is UB */
+            dw0 = 1u << 31 |
+                  (uint32_t) translate_blend_func(att->blendFuncAlpha) << 26 |
+                  (uint32_t) translate_blend(att->srcBlendAlpha) << 20 |
+                  (uint32_t) translate_blend(att->destBlendAlpha) << 15 |
+                  (uint32_t) translate_blend_func(att->blendFuncColor) << 11 |
+                  (uint32_t) translate_blend(att->srcBlendColor) << 5 |
+                  (uint32_t) translate_blend(att->destBlendColor);
+
+            if (att->blendFuncAlpha != att->blendFuncColor ||
+                att->srcBlendAlpha != att->srcBlendColor ||
+                att->destBlendAlpha != att->destBlendColor)
+                dw0 |= 1u << 30;
+        }
+
+        dw[0] = dw0;
+        dw[1] = GEN6_BLEND_DW1_COLORCLAMP_RTFORMAT |
+                0x3;
+    }
+
+    memcpy(state->cmd_blend_color, info->blendConst, sizeof(info->blendConst));
+}
+
+/*
+ * Build the three depth/stencil state dwords in state->cmd and the packed
+ * stencil reference values in state->cmd_stencil_ref.
+ *
+ *   dw[0]: stencil enable, front/back functions and ops, write enable
+ *   dw[1]: stencil test/write masks for front and back faces
+ *   dw[2]: depth test enable, depth function, depth write enable
+ *
+ * Returns XGL_ERROR_UNKNOWN when depth bounds testing is requested (it is
+ * not handled here), XGL_SUCCESS otherwise.
+ */
+static XGL_RESULT
+ds_state_init(struct intel_ds_state *state,
+              const struct intel_gpu *gpu,
+              const XGL_DEPTH_STENCIL_STATE_CREATE_INFO *info)
+{
+    uint32_t *dw = state->cmd;
+
+    INTEL_GPU_ASSERT(gpu, 6, 7.5);
+
+    STATIC_ASSERT(ARRAY_SIZE(state->cmd) >= 3);
+
+    if (info->depthBoundsEnable)
+        return XGL_ERROR_UNKNOWN;
+
+    /*
+     * From the Sandy Bridge PRM, volume 2 part 1, page 359:
+     *
+     *     "If the Depth Buffer is either undefined or does not have a surface
+     *      format of D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT and separate
+     *      stencil buffer is disabled, Stencil Test Enable must be DISABLED"
+     *
+     * From the Sandy Bridge PRM, volume 2 part 1, page 370:
+     *
+     *     "This field (Stencil Test Enable) cannot be enabled if
+     *      Surface Format in 3DSTATE_DEPTH_BUFFER is set to D16_UNORM."
+     *
+     * TODO We do not check these yet.
+     */
+    if (info->stencilTestEnable) {
+        const uint32_t read_mask = (uint32_t) info->stencilReadMask & 0xff;
+        const uint32_t write_mask = (uint32_t) info->stencilWriteMask & 0xff;
+
+        /* unsigned arithmetic: shifting a signed 1 into bit 31 is UB */
+        dw[0] = 1u << 31 |
+                (uint32_t) translate_compare_func(info->front.stencilFunc) << 28 |
+                (uint32_t) translate_stencil_op(info->front.stencilFailOp) << 25 |
+                (uint32_t) translate_stencil_op(info->front.stencilDepthFailOp) << 22 |
+                (uint32_t) translate_stencil_op(info->front.stencilPassOp) << 19 |
+                1u << 15 |
+                (uint32_t) translate_compare_func(info->back.stencilFunc) << 12 |
+                (uint32_t) translate_stencil_op(info->back.stencilFailOp) << 9 |
+                (uint32_t) translate_stencil_op(info->back.stencilDepthFailOp) << 6 |
+                (uint32_t) translate_stencil_op(info->back.stencilPassOp) << 3;
+
+        if (info->stencilWriteMask)
+            dw[0] |= 1u << 18;
+
+        /*
+         * Double-sided stencil is enabled above (bit 15), so the back-face
+         * test/write masks in the low 16 bits must be programmed as well.
+         * The create info carries a single read/write mask pair, which is
+         * used for both faces.
+         */
+        dw[1] = read_mask << 24 |
+                write_mask << 16 |
+                read_mask << 8 |
+                write_mask;
+
+        state->cmd_stencil_ref =
+            ((uint32_t) info->front.stencilRef & 0xff) << 24 |
+            ((uint32_t) info->back.stencilRef & 0xff) << 16;
+    } else {
+        /* stencil disabled: leave nothing undefined */
+        dw[0] = 0;
+        dw[1] = 0;
+        state->cmd_stencil_ref = 0;
+    }
+
+    /*
+     * From the Sandy Bridge PRM, volume 2 part 1, page 360:
+     *
+     *     "Enabling the Depth Test function without defining a Depth Buffer is
+     *      UNDEFINED."
+     *
+     * From the Sandy Bridge PRM, volume 2 part 1, page 375:
+     *
+     *     "A Depth Buffer must be defined before enabling writes to it, or
+     *      operation is UNDEFINED."
+     *
+     * TODO We do not check these yet.
+     */
+    if (info->depthTestEnable) {
+        dw[2] = 1u << 31 |
+                (uint32_t) translate_compare_func(info->depthFunc) << 27 |
+                (info->depthWriteEnable ? 1u << 26 : 0);
+    } else {
+        /* depth test off: function is ALWAYS and depth writes stay off */
+        dw[2] = GEN6_COMPAREFUNCTION_ALWAYS << 27;
+    }
+
+    return XGL_SUCCESS;
+}
+
static void viewport_state_destroy(struct intel_obj *obj)
{
struct intel_viewport_state *state = intel_viewport_state_from_obj(obj);
@@ -37,6 +472,7 @@
struct intel_viewport_state **state_ret)
{
struct intel_viewport_state *state;
+ XGL_RESULT ret;
state = (struct intel_viewport_state *) intel_base_create(dev,
sizeof(*state), dev->base.dbg, XGL_DBG_OBJECT_VIEWPORT_STATE,
@@ -46,7 +482,11 @@
state->obj.destroy = viewport_state_destroy;
- //emit_viewport_state(dev->gpu, info, state->cmd);
+ ret = viewport_state_init(state, dev->gpu, info);
+ if (ret != XGL_SUCCESS) {
+ intel_viewport_state_destroy(state);
+ return ret;
+ }
*state_ret = state;
@@ -55,6 +495,7 @@
void intel_viewport_state_destroy(struct intel_viewport_state *state)
{
+ icd_free(state->cmd); /* release the dword buffer obtained from icd_alloc() in viewport_state_init() before the object itself is destroyed */
intel_base_destroy(&state->obj.base);
}
@@ -79,7 +520,7 @@
state->obj.destroy = raster_state_destroy;
- //emit_raster_state(dev->gpu, info, state->cmd);
+ raster_state_init(state, dev->gpu, info);
*state_ret = state;
@@ -112,7 +553,7 @@
state->obj.destroy = msaa_state_destroy;
- //emit_msaa_state(dev->gpu, info, state->cmd);
+ msaa_state_init(state, dev->gpu, info);
*state_ret = state;
@@ -145,7 +586,7 @@
state->obj.destroy = blend_state_destroy;
- //emit_blend_state(dev->gpu, info, state->cmd);
+ blend_state_init(state, dev->gpu, info);
*state_ret = state;
@@ -169,6 +610,7 @@
struct intel_ds_state **state_ret)
{
struct intel_ds_state *state;
+ XGL_RESULT ret;
state = (struct intel_ds_state *) intel_base_create(dev,
sizeof(*state), dev->base.dbg, XGL_DBG_OBJECT_MSAA_STATE,
@@ -178,7 +620,11 @@
state->obj.destroy = ds_state_destroy;
- //emit_ds_state(dev->gpu, info, state->cmd);
+ ret = ds_state_init(state, dev->gpu, info);
+ if (ret != XGL_SUCCESS) {
+ intel_ds_state_destroy(state);
+ return ret;
+ }
*state_ret = state;