| /* |
| * XGL |
| * |
| * Copyright (C) 2014 LunarG, Inc. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice shall be included |
| * in all copies or substantial portions of the Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| * |
| * Authors: |
| * Chia-I Wu <olv@lunarg.com> |
| * Courtney Goeltzenleuchter <courtney@lunarg.com> |
| */ |
| |
| #include "genhw/genhw.h" |
| #include "dset.h" |
| #include "img.h" |
| #include "mem.h" |
| #include "pipeline.h" |
| #include "shader.h" |
| #include "state.h" |
| #include "view.h" |
| #include "cmd_priv.h" |
| |
| static void gen6_3DPRIMITIVE(struct intel_cmd *cmd, |
| int prim_type, bool indexed, |
| uint32_t vertex_count, |
| uint32_t vertex_start, |
| uint32_t instance_count, |
| uint32_t instance_start, |
| uint32_t vertex_base) |
| { |
| const uint8_t cmd_len = 6; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | |
| prim_type << GEN6_3DPRIM_DW0_TYPE__SHIFT | |
| (cmd_len - 2); |
| |
| if (indexed) |
| dw0 |= GEN6_3DPRIM_DW0_ACCESS_RANDOM; |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, vertex_count); |
| cmd_batch_write(cmd, vertex_start); |
| cmd_batch_write(cmd, instance_count); |
| cmd_batch_write(cmd, instance_start); |
| cmd_batch_write(cmd, vertex_base); |
| } |
| |
| static void gen7_3DPRIMITIVE(struct intel_cmd *cmd, |
| int prim_type, bool indexed, |
| uint32_t vertex_count, |
| uint32_t vertex_start, |
| uint32_t instance_count, |
| uint32_t instance_start, |
| uint32_t vertex_base) |
| { |
| const uint8_t cmd_len = 7; |
| uint32_t dw0, dw1; |
| |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DPRIMITIVE) | (cmd_len - 2); |
| dw1 = prim_type << GEN7_3DPRIM_DW1_TYPE__SHIFT; |
| |
| if (indexed) |
| dw1 |= GEN7_3DPRIM_DW1_ACCESS_RANDOM; |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, dw1); |
| cmd_batch_write(cmd, vertex_count); |
| cmd_batch_write(cmd, vertex_start); |
| cmd_batch_write(cmd, instance_count); |
| cmd_batch_write(cmd, instance_start); |
| cmd_batch_write(cmd, vertex_base); |
| } |
| |
| static void gen6_PIPE_CONTROL(struct intel_cmd *cmd, uint32_t dw1, |
| struct intel_bo *bo, uint32_t bo_offset, |
| uint64_t imm) |
| { |
| const uint8_t cmd_len = 5; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, PIPE_CONTROL) | |
| (cmd_len - 2); |
| uint32_t reloc_flags = INTEL_RELOC_WRITE; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| assert(bo_offset % 8 == 0); |
| |
| if (dw1 & GEN6_PIPE_CONTROL_CS_STALL) { |
| /* |
| * From the Sandy Bridge PRM, volume 2 part 1, page 73: |
| * |
| * "1 of the following must also be set (when CS stall is set): |
| * |
| * * Depth Cache Flush Enable ([0] of DW1) |
| * * Stall at Pixel Scoreboard ([1] of DW1) |
| * * Depth Stall ([13] of DW1) |
| * * Post-Sync Operation ([13] of DW1) |
| * * Render Target Cache Flush Enable ([12] of DW1) |
| * * Notify Enable ([8] of DW1)" |
| * |
| * From the Ivy Bridge PRM, volume 2 part 1, page 61: |
| * |
| * "One of the following must also be set (when CS stall is set): |
| * |
| * * Render Target Cache Flush Enable ([12] of DW1) |
| * * Depth Cache Flush Enable ([0] of DW1) |
| * * Stall at Pixel Scoreboard ([1] of DW1) |
| * * Depth Stall ([13] of DW1) |
| * * Post-Sync Operation ([13] of DW1)" |
| */ |
| uint32_t bit_test = GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | |
| GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH | |
| GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL | |
| GEN6_PIPE_CONTROL_DEPTH_STALL; |
| |
| /* post-sync op */ |
| bit_test |= GEN6_PIPE_CONTROL_WRITE_IMM | |
| GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT | |
| GEN6_PIPE_CONTROL_WRITE_TIMESTAMP; |
| |
| if (cmd_gen(cmd) == INTEL_GEN(6)) |
| bit_test |= GEN6_PIPE_CONTROL_NOTIFY_ENABLE; |
| |
| assert(dw1 & bit_test); |
| } |
| |
| if (dw1 & GEN6_PIPE_CONTROL_DEPTH_STALL) { |
| /* |
| * From the Sandy Bridge PRM, volume 2 part 1, page 73: |
| * |
| * "Following bits must be clear (when Depth Stall is set): |
| * |
| * * Render Target Cache Flush Enable ([12] of DW1) |
| * * Depth Cache Flush Enable ([0] of DW1)" |
| */ |
| assert(!(dw1 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | |
| GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH))); |
| } |
| |
| /* |
| * From the Sandy Bridge PRM, volume 1 part 3, page 19: |
| * |
| * "[DevSNB] PPGTT memory writes by MI_* (such as MI_STORE_DATA_IMM) |
| * and PIPE_CONTROL are not supported." |
| * |
| * The kernel will add the mapping automatically (when write domain is |
| * INTEL_DOMAIN_INSTRUCTION). |
| */ |
| if (cmd_gen(cmd) == INTEL_GEN(6) && bo) { |
| bo_offset |= GEN6_PIPE_CONTROL_DW2_USE_GGTT; |
| reloc_flags |= INTEL_RELOC_GGTT; |
| } |
| |
| cmd_batch_reserve_reloc(cmd, cmd_len, (bool) bo); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, dw1); |
| if (bo) |
| cmd_batch_reloc(cmd, bo_offset, bo, reloc_flags); |
| else |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, (uint32_t) imm); |
| cmd_batch_write(cmd, (uint32_t) (imm >> 32)); |
| } |
| |
| static bool gen6_can_primitive_restart(const struct intel_cmd *cmd) |
| { |
| const struct intel_pipeline *p = cmd->bind.pipeline.graphics; |
| bool supported; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7.5)) |
| return (p->prim_type != GEN6_3DPRIM_RECTLIST); |
| |
| switch (p->prim_type) { |
| case GEN6_3DPRIM_POINTLIST: |
| case GEN6_3DPRIM_LINELIST: |
| case GEN6_3DPRIM_LINESTRIP: |
| case GEN6_3DPRIM_TRILIST: |
| case GEN6_3DPRIM_TRISTRIP: |
| supported = true; |
| break; |
| default: |
| supported = false; |
| break; |
| } |
| |
| if (!supported) |
| return false; |
| |
| switch (cmd->bind.index.type) { |
| case XGL_INDEX_8: |
| supported = (p->primitive_restart_index != 0xffu); |
| break; |
| case XGL_INDEX_16: |
| supported = (p->primitive_restart_index != 0xffffu); |
| break; |
| case XGL_INDEX_32: |
| supported = (p->primitive_restart_index != 0xffffffffu); |
| break; |
| default: |
| supported = false; |
| break; |
| } |
| |
| return supported; |
| } |
| |
| static void gen6_3DSTATE_INDEX_BUFFER(struct intel_cmd *cmd, |
| const struct intel_mem *mem, |
| XGL_GPU_SIZE offset, |
| XGL_INDEX_TYPE type, |
| bool enable_cut_index) |
| { |
| const uint8_t cmd_len = 3; |
| uint32_t dw0, end_offset; |
| unsigned offset_align; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_INDEX_BUFFER) | (cmd_len - 2); |
| |
| /* the bit is moved to 3DSTATE_VF */ |
| if (cmd_gen(cmd) >= INTEL_GEN(7.5)) |
| assert(!enable_cut_index); |
| if (enable_cut_index) |
| dw0 |= GEN6_IB_DW0_CUT_INDEX_ENABLE; |
| |
| switch (type) { |
| case XGL_INDEX_8: |
| dw0 |= GEN6_IB_DW0_FORMAT_BYTE; |
| offset_align = 1; |
| break; |
| case XGL_INDEX_16: |
| dw0 |= GEN6_IB_DW0_FORMAT_WORD; |
| offset_align = 2; |
| break; |
| case XGL_INDEX_32: |
| dw0 |= GEN6_IB_DW0_FORMAT_DWORD; |
| offset_align = 4; |
| break; |
| default: |
| cmd->result = XGL_ERROR_INVALID_VALUE; |
| return; |
| break; |
| } |
| |
| if (offset % offset_align) { |
| cmd->result = XGL_ERROR_INVALID_VALUE; |
| return; |
| } |
| |
| /* aligned and inclusive */ |
| end_offset = mem->size - (mem->size % offset_align) - 1; |
| |
| cmd_batch_reserve_reloc(cmd, cmd_len, 2); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_reloc(cmd, offset, mem->bo, 0); |
| cmd_batch_reloc(cmd, end_offset, mem->bo, 0); |
| } |
| |
| static void gen75_3DSTATE_VF(struct intel_cmd *cmd, |
| bool enable_cut_index, |
| uint32_t cut_index) |
| { |
| const uint8_t cmd_len = 2; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 7.5, 7.5); |
| |
| dw0 = GEN75_RENDER_CMD(3D, 3DSTATE_VF) | (cmd_len - 2); |
| if (enable_cut_index) |
| dw0 |= GEN75_VF_DW0_CUT_INDEX_ENABLE; |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, cut_index); |
| } |
| |
| |
| static void gen6_3DSTATE_GS(struct intel_cmd *cmd) |
| { |
| const uint8_t cmd_len = 7; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| assert(cmd->bind.gs.shader == NULL); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 1 << GEN6_GS_DW4_URB_READ_LEN__SHIFT); |
| cmd_batch_write(cmd, GEN6_GS_DW5_STATISTICS); |
| cmd_batch_write(cmd, 0); |
| } |
| |
| static void gen7_3DSTATE_GS(struct intel_cmd *cmd) |
| { |
| const uint8_t cmd_len = 7; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_GS) | (cmd_len - 2); |
| |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| assert(cmd->bind.gs.shader == NULL); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, GEN6_GS_DW5_STATISTICS); |
| cmd_batch_write(cmd, 0); |
| } |
| |
| static void gen6_3DSTATE_DRAWING_RECTANGLE(struct intel_cmd *cmd, |
| XGL_UINT width, XGL_UINT height) |
| { |
| const uint8_t cmd_len = 4; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_DRAWING_RECTANGLE) | |
| (cmd_len - 2); |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| if (width && height) { |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, (height - 1) << 16 | |
| (width - 1)); |
| } else { |
| cmd_batch_write(cmd, 1); |
| cmd_batch_write(cmd, 0); |
| } |
| cmd_batch_write(cmd, 0); |
| } |
| |
| static void gen7_fill_3DSTATE_SF_body(const struct intel_cmd *cmd, |
| uint32_t body[6]) |
| { |
| const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics; |
| const struct intel_viewport_state *viewport = cmd->bind.state.viewport; |
| const struct intel_raster_state *raster = cmd->bind.state.raster; |
| const struct intel_msaa_state *msaa = cmd->bind.state.msaa; |
| uint32_t dw1, dw2, dw3; |
| int point_width; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| dw1 = GEN7_SF_DW1_STATISTICS | |
| GEN7_SF_DW1_DEPTH_OFFSET_SOLID | |
| GEN7_SF_DW1_DEPTH_OFFSET_WIREFRAME | |
| GEN7_SF_DW1_DEPTH_OFFSET_POINT | |
| GEN7_SF_DW1_VIEWPORT_ENABLE | |
| raster->cmd_sf_fill; |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7)) { |
| int format; |
| |
| switch (pipeline->db_format.channelFormat) { |
| case XGL_CH_FMT_R16: |
| format = GEN6_ZFORMAT_D16_UNORM; |
| break; |
| case XGL_CH_FMT_R32: |
| case XGL_CH_FMT_R32G8: |
| format = GEN6_ZFORMAT_D32_FLOAT; |
| break; |
| default: |
| assert(!"unknown depth format"); |
| format = 0; |
| break; |
| } |
| |
| dw1 |= format << GEN7_SF_DW1_DEPTH_FORMAT__SHIFT; |
| } |
| |
| dw2 = raster->cmd_sf_cull; |
| |
| if (msaa->sample_count > 1) { |
| dw2 |= 128 << GEN7_SF_DW2_LINE_WIDTH__SHIFT | |
| GEN7_SF_DW2_MSRASTMODE_ON_PATTERN; |
| } else { |
| dw2 |= 0 << GEN7_SF_DW2_LINE_WIDTH__SHIFT | |
| GEN7_SF_DW2_MSRASTMODE_OFF_PIXEL; |
| } |
| |
| if (viewport->scissor_enable) |
| dw2 |= GEN7_SF_DW2_SCISSOR_ENABLE; |
| |
| /* in U8.3 */ |
| point_width = (int) (pipeline->pointSize * 8.0f + 0.5f); |
| point_width = U_CLAMP(point_width, 1, 2047); |
| |
| dw3 = pipeline->provoking_vertex_tri << GEN7_SF_DW3_TRI_PROVOKE__SHIFT | |
| pipeline->provoking_vertex_line << GEN7_SF_DW3_LINE_PROVOKE__SHIFT | |
| pipeline->provoking_vertex_trifan << GEN7_SF_DW3_TRIFAN_PROVOKE__SHIFT | |
| GEN7_SF_DW3_SUBPIXEL_8BITS | |
| GEN7_SF_DW3_USE_POINT_WIDTH | |
| point_width; |
| |
| body[0] = dw1; |
| body[1] = dw2; |
| body[2] = dw3; |
| body[3] = raster->cmd_depth_offset_const; |
| body[4] = raster->cmd_depth_offset_scale; |
| body[5] = raster->cmd_depth_offset_clamp; |
| } |
| |
| static void gen7_fill_3DSTATE_SBE_body(const struct intel_cmd *cmd, |
| uint32_t body[13]) |
| { |
| const struct intel_pipe_shader *vs = &cmd->bind.pipeline.graphics->vs; |
| const struct intel_pipe_shader *fs = &cmd->bind.pipeline.graphics->fs; |
| XGL_UINT attr_skip, attr_count; |
| XGL_UINT vue_offset, vue_len; |
| XGL_UINT i; |
| uint32_t dw1; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| /* VS outputs VUE header and position additionally */ |
| assert(vs->out_count >= 2); |
| attr_skip = 2; |
| attr_count = vs->out_count - attr_skip; |
| assert(fs->in_count == attr_count); |
| assert(fs->in_count <= 32); |
| |
| vue_offset = attr_skip / 2; |
| vue_len = (attr_count + 1) / 2; |
| if (!vue_len) |
| vue_len = 1; |
| |
| dw1 = fs->in_count << GEN7_SBE_DW1_ATTR_COUNT__SHIFT | |
| vue_len << GEN7_SBE_DW1_URB_READ_LEN__SHIFT | |
| vue_offset << GEN7_SBE_DW1_URB_READ_OFFSET__SHIFT; |
| |
| body[0] = dw1; |
| |
| for (i = 0; i < 8; i++) { |
| uint16_t hi, lo; |
| |
| /* no attr swizzles */ |
| if (i * 2 + 1 < fs->in_count) { |
| hi = i * 2 + 1; |
| lo = i * 2; |
| } else if (i * 2 < fs->in_count) { |
| hi = 0; |
| lo = i * 2; |
| } else { |
| hi = 0; |
| lo = 0; |
| } |
| |
| body[1 + i] = hi << GEN7_SBE_ATTR_HIGH__SHIFT | lo; |
| } |
| |
| body[9] = 0; /* point sprite enables */ |
| body[10] = 0; /* constant interpolation enables */ |
| body[11] = 0; /* WrapShortest enables */ |
| body[12] = 0; |
| } |
| |
/*
 * Emit the twenty-dword GEN6 3DSTATE_SF.
 *
 * GEN6 combines what GEN7 splits into SF and SBE: after the header comes
 * the first SBE dword, then the six SF body dwords, then the remaining
 * twelve SBE dwords.
 */
static void gen6_3DSTATE_SF(struct intel_cmd *cmd)
{
    const uint8_t cmd_len = 20;
    uint32_t sf_body[6];
    uint32_t sbe_body[13];
    uint32_t header;

    CMD_ASSERT(cmd, 6, 6);

    header = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);

    gen7_fill_3DSTATE_SF_body(cmd, sf_body);
    gen7_fill_3DSTATE_SBE_body(cmd, sbe_body);

    cmd_batch_reserve(cmd, cmd_len);
    cmd_batch_write(cmd, header);
    cmd_batch_write(cmd, sbe_body[0]);
    cmd_batch_write_n(cmd, sf_body, 6);
    cmd_batch_write_n(cmd, &sbe_body[1], 12);
}
| |
/*
 * Emit the seven-dword GEN7/GEN7.5 3DSTATE_SF: a header followed by the
 * six dwords produced by gen7_fill_3DSTATE_SF_body().
 */
static void gen7_3DSTATE_SF(struct intel_cmd *cmd)
{
    const uint8_t cmd_len = 7;
    uint32_t packet[7];
    uint32_t *body = &packet[1];

    CMD_ASSERT(cmd, 7, 7.5);

    packet[0] = GEN6_RENDER_CMD(3D, 3DSTATE_SF) | (cmd_len - 2);
    gen7_fill_3DSTATE_SF_body(cmd, body);

    cmd_batch_reserve(cmd, cmd_len);
    cmd_batch_write_n(cmd, packet, cmd_len);
}
| |
/*
 * Emit the fourteen-dword GEN7/GEN7.5 3DSTATE_SBE: a header followed by
 * the thirteen dwords produced by gen7_fill_3DSTATE_SBE_body().
 */
static void gen7_3DSTATE_SBE(struct intel_cmd *cmd)
{
    const uint8_t cmd_len = 14;
    uint32_t packet[14];
    uint32_t *body = &packet[1];

    CMD_ASSERT(cmd, 7, 7.5);

    packet[0] = GEN7_RENDER_CMD(3D, 3DSTATE_SBE) | (cmd_len - 2);
    gen7_fill_3DSTATE_SBE_body(cmd, body);

    cmd_batch_reserve(cmd, cmd_len);
    cmd_batch_write_n(cmd, packet, cmd_len);
}
| |
| static void gen6_3DSTATE_CLIP(struct intel_cmd *cmd) |
| { |
| const uint8_t cmd_len = 4; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLIP) | |
| (cmd_len - 2); |
| const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics; |
| const struct intel_pipe_shader *fs = &pipeline->fs; |
| const struct intel_viewport_state *viewport = cmd->bind.state.viewport; |
| const struct intel_raster_state *raster = cmd->bind.state.raster; |
| uint32_t dw1, dw2, dw3; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| dw1 = GEN6_CLIP_DW1_STATISTICS; |
| if (cmd_gen(cmd) >= INTEL_GEN(7)) { |
| dw1 |= GEN7_CLIP_DW1_SUBPIXEL_8BITS | |
| GEN7_CLIP_DW1_EARLY_CULL_ENABLE | |
| raster->cmd_clip_cull; |
| } |
| |
| dw2 = GEN6_CLIP_DW2_CLIP_ENABLE | |
| GEN6_CLIP_DW2_XY_TEST_ENABLE | |
| GEN6_CLIP_DW2_APIMODE_OGL | |
| pipeline->provoking_vertex_tri << GEN6_CLIP_DW2_TRI_PROVOKE__SHIFT | |
| pipeline->provoking_vertex_line << GEN6_CLIP_DW2_LINE_PROVOKE__SHIFT | |
| pipeline->provoking_vertex_trifan << GEN6_CLIP_DW2_TRIFAN_PROVOKE__SHIFT; |
| |
| if (pipeline->rasterizerDiscardEnable) |
| dw2 |= GEN6_CLIP_DW2_CLIPMODE_REJECT_ALL; |
| else |
| dw2 |= GEN6_CLIP_DW2_CLIPMODE_NORMAL; |
| |
| if (pipeline->depthClipEnable) |
| dw2 |= GEN6_CLIP_DW2_Z_TEST_ENABLE; |
| |
| if (fs->barycentric_interps & (GEN6_INTERP_NONPERSPECTIVE_PIXEL | |
| GEN6_INTERP_NONPERSPECTIVE_CENTROID | |
| GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) |
| dw2 |= GEN6_CLIP_DW2_NONPERSPECTIVE_BARYCENTRIC_ENABLE; |
| |
| dw3 = 0x1 << GEN6_CLIP_DW3_MIN_POINT_WIDTH__SHIFT | |
| 0x7ff << GEN6_CLIP_DW3_MAX_POINT_WIDTH__SHIFT | |
| (viewport->viewport_count - 1); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, dw1); |
| cmd_batch_write(cmd, dw2); |
| cmd_batch_write(cmd, dw3); |
| } |
| |
| static void gen6_3DSTATE_WM(struct intel_cmd *cmd) |
| { |
| const int max_threads = (cmd->dev->gpu->gt == 2) ? 80 : 40; |
| const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics; |
| const struct intel_pipe_shader *fs = &pipeline->fs; |
| const struct intel_msaa_state *msaa = cmd->bind.state.msaa; |
| const uint8_t cmd_len = 9; |
| uint32_t dw0, dw2, dw4, dw5, dw6; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); |
| |
| dw2 = (fs->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | |
| fs->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; |
| |
| dw4 = GEN6_WM_DW4_STATISTICS | |
| fs->urb_grf_start << GEN6_WM_DW4_URB_GRF_START0__SHIFT | |
| 0 << GEN6_WM_DW4_URB_GRF_START1__SHIFT | |
| 0 << GEN6_WM_DW4_URB_GRF_START2__SHIFT; |
| |
| dw5 = (max_threads - 1) << GEN6_WM_DW5_MAX_THREADS__SHIFT | |
| GEN6_WM_DW5_PS_ENABLE | |
| GEN6_WM_DW5_8_PIXEL_DISPATCH; |
| |
| if (fs->uses & INTEL_SHADER_USE_KILL || |
| pipeline->cb_state.alphaToCoverageEnable) |
| dw5 |= GEN6_WM_DW5_PS_KILL; |
| |
| if (fs->uses & INTEL_SHADER_USE_COMPUTED_DEPTH) |
| dw5 |= GEN6_WM_DW5_PS_COMPUTE_DEPTH; |
| if (fs->uses & INTEL_SHADER_USE_DEPTH) |
| dw5 |= GEN6_WM_DW5_PS_USE_DEPTH; |
| if (fs->uses & INTEL_SHADER_USE_W) |
| dw5 |= GEN6_WM_DW5_PS_USE_W; |
| |
| if (pipeline->cb_state.dualSourceBlendEnable) |
| dw5 |= GEN6_WM_DW5_DUAL_SOURCE_BLEND; |
| |
| dw6 = fs->in_count << GEN6_WM_DW6_SF_ATTR_COUNT__SHIFT | |
| GEN6_WM_DW6_POSOFFSET_NONE | |
| GEN6_WM_DW6_ZW_INTERP_PIXEL | |
| fs->barycentric_interps << GEN6_WM_DW6_BARYCENTRIC_INTERP__SHIFT | |
| GEN6_WM_DW6_POINT_RASTRULE_UPPER_RIGHT; |
| |
| if (msaa->sample_count > 1) { |
| dw6 |= GEN6_WM_DW6_MSRASTMODE_ON_PATTERN | |
| GEN6_WM_DW6_MSDISPMODE_PERPIXEL; |
| } else { |
| dw6 |= GEN6_WM_DW6_MSRASTMODE_OFF_PIXEL | |
| GEN6_WM_DW6_MSDISPMODE_PERSAMPLE; |
| } |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, cmd->bind.fs.kernel_pos << 2); |
| cmd_batch_write(cmd, dw2); |
| cmd_batch_write(cmd, 0); /* scratch */ |
| cmd_batch_write(cmd, dw4); |
| cmd_batch_write(cmd, dw5); |
| cmd_batch_write(cmd, dw6); |
| cmd_batch_write(cmd, 0); /* kernel 1 */ |
| cmd_batch_write(cmd, 0); /* kernel 2 */ |
| } |
| |
| static void gen7_3DSTATE_WM(struct intel_cmd *cmd) |
| { |
| const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics; |
| const struct intel_pipe_shader *fs = &pipeline->fs; |
| const struct intel_msaa_state *msaa = cmd->bind.state.msaa; |
| const uint8_t cmd_len = 3; |
| uint32_t dw0, dw1, dw2; |
| |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_WM) | (cmd_len - 2); |
| |
| dw1 = GEN7_WM_DW1_STATISTICS | |
| GEN7_WM_DW1_PS_ENABLE | |
| GEN7_WM_DW1_ZW_INTERP_PIXEL | |
| fs->barycentric_interps << GEN7_WM_DW1_BARYCENTRIC_INTERP__SHIFT | |
| GEN7_WM_DW1_POINT_RASTRULE_UPPER_RIGHT; |
| |
| if (fs->uses & INTEL_SHADER_USE_KILL || |
| pipeline->cb_state.alphaToCoverageEnable) |
| dw1 |= GEN7_WM_DW1_PS_KILL; |
| |
| if (fs->uses & INTEL_SHADER_USE_COMPUTED_DEPTH) |
| dw1 |= GEN7_WM_DW1_PSCDEPTH_ON; |
| if (fs->uses & INTEL_SHADER_USE_DEPTH) |
| dw1 |= GEN7_WM_DW1_PS_USE_DEPTH; |
| if (fs->uses & INTEL_SHADER_USE_W) |
| dw1 |= GEN7_WM_DW1_PS_USE_W; |
| |
| dw2 = 0; |
| |
| if (msaa->sample_count > 1) { |
| dw1 |= GEN7_WM_DW1_MSRASTMODE_ON_PATTERN; |
| dw2 |= GEN7_WM_DW2_MSDISPMODE_PERPIXEL; |
| } else { |
| dw1 |= GEN7_WM_DW1_MSRASTMODE_OFF_PIXEL; |
| dw2 |= GEN7_WM_DW2_MSDISPMODE_PERSAMPLE; |
| } |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, dw1); |
| cmd_batch_write(cmd, dw2); |
| } |
| |
| static void gen7_3DSTATE_PS(struct intel_cmd *cmd) |
| { |
| const struct intel_pipeline *pipeline = cmd->bind.pipeline.graphics; |
| const struct intel_pipe_shader *fs = &pipeline->fs; |
| const struct intel_msaa_state *msaa = cmd->bind.state.msaa; |
| const uint8_t cmd_len = 8; |
| uint32_t dw0, dw2, dw4, dw5; |
| |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| dw0 = GEN7_RENDER_CMD(3D, 3DSTATE_PS) | (cmd_len - 2); |
| |
| dw2 = (fs->sampler_count + 3) / 4 << GEN6_THREADDISP_SAMPLER_COUNT__SHIFT | |
| fs->surface_count << GEN6_THREADDISP_BINDING_TABLE_SIZE__SHIFT; |
| |
| dw4 = GEN7_PS_DW4_POSOFFSET_NONE | |
| GEN7_PS_DW4_8_PIXEL_DISPATCH; |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7.5)) { |
| const int max_threads = |
| (cmd->dev->gpu->gt == 3) ? 408 : |
| (cmd->dev->gpu->gt == 2) ? 204 : 102; |
| dw4 |= (max_threads - 1) << GEN75_PS_DW4_MAX_THREADS__SHIFT; |
| dw4 |= msaa->cmd[msaa->cmd_len - 1] << GEN75_PS_DW4_SAMPLE_MASK__SHIFT; |
| } else { |
| const int max_threads = (cmd->dev->gpu->gt == 2) ? 172 : 48; |
| dw4 |= (max_threads - 1) << GEN7_PS_DW4_MAX_THREADS__SHIFT; |
| } |
| |
| if (fs->pcb_size) |
| dw4 |= GEN7_PS_DW4_PUSH_CONSTANT_ENABLE; |
| |
| if (fs->in_count) |
| dw4 |= GEN7_PS_DW4_ATTR_ENABLE; |
| |
| if (pipeline->cb_state.dualSourceBlendEnable) |
| dw4 |= GEN7_PS_DW4_DUAL_SOURCE_BLEND; |
| |
| dw5 = fs->urb_grf_start << GEN7_PS_DW5_URB_GRF_START0__SHIFT | |
| 0 << GEN7_PS_DW5_URB_GRF_START1__SHIFT | |
| 0 << GEN7_PS_DW5_URB_GRF_START2__SHIFT; |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, cmd->bind.fs.kernel_pos << 2); |
| cmd_batch_write(cmd, dw2); |
| cmd_batch_write(cmd, 0); /* scratch */ |
| cmd_batch_write(cmd, dw4); |
| cmd_batch_write(cmd, dw5); |
| cmd_batch_write(cmd, 0); /* kernel 1 */ |
| cmd_batch_write(cmd, 0); /* kernel 2 */ |
| } |
| |
| static void gen6_3DSTATE_DEPTH_BUFFER(struct intel_cmd *cmd, |
| const struct intel_ds_view *view) |
| { |
| const uint8_t cmd_len = 7; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| dw0 = (cmd_gen(cmd) >= INTEL_GEN(7)) ? |
| GEN7_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER) : |
| GEN6_RENDER_CMD(3D, 3DSTATE_DEPTH_BUFFER); |
| dw0 |= (cmd_len - 2); |
| |
| cmd_batch_reserve_reloc(cmd, cmd_len, (bool) view->img); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, view->cmd[0]); |
| if (view->img) { |
| cmd_batch_reloc(cmd, view->cmd[1], view->img->obj.mem->bo, |
| INTEL_RELOC_WRITE); |
| } else { |
| cmd_batch_write(cmd, 0); |
| } |
| cmd_batch_write(cmd, view->cmd[2]); |
| cmd_batch_write(cmd, view->cmd[3]); |
| cmd_batch_write(cmd, view->cmd[4]); |
| cmd_batch_write(cmd, view->cmd[5]); |
| } |
| |
| static void gen6_3DSTATE_STENCIL_BUFFER(struct intel_cmd *cmd, |
| const struct intel_ds_view *view) |
| { |
| const uint8_t cmd_len = 3; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| dw0 = (cmd_gen(cmd) >= INTEL_GEN(7)) ? |
| GEN7_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER) : |
| GEN6_RENDER_CMD(3D, 3DSTATE_STENCIL_BUFFER); |
| dw0 |= (cmd_len - 2); |
| |
| cmd_batch_reserve_reloc(cmd, cmd_len, (bool) view->img); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, view->cmd[6]); |
| if (view->img) { |
| cmd_batch_reloc(cmd, view->cmd[7], view->img->obj.mem->bo, |
| INTEL_RELOC_WRITE); |
| } else { |
| cmd_batch_write(cmd, 0); |
| } |
| } |
| |
| static void gen6_3DSTATE_HIER_DEPTH_BUFFER(struct intel_cmd *cmd, |
| const struct intel_ds_view *view) |
| { |
| const uint8_t cmd_len = 3; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| dw0 = (cmd_gen(cmd) >= INTEL_GEN(7)) ? |
| GEN7_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER) : |
| GEN6_RENDER_CMD(3D, 3DSTATE_HIER_DEPTH_BUFFER); |
| dw0 |= (cmd_len - 2); |
| |
| cmd_batch_reserve_reloc(cmd, cmd_len, (bool) view->img); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, view->cmd[8]); |
| if (view->img) { |
| cmd_batch_reloc(cmd, view->cmd[9], view->img->obj.mem->bo, |
| INTEL_RELOC_WRITE); |
| } else { |
| cmd_batch_write(cmd, 0); |
| } |
| } |
| |
| static void gen6_3DSTATE_CLEAR_PARAMS(struct intel_cmd *cmd, |
| uint32_t clear_val) |
| { |
| const uint8_t cmd_len = 2; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | |
| GEN6_CLEAR_PARAMS_DW0_VALID | |
| (cmd_len - 2); |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, clear_val); |
| } |
| |
/*
 * Emit the three-dword GEN7/GEN7.5 3DSTATE_CLEAR_PARAMS: header, clear_val,
 * then a trailing dword of 1 (presumably the "valid" flag that GEN6 keeps
 * in the header -- confirm against the PRM).
 */
static void gen7_3DSTATE_CLEAR_PARAMS(struct intel_cmd *cmd,
                                      uint32_t clear_val)
{
    const uint8_t cmd_len = 3;
    uint32_t header;

    CMD_ASSERT(cmd, 7, 7.5);

    header = GEN7_RENDER_CMD(3D, 3DSTATE_CLEAR_PARAMS) | (cmd_len - 2);

    cmd_batch_reserve(cmd, cmd_len);
    cmd_batch_write(cmd, header);
    cmd_batch_write(cmd, clear_val);
    cmd_batch_write(cmd, 1);
}
| |
| static void gen6_3DSTATE_CC_STATE_POINTERS(struct intel_cmd *cmd, |
| XGL_UINT blend_pos, |
| XGL_UINT ds_pos, |
| XGL_UINT cc_pos) |
| { |
| const uint8_t cmd_len = 4; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_CC_STATE_POINTERS) | |
| (cmd_len - 2); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, (blend_pos << 2) | 1); |
| cmd_batch_write(cmd, (ds_pos << 2) | 1); |
| cmd_batch_write(cmd, (cc_pos << 2) | 1); |
| } |
| |
| static void gen6_3DSTATE_VIEWPORT_STATE_POINTERS(struct intel_cmd *cmd, |
| XGL_UINT clip_pos, |
| XGL_UINT sf_pos, |
| XGL_UINT cc_pos) |
| { |
| const uint8_t cmd_len = 4; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VIEWPORT_STATE_POINTERS) | |
| GEN6_PTR_VP_DW0_CLIP_CHANGED | |
| GEN6_PTR_VP_DW0_SF_CHANGED | |
| GEN6_PTR_VP_DW0_CC_CHANGED | |
| (cmd_len - 2); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, clip_pos << 2); |
| cmd_batch_write(cmd, sf_pos << 2); |
| cmd_batch_write(cmd, cc_pos << 2); |
| } |
| |
| static void gen6_3DSTATE_SCISSOR_STATE_POINTERS(struct intel_cmd *cmd, |
| XGL_UINT scissor_pos) |
| { |
| const uint8_t cmd_len = 2; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SCISSOR_STATE_POINTERS) | |
| (cmd_len - 2); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, scissor_pos << 2); |
| } |
| |
| static void gen6_3DSTATE_BINDING_TABLE_POINTERS(struct intel_cmd *cmd, |
| XGL_UINT vs_pos, |
| XGL_UINT gs_pos, |
| XGL_UINT ps_pos) |
| { |
| const uint8_t cmd_len = 4; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_BINDING_TABLE_POINTERS) | |
| GEN6_PTR_BINDING_TABLE_DW0_VS_CHANGED | |
| GEN6_PTR_BINDING_TABLE_DW0_GS_CHANGED | |
| GEN6_PTR_BINDING_TABLE_DW0_PS_CHANGED | |
| (cmd_len - 2); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, vs_pos << 2); |
| cmd_batch_write(cmd, gs_pos << 2); |
| cmd_batch_write(cmd, ps_pos << 2); |
| } |
| |
| static void gen6_3DSTATE_SAMPLER_STATE_POINTERS(struct intel_cmd *cmd, |
| XGL_UINT vs_pos, |
| XGL_UINT gs_pos, |
| XGL_UINT ps_pos) |
| { |
| const uint8_t cmd_len = 4; |
| uint32_t dw0; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_SAMPLER_STATE_POINTERS) | |
| GEN6_PTR_SAMPLER_DW0_VS_CHANGED | |
| GEN6_PTR_SAMPLER_DW0_GS_CHANGED | |
| GEN6_PTR_SAMPLER_DW0_PS_CHANGED | |
| (cmd_len - 2); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, vs_pos << 2); |
| cmd_batch_write(cmd, gs_pos << 2); |
| cmd_batch_write(cmd, ps_pos << 2); |
| } |
| |
| static void gen7_3dstate_pointer(struct intel_cmd *cmd, |
| int subop, XGL_UINT pos) |
| { |
| const uint8_t cmd_len = 2; |
| const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | |
| GEN6_RENDER_SUBTYPE_3D | |
| subop | (cmd_len - 2); |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, pos << 2); |
| } |
| |
| static XGL_UINT gen6_BLEND_STATE(struct intel_cmd *cmd, |
| const struct intel_blend_state *state) |
| { |
| const uint8_t cmd_align = GEN6_ALIGNMENT_BLEND_STATE; |
| const uint8_t cmd_len = XGL_MAX_COLOR_ATTACHMENTS * 2; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| STATIC_ASSERT(ARRAY_SIZE(state->cmd) >= cmd_len); |
| |
| return cmd_state_copy(cmd, state->cmd, cmd_len, cmd_align); |
| } |
| |
| static XGL_UINT gen6_DEPTH_STENCIL_STATE(struct intel_cmd *cmd, |
| const struct intel_ds_state *state) |
| { |
| const uint8_t cmd_align = GEN6_ALIGNMENT_DEPTH_STENCIL_STATE; |
| const uint8_t cmd_len = 3; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| STATIC_ASSERT(ARRAY_SIZE(state->cmd) >= cmd_len); |
| |
| return cmd_state_copy(cmd, state->cmd, cmd_len, cmd_align); |
| } |
| |
| static XGL_UINT gen6_COLOR_CALC_STATE(struct intel_cmd *cmd, |
| uint32_t stencil_ref, |
| const uint32_t blend_color[4]) |
| { |
| const uint8_t cmd_align = GEN6_ALIGNMENT_COLOR_CALC_STATE; |
| const uint8_t cmd_len = 6; |
| XGL_UINT pos; |
| uint32_t *dw; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| dw = cmd_state_reserve(cmd, cmd_len, cmd_align, &pos); |
| dw[0] = stencil_ref; |
| dw[1] = 0; |
| dw[2] = blend_color[0]; |
| dw[3] = blend_color[1]; |
| dw[4] = blend_color[2]; |
| dw[5] = blend_color[3]; |
| cmd_state_advance(cmd, cmd_len); |
| |
| return pos; |
| } |
| |
| static void cmd_wa_gen6_pre_depth_stall_write(struct intel_cmd *cmd) |
| { |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| if (!cmd->bind.draw_count) |
| return; |
| |
| if (cmd->bind.wa_flags & INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE) |
| return; |
| |
| cmd->bind.wa_flags |= INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE; |
| |
| /* |
| * From the Sandy Bridge PRM, volume 2 part 1, page 60: |
| * |
| * "Pipe-control with CS-stall bit set must be sent BEFORE the |
| * pipe-control with a post-sync op and no write-cache flushes." |
| * |
| * The workaround below necessitates this workaround. |
| */ |
| gen6_PIPE_CONTROL(cmd, |
| GEN6_PIPE_CONTROL_CS_STALL | |
| GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL, |
| NULL, 0, 0); |
| |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_WRITE_IMM, |
| cmd->scratch_bo, 0, 0); |
| } |
| |
| static void cmd_wa_gen6_pre_command_scoreboard_stall(struct intel_cmd *cmd) |
| { |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| if (!cmd->bind.draw_count) |
| return; |
| |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL, |
| NULL, 0, 0); |
| } |
| |
| static void cmd_wa_gen7_pre_vs_depth_stall_write(struct intel_cmd *cmd) |
| { |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| if (!cmd->bind.draw_count) |
| return; |
| |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| |
| gen6_PIPE_CONTROL(cmd, |
| GEN6_PIPE_CONTROL_DEPTH_STALL | GEN6_PIPE_CONTROL_WRITE_IMM, |
| cmd->scratch_bo, 0, 0); |
| } |
| |
| static void cmd_wa_gen7_post_command_cs_stall(struct intel_cmd *cmd) |
| { |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| if (!cmd->bind.draw_count) |
| return; |
| |
| /* |
| * From the Ivy Bridge PRM, volume 2 part 1, page 61: |
| * |
| * "One of the following must also be set (when CS stall is set): |
| * |
| * * Render Target Cache Flush Enable ([12] of DW1) |
| * * Depth Cache Flush Enable ([0] of DW1) |
| * * Stall at Pixel Scoreboard ([1] of DW1) |
| * * Depth Stall ([13] of DW1) |
| * * Post-Sync Operation ([13] of DW1)" |
| */ |
| gen6_PIPE_CONTROL(cmd, |
| GEN6_PIPE_CONTROL_CS_STALL | |
| GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL, |
| NULL, 0, 0); |
| } |
| |
| static void cmd_wa_gen7_post_command_depth_stall(struct intel_cmd *cmd) |
| { |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| if (!cmd->bind.draw_count) |
| return; |
| |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_STALL, NULL, 0, 0); |
| } |
| |
| static void cmd_wa_gen6_pre_multisample_depth_flush(struct intel_cmd *cmd) |
| { |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| if (!cmd->bind.draw_count) |
| return; |
| |
| /* |
| * From the Sandy Bridge PRM, volume 2 part 1, page 305: |
| * |
| * "Driver must guarentee that all the caches in the depth pipe are |
| * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This |
| * requires driver to send a PIPE_CONTROL with a CS stall along with |
| * a Depth Flush prior to this command." |
| * |
| * From the Ivy Bridge PRM, volume 2 part 1, page 304: |
| * |
| * "Driver must ierarchi that all the caches in the depth pipe are |
| * flushed before this command (3DSTATE_MULTISAMPLE) is parsed. This |
| * requires driver to send a PIPE_CONTROL with a CS stall along with |
| * a Depth Flush prior to this command. |
| */ |
| gen6_PIPE_CONTROL(cmd, |
| GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH | |
| GEN6_PIPE_CONTROL_CS_STALL, |
| NULL, 0, 0); |
| } |
| |
| static void cmd_wa_gen6_pre_ds_flush(struct intel_cmd *cmd) |
| { |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| if (!cmd->bind.draw_count) |
| return; |
| |
| /* |
| * From the Ivy Bridge PRM, volume 2 part 1, page 315: |
| * |
| * "Driver must send a least one PIPE_CONTROL command with CS Stall |
| * and a post sync operation prior to the group of depth |
| * commands(3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, |
| * 3DSTATE_STENCIL_BUFFER, and 3DSTATE_HIER_DEPTH_BUFFER)." |
| * |
| * This workaround satifies all the conditions. |
| */ |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| |
| /* |
| * From the Ivy Bridge PRM, volume 2 part 1, page 315: |
| * |
| * "Restriction: Prior to changing Depth/Stencil Buffer state (i.e., |
| * any combination of 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, |
| * 3DSTATE_STENCIL_BUFFER, 3DSTATE_HIER_DEPTH_BUFFER) SW must first |
| * issue a pipelined depth stall (PIPE_CONTROL with Depth Stall bit |
| * set), followed by a pipelined depth cache flush (PIPE_CONTROL with |
| * Depth Flush Bit set, followed by another pipelined depth stall |
| * (PIPE_CONTROL with Depth Stall Bit set), unless SW can otherwise |
| * guarantee that the pipeline from WM onwards is already flushed |
| * (e.g., via a preceding MI_FLUSH)." |
| */ |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_STALL, NULL, 0, 0); |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH, NULL, 0, 0); |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_DEPTH_STALL, NULL, 0, 0); |
| } |
| |
| void cmd_batch_flush(struct intel_cmd *cmd, uint32_t pipe_control_dw0) |
| { |
| if (!cmd->bind.draw_count) |
| return; |
| |
| assert(!(pipe_control_dw0 & GEN6_PIPE_CONTROL_WRITE__MASK)); |
| |
| /* |
| * From the Sandy Bridge PRM, volume 2 part 1, page 60: |
| * |
| * "Before a PIPE_CONTROL with Write Cache Flush Enable =1, a |
| * PIPE_CONTROL with any non-zero post-sync-op is required." |
| */ |
| if (pipe_control_dw0 & GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH) |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| |
| /* |
| * From the Ivy Bridge PRM, volume 2 part 1, page 61: |
| * |
| * "One of the following must also be set (when CS stall is set): |
| * |
| * * Render Target Cache Flush Enable ([12] of DW1) |
| * * Depth Cache Flush Enable ([0] of DW1) |
| * * Stall at Pixel Scoreboard ([1] of DW1) |
| * * Depth Stall ([13] of DW1) |
| * * Post-Sync Operation ([13] of DW1)" |
| */ |
| if ((pipe_control_dw0 & GEN6_PIPE_CONTROL_CS_STALL) && |
| !(pipe_control_dw0 & (GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH | |
| GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH | |
| GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL | |
| GEN6_PIPE_CONTROL_DEPTH_STALL))) |
| pipe_control_dw0 |= GEN6_PIPE_CONTROL_PIXEL_SCOREBOARD_STALL; |
| |
| gen6_PIPE_CONTROL(cmd, pipe_control_dw0, NULL, 0, 0); |
| } |
| |
| void cmd_batch_depth_count(struct intel_cmd *cmd, |
| struct intel_bo *bo, |
| XGL_GPU_SIZE offset) |
| { |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| |
| gen6_PIPE_CONTROL(cmd, |
| GEN6_PIPE_CONTROL_DEPTH_STALL | |
| GEN6_PIPE_CONTROL_WRITE_PS_DEPTH_COUNT, |
| bo, offset, 0); |
| } |
| |
| void cmd_batch_timestamp(struct intel_cmd *cmd, |
| struct intel_bo *bo, |
| XGL_GPU_SIZE offset) |
| { |
| /* need any WA or stall? */ |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_WRITE_TIMESTAMP, bo, offset, 0); |
| } |
| |
| void cmd_batch_immediate(struct intel_cmd *cmd, |
| struct intel_bo *bo, |
| XGL_GPU_SIZE offset, |
| uint64_t val) |
| { |
| /* need any WA or stall? */ |
| gen6_PIPE_CONTROL(cmd, GEN6_PIPE_CONTROL_WRITE_IMM, bo, offset, val); |
| } |
| |
| static void gen6_cc_states(struct intel_cmd *cmd) |
| { |
| const struct intel_blend_state *blend = cmd->bind.state.blend; |
| const struct intel_ds_state *ds = cmd->bind.state.ds; |
| XGL_UINT blend_pos, ds_pos, cc_pos; |
| uint32_t stencil_ref; |
| uint32_t blend_color[4]; |
| |
| CMD_ASSERT(cmd, 6, 6); |
| |
| if (blend) { |
| blend_pos = gen6_BLEND_STATE(cmd, blend); |
| memcpy(blend_color, blend->cmd_blend_color, sizeof(blend_color)); |
| } else { |
| blend_pos = 0; |
| memset(blend_color, 0, sizeof(blend_color)); |
| } |
| |
| if (ds) { |
| ds_pos = gen6_DEPTH_STENCIL_STATE(cmd, ds); |
| stencil_ref = ds->cmd_stencil_ref; |
| } else { |
| ds_pos = 0; |
| stencil_ref = 0; |
| } |
| |
| cc_pos = gen6_COLOR_CALC_STATE(cmd, stencil_ref, blend_color); |
| |
| gen6_3DSTATE_CC_STATE_POINTERS(cmd, blend_pos, ds_pos, cc_pos); |
| } |
| |
| static void gen6_viewport_states(struct intel_cmd *cmd) |
| { |
| const struct intel_viewport_state *viewport = cmd->bind.state.viewport; |
| XGL_UINT pos; |
| |
| if (!viewport) |
| return; |
| |
| pos = cmd_state_copy(cmd, viewport->cmd, viewport->cmd_len, |
| viewport->cmd_align); |
| |
| gen6_3DSTATE_VIEWPORT_STATE_POINTERS(cmd, |
| pos + viewport->cmd_clip_offset, |
| pos, |
| pos + viewport->cmd_cc_offset); |
| |
| pos = (viewport->scissor_enable) ? |
| pos + viewport->cmd_scissor_rect_offset : 0; |
| |
| gen6_3DSTATE_SCISSOR_STATE_POINTERS(cmd, pos); |
| } |
| |
| static void gen7_cc_states(struct intel_cmd *cmd) |
| { |
| const struct intel_blend_state *blend = cmd->bind.state.blend; |
| const struct intel_ds_state *ds = cmd->bind.state.ds; |
| uint32_t stencil_ref; |
| uint32_t blend_color[4]; |
| XGL_UINT pos; |
| |
| CMD_ASSERT(cmd, 7, 7.5); |
| |
| if (!blend && !ds) |
| return; |
| |
| if (blend) { |
| pos = gen6_BLEND_STATE(cmd, blend); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_BLEND_STATE_POINTERS, pos); |
| |
| memcpy(blend_color, blend->cmd_blend_color, sizeof(blend_color)); |
| } else { |
| memset(blend_color, 0, sizeof(blend_color)); |
| } |
| |
| if (ds) { |
| pos = gen6_DEPTH_STENCIL_STATE(cmd, ds); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_DEPTH_STENCIL_STATE_POINTERS, pos); |
| } else { |
| stencil_ref = 0; |
| } |
| |
| pos = gen6_COLOR_CALC_STATE(cmd, stencil_ref, blend_color); |
| gen7_3dstate_pointer(cmd, |
| GEN6_RENDER_OPCODE_3DSTATE_CC_STATE_POINTERS, pos); |
| } |
| |
| static void gen7_viewport_states(struct intel_cmd *cmd) |
| { |
| const struct intel_viewport_state *viewport = cmd->bind.state.viewport; |
| XGL_UINT pos; |
| |
| if (!viewport) |
| return; |
| |
| pos = cmd_state_copy(cmd, viewport->cmd, viewport->cmd_len, |
| viewport->cmd_align); |
| |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP, pos); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_VIEWPORT_STATE_POINTERS_CC, |
| pos + viewport->cmd_cc_offset); |
| if (viewport->scissor_enable) { |
| gen7_3dstate_pointer(cmd, |
| GEN6_RENDER_OPCODE_3DSTATE_SCISSOR_STATE_POINTERS, |
| pos + viewport->cmd_scissor_rect_offset); |
| } |
| } |
| |
| static void gen6_pcb(struct intel_cmd *cmd, int subop, |
| const struct intel_pipe_shader *sh) |
| { |
| const uint8_t cmd_len = 5; |
| /* |
| * TODO It is actually 2048 for non-VS PCB. But we need to upload the |
| * data to multiple PCBs when the size is greater than 1024. |
| */ |
| const XGL_UINT max_size = 1024; |
| uint32_t dw0; |
| XGL_UINT pos; |
| |
| if (sh->pcb_size > max_size) { |
| cmd->result = XGL_ERROR_UNKNOWN; |
| return; |
| } |
| |
| dw0 = GEN6_RENDER_TYPE_RENDER | |
| GEN6_RENDER_SUBTYPE_3D | |
| subop | |
| (cmd_len - 2); |
| pos = 0; |
| |
| if (sh->pcb_size) { |
| const XGL_UINT alignment = 32; |
| const XGL_SIZE size = u_align(sh->pcb_size, alignment); |
| void *ptr; |
| |
| ptr = cmd_state_reserve(cmd, size / sizeof(uint32_t), |
| alignment / sizeof(uint32_t), &pos); |
| memcpy(ptr, sh->pcb, sh->pcb_size); |
| cmd_state_advance(cmd, size / sizeof(uint32_t)); |
| |
| dw0 |= GEN6_PCB_ANY_DW0_PCB0_VALID; |
| pos |= size / alignment - 1; |
| } |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, pos); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| } |
| |
| static void gen7_pcb(struct intel_cmd *cmd, int subop, |
| const struct intel_pipe_shader *sh) |
| { |
| const uint8_t cmd_len = 7; |
| const uint32_t dw0 = GEN6_RENDER_TYPE_RENDER | |
| GEN6_RENDER_SUBTYPE_3D | |
| subop | |
| (cmd_len - 2); |
| const XGL_UINT max_size = 2048; |
| XGL_UINT pcb_len = 0; |
| XGL_UINT pos = 0; |
| |
| if (sh->pcb_size > max_size) { |
| cmd->result = XGL_ERROR_UNKNOWN; |
| return; |
| } |
| |
| if (sh->pcb_size) { |
| const XGL_UINT alignment = 32; |
| const XGL_SIZE size = u_align(sh->pcb_size, alignment); |
| void *ptr; |
| |
| pcb_len = size / alignment; |
| |
| ptr = cmd_state_reserve(cmd, size / sizeof(uint32_t), |
| alignment / sizeof(uint32_t), &pos); |
| memcpy(ptr, sh->pcb, sh->pcb_size); |
| cmd_state_advance(cmd, size / sizeof(uint32_t)); |
| } |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, pcb_len); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, pos); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| } |
| |
| static void emit_ps_resources(struct intel_cmd *cmd, |
| const struct intel_rmap *rmap) |
| { |
| const XGL_UINT surface_count = rmap->rt_count + |
| rmap->resource_count + rmap->uav_count; |
| uint32_t binding_table[256]; |
| XGL_UINT pos, i; |
| |
| assert(surface_count <= ARRAY_SIZE(binding_table)); |
| |
| for (i = 0; i < surface_count; i++) { |
| const struct intel_rmap_slot *slot = &rmap->slots[i]; |
| uint32_t *dw; |
| |
| switch (slot->path_len) { |
| case 0: |
| pos = 0; |
| break; |
| case INTEL_RMAP_SLOT_RT: |
| { |
| const struct intel_rt_view *view = cmd->bind.att.rt[i]; |
| |
| dw = cmd_state_reserve_reloc(cmd, view->cmd_len, 1, |
| GEN6_ALIGNMENT_SURFACE_STATE, &pos); |
| |
| memcpy(dw, view->cmd, sizeof(uint32_t) * view->cmd_len); |
| cmd_state_reloc(cmd, 1, view->cmd[1], view->img->obj.mem->bo, |
| INTEL_RELOC_WRITE); |
| cmd_state_advance(cmd, view->cmd_len); |
| } |
| break; |
| case INTEL_RMAP_SLOT_DYN: |
| { |
| const struct intel_mem_view *view = |
| &cmd->bind.dyn_view.graphics; |
| |
| dw = cmd_state_reserve_reloc(cmd, view->cmd_len, 1, |
| GEN6_ALIGNMENT_SURFACE_STATE, &pos); |
| |
| memcpy(dw, view->cmd, sizeof(uint32_t) * view->cmd_len); |
| cmd_state_reloc(cmd, 1, view->cmd[1], view->mem->bo, |
| INTEL_RELOC_WRITE); |
| cmd_state_advance(cmd, view->cmd_len); |
| } |
| break; |
| case 1: |
| default: |
| /* TODO */ |
| assert(!"no dset support"); |
| break; |
| } |
| |
| binding_table[i] = pos << 2; |
| } |
| |
| pos = cmd_state_copy(cmd, binding_table, surface_count, |
| GEN6_ALIGNMENT_BINDING_TABLE_STATE); |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7)) { |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_PS, pos); |
| |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_VS, 0); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_HS, 0); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_DS, 0); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_BINDING_TABLE_POINTERS_GS, 0); |
| |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_VS, 0); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_HS, 0); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_DS, 0); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_GS, 0); |
| gen7_3dstate_pointer(cmd, |
| GEN7_RENDER_OPCODE_3DSTATE_SAMPLER_STATE_POINTERS_PS, 0); |
| } else { |
| gen6_3DSTATE_BINDING_TABLE_POINTERS(cmd, 0, 0, pos); |
| gen6_3DSTATE_SAMPLER_STATE_POINTERS(cmd, 0, 0, 0); |
| } |
| } |
| |
// TODO: These should probably be generated
/* 3DSTATE_VS DW2 fields */
/* bit 31 uses an unsigned literal: shifting 1 into the sign bit of an int is undefined */
# define GEN6_VS_SPF_MODE (1u << 31)
# define GEN6_VS_VECTOR_MASK_ENABLE (1 << 30)
# define GEN6_VS_SAMPLER_COUNT_SHIFT 27
# define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT 18
# define GEN6_VS_FLOATING_POINT_MODE_IEEE_754 (0 << 16)
# define GEN6_VS_FLOATING_POINT_MODE_ALT (1 << 16)
| |
| static void gen6_3DSTATE_VS(struct intel_cmd *cmd) |
| { |
| const uint8_t cmd_len = GEN6_3DSTATE_VS__SIZE; |
| const uint32_t dw0 = GEN6_RENDER_CMD(3D, 3DSTATE_VS) | (cmd_len - 2); |
| uint32_t dw2, dw4, dw5; |
| |
| CMD_ASSERT(cmd, 6, 7.5); |
| |
| /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State, |
| * 3DSTATE_VS, Dword 5.0 "VS Function Enable": |
| * |
| * [DevSNB] A pipeline flush must be programmed prior to a 3DSTATE_VS |
| * command that causes the VS Function Enable to toggle. Pipeline |
| * flush can be executed by sending a PIPE_CONTROL command with CS |
| * stall bit set and a post sync operation. |
| * |
| * Although we don't disable the VS during normal drawing, BLORP sometimes |
| * disables it. To be safe, do the flush here just in case. |
| */ |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| |
| if (cmd->bind.vs.shader == NULL) { |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| cmd_batch_write(cmd, 0); |
| return; |
| } |
| |
| /* |
| * Most of this is know at pipeline create EXCEPT the kernel address, |
| * so that's why this is in cmd_pipeline vs. pipeline. |
| */ |
| dw2 = (u_align(cmd->bind.vs.shader->sampler_count, 4) / 4) << GEN6_VS_SAMPLER_COUNT_SHIFT; |
| dw4 = (1 << GEN6_VS_DW4_URB_GRF_START__SHIFT) | |
| (cmd->bind.vs.shader->urb_read_length << GEN6_VS_DW4_URB_READ_LEN__SHIFT) | |
| (0 << GEN6_VS_DW4_URB_READ_OFFSET__SHIFT); |
| |
| dw5 = GEN6_VS_DW5_STATISTICS | |
| GEN6_VS_DW5_VS_ENABLE; |
| if (cmd_gen(cmd) == INTEL_GEN(7.5)) { |
| dw5 |= ((cmd->dev->gpu->max_vs_threads-1) << GEN75_VS_DW5_MAX_THREADS__SHIFT); |
| } else { |
| dw5 |= ((cmd->dev->gpu->max_vs_threads-1) << GEN6_VS_DW5_MAX_THREADS__SHIFT); |
| } |
| |
| cmd_batch_reserve(cmd, cmd_len); |
| cmd_batch_write(cmd, dw0); |
| cmd_batch_write(cmd, cmd->bind.vs.kernel_pos); |
| cmd_batch_write(cmd, dw2); |
| cmd_batch_write(cmd, 0); /* scratch */ |
| cmd_batch_write(cmd, dw4); |
| cmd_batch_write(cmd, dw5); |
| } |
| |
| static void emit_bounded_states(struct intel_cmd *cmd) |
| { |
| const struct intel_msaa_state *msaa = cmd->bind.state.msaa; |
| |
| /* TODO more states */ |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7)) { |
| gen7_cc_states(cmd); |
| gen7_viewport_states(cmd); |
| |
| gen7_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, |
| &cmd->bind.pipeline.graphics->vs); |
| gen7_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, |
| &cmd->bind.pipeline.graphics->fs); |
| |
| gen6_3DSTATE_CLIP(cmd); |
| gen7_3DSTATE_SF(cmd); |
| gen7_3DSTATE_SBE(cmd); |
| gen7_3DSTATE_WM(cmd); |
| gen7_3DSTATE_PS(cmd); |
| } else { |
| gen6_cc_states(cmd); |
| gen6_viewport_states(cmd); |
| |
| gen6_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_VS, |
| &cmd->bind.pipeline.graphics->vs); |
| gen6_pcb(cmd, GEN6_RENDER_OPCODE_3DSTATE_CONSTANT_PS, |
| &cmd->bind.pipeline.graphics->fs); |
| |
| gen6_3DSTATE_CLIP(cmd); |
| gen6_3DSTATE_SF(cmd); |
| gen6_3DSTATE_WM(cmd); |
| } |
| |
| emit_ps_resources(cmd, cmd->bind.pipeline.graphics->fs.rmap); |
| |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| cmd_wa_gen6_pre_multisample_depth_flush(cmd); |
| /* 3DSTATE_MULTISAMPLE and 3DSTATE_SAMPLE_MASK */ |
| cmd_batch_reserve(cmd, msaa->cmd_len); |
| cmd_batch_write_n(cmd, msaa->cmd, msaa->cmd_len); |
| |
| gen6_3DSTATE_VS(cmd); |
| } |
| |
| static void emit_shader(struct intel_cmd *cmd, |
| const struct intel_pipe_shader *shader, |
| struct intel_cmd_shader *pCmdShader) |
| { |
| uint32_t i; |
| struct intel_cmd_shader *cmdShader; |
| |
| for (i=0; i<cmd->bind.shaderCache.used; i++) { |
| if (cmd->bind.shaderCache.shaderArray[i].shader == shader) { |
| /* shader is already part of pipeline */ |
| return; |
| } |
| } |
| |
| if (cmd->bind.shaderCache.used == cmd->bind.shaderCache.count) { |
| const XGL_UINT new_count = cmd->bind.shaderCache.count + 16; |
| |
| cmdShader = cmd->bind.shaderCache.shaderArray; |
| |
| cmd->bind.shaderCache.shaderArray = |
| icd_alloc(sizeof(*cmdShader) * new_count, |
| 0, XGL_SYSTEM_ALLOC_INTERNAL); |
| if (cmd->bind.shaderCache.shaderArray == NULL) { |
| cmd->bind.shaderCache.shaderArray = cmdShader; |
| cmd->result = XGL_ERROR_OUT_OF_MEMORY; |
| return; |
| } |
| |
| if (cmdShader) { |
| memcpy(cmd->bind.shaderCache.shaderArray, cmdShader, |
| sizeof(*cmdShader) * cmd->bind.shaderCache.used); |
| icd_free(cmdShader); |
| } |
| |
| cmd->bind.shaderCache.count = new_count; |
| } |
| |
| cmdShader = &cmd->bind.shaderCache.shaderArray[cmd->bind.shaderCache.used]; |
| cmdShader->shader = shader; |
| cmdShader->kernel_pos = cmd_kernel_copy(cmd, shader->pCode, shader->codeSize); |
| *pCmdShader = *cmdShader; |
| cmd->bind.shaderCache.used++; |
| return; |
| } |
| |
| static void cmd_bind_graphics_pipeline(struct intel_cmd *cmd, |
| const struct intel_pipeline *pipeline) |
| { |
| cmd->bind.pipeline.graphics = pipeline; |
| |
| if (pipeline->wa_flags & INTEL_CMD_WA_GEN6_PRE_DEPTH_STALL_WRITE) |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| if (pipeline->wa_flags & INTEL_CMD_WA_GEN6_PRE_COMMAND_SCOREBOARD_STALL) |
| cmd_wa_gen6_pre_command_scoreboard_stall(cmd); |
| if (pipeline->wa_flags & INTEL_CMD_WA_GEN7_PRE_VS_DEPTH_STALL_WRITE) |
| cmd_wa_gen7_pre_vs_depth_stall_write(cmd); |
| |
| /* 3DSTATE_URB_VS and etc. */ |
| assert(pipeline->cmd_len); |
| cmd_batch_reserve(cmd, pipeline->cmd_len); |
| cmd_batch_write_n(cmd, pipeline->cmds, pipeline->cmd_len); |
| |
| if (pipeline->active_shaders & SHADER_VERTEX_FLAG) { |
| emit_shader(cmd, &pipeline->vs, &cmd->bind.vs); |
| } |
| if (pipeline->active_shaders & SHADER_GEOMETRY_FLAG) { |
| emit_shader(cmd, &pipeline->gs, &cmd->bind.gs); |
| } |
| if (pipeline->active_shaders & SHADER_FRAGMENT_FLAG) { |
| emit_shader(cmd, &pipeline->fs, &cmd->bind.fs); |
| } |
| if (pipeline->active_shaders & SHADER_TESS_CONTROL_FLAG) { |
| emit_shader(cmd, &pipeline->tess_control, &cmd->bind.tess_control); |
| } |
| if (pipeline->active_shaders & SHADER_TESS_EVAL_FLAG) { |
| emit_shader(cmd, &pipeline->tess_eval, &cmd->bind.tess_eval); |
| } |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7)) { |
| gen7_3DSTATE_GS(cmd); |
| } else { |
| gen6_3DSTATE_GS(cmd); |
| } |
| |
| if (pipeline->wa_flags & INTEL_CMD_WA_GEN7_POST_COMMAND_CS_STALL) |
| cmd_wa_gen7_post_command_cs_stall(cmd); |
| if (pipeline->wa_flags & INTEL_CMD_WA_GEN7_POST_COMMAND_DEPTH_STALL) |
| cmd_wa_gen7_post_command_depth_stall(cmd); |
| } |
| |
| static void cmd_bind_compute_pipeline(struct intel_cmd *cmd, |
| const struct intel_pipeline *pipeline) |
| { |
| cmd->bind.pipeline.compute = pipeline; |
| } |
| |
| static void cmd_bind_graphics_delta(struct intel_cmd *cmd, |
| const struct intel_pipeline_delta *delta) |
| { |
| cmd->bind.pipeline.graphics_delta = delta; |
| } |
| |
| static void cmd_bind_compute_delta(struct intel_cmd *cmd, |
| const struct intel_pipeline_delta *delta) |
| { |
| cmd->bind.pipeline.compute_delta = delta; |
| } |
| |
| static void cmd_bind_graphics_dset(struct intel_cmd *cmd, |
| const struct intel_dset *dset, |
| XGL_UINT slot_offset) |
| { |
| cmd->bind.dset.graphics = dset; |
| cmd->bind.dset.graphics_offset = slot_offset; |
| } |
| |
| static void cmd_bind_compute_dset(struct intel_cmd *cmd, |
| const struct intel_dset *dset, |
| XGL_UINT slot_offset) |
| { |
| cmd->bind.dset.compute = dset; |
| cmd->bind.dset.compute_offset = slot_offset; |
| } |
| |
| static void cmd_bind_graphics_dyn_view(struct intel_cmd *cmd, |
| const XGL_MEMORY_VIEW_ATTACH_INFO *info) |
| { |
| intel_mem_view_init(&cmd->bind.dyn_view.graphics, cmd->dev, info); |
| } |
| |
| static void cmd_bind_compute_dyn_view(struct intel_cmd *cmd, |
| const XGL_MEMORY_VIEW_ATTACH_INFO *info) |
| { |
| intel_mem_view_init(&cmd->bind.dyn_view.compute, cmd->dev, info); |
| } |
| |
| static void cmd_bind_index_data(struct intel_cmd *cmd, |
| const struct intel_mem *mem, |
| XGL_GPU_SIZE offset, XGL_INDEX_TYPE type) |
| { |
| if (cmd_gen(cmd) >= INTEL_GEN(7.5)) { |
| gen6_3DSTATE_INDEX_BUFFER(cmd, mem, offset, type, false); |
| } else { |
| cmd->bind.index.mem = mem; |
| cmd->bind.index.offset = offset; |
| cmd->bind.index.type = type; |
| } |
| } |
| |
| static void cmd_bind_rt(struct intel_cmd *cmd, |
| const XGL_COLOR_ATTACHMENT_BIND_INFO *attachments, |
| XGL_UINT count) |
| { |
| XGL_UINT width = 0, height = 0; |
| XGL_UINT i; |
| |
| for (i = 0; i < count; i++) { |
| const XGL_COLOR_ATTACHMENT_BIND_INFO *att = &attachments[i]; |
| const struct intel_rt_view *rt = intel_rt_view(att->view); |
| const struct intel_layout *layout = &rt->img->layout; |
| |
| if (i == 0) { |
| width = layout->width0; |
| height = layout->height0; |
| } else { |
| if (width > layout->width0) |
| width = layout->width0; |
| if (height > layout->height0) |
| height = layout->height0; |
| } |
| |
| cmd->bind.att.rt[i] = rt; |
| } |
| |
| cmd->bind.att.rt_count = count; |
| |
| cmd_wa_gen6_pre_depth_stall_write(cmd); |
| gen6_3DSTATE_DRAWING_RECTANGLE(cmd, width, height); |
| } |
| |
| static void cmd_bind_ds(struct intel_cmd *cmd, |
| const XGL_DEPTH_STENCIL_BIND_INFO *info) |
| { |
| const struct intel_ds_view *ds; |
| |
| if (info) { |
| cmd->bind.att.ds = intel_ds_view(info->view); |
| ds = cmd->bind.att.ds; |
| } else { |
| /* all zeros */ |
| static const struct intel_ds_view null_ds; |
| ds = &null_ds; |
| } |
| |
| cmd_wa_gen6_pre_ds_flush(cmd); |
| gen6_3DSTATE_DEPTH_BUFFER(cmd, ds); |
| gen6_3DSTATE_STENCIL_BUFFER(cmd, ds); |
| gen6_3DSTATE_HIER_DEPTH_BUFFER(cmd, ds); |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7)) |
| gen7_3DSTATE_CLEAR_PARAMS(cmd, 0); |
| else |
| gen6_3DSTATE_CLEAR_PARAMS(cmd, 0); |
| } |
| |
| static void cmd_bind_viewport_state(struct intel_cmd *cmd, |
| const struct intel_viewport_state *state) |
| { |
| cmd->bind.state.viewport = state; |
| } |
| |
| static void cmd_bind_raster_state(struct intel_cmd *cmd, |
| const struct intel_raster_state *state) |
| { |
| cmd->bind.state.raster = state; |
| } |
| |
| static void cmd_bind_ds_state(struct intel_cmd *cmd, |
| const struct intel_ds_state *state) |
| { |
| cmd->bind.state.ds = state; |
| } |
| |
| static void cmd_bind_blend_state(struct intel_cmd *cmd, |
| const struct intel_blend_state *state) |
| { |
| cmd->bind.state.blend = state; |
| } |
| |
| static void cmd_bind_msaa_state(struct intel_cmd *cmd, |
| const struct intel_msaa_state *state) |
| { |
| cmd->bind.state.msaa = state; |
| } |
| |
| static void cmd_draw(struct intel_cmd *cmd, |
| XGL_UINT vertex_start, |
| XGL_UINT vertex_count, |
| XGL_UINT instance_start, |
| XGL_UINT instance_count, |
| bool indexed, |
| XGL_UINT vertex_base) |
| { |
| const struct intel_pipeline *p = cmd->bind.pipeline.graphics; |
| |
| emit_bounded_states(cmd); |
| |
| if (indexed) { |
| if (p->primitive_restart && !gen6_can_primitive_restart(cmd)) |
| cmd->result = XGL_ERROR_UNKNOWN; |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7.5)) { |
| gen75_3DSTATE_VF(cmd, p->primitive_restart, |
| p->primitive_restart_index); |
| } else { |
| gen6_3DSTATE_INDEX_BUFFER(cmd, cmd->bind.index.mem, |
| cmd->bind.index.offset, cmd->bind.index.type, |
| p->primitive_restart); |
| } |
| } else { |
| assert(!vertex_base); |
| } |
| |
| if (cmd_gen(cmd) >= INTEL_GEN(7)) { |
| gen7_3DPRIMITIVE(cmd, p->prim_type, indexed, vertex_count, |
| vertex_start, instance_count, instance_start, vertex_base); |
| } else { |
| gen6_3DPRIMITIVE(cmd, p->prim_type, indexed, vertex_count, |
| vertex_start, instance_count, instance_start, vertex_base); |
| } |
| |
| cmd->bind.draw_count++; |
| /* need to re-emit all workarounds */ |
| cmd->bind.wa_flags = 0; |
| } |
| |
| XGL_VOID XGLAPI intelCmdBindPipeline( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_PIPELINE_BIND_POINT pipelineBindPoint, |
| XGL_PIPELINE pipeline) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| switch (pipelineBindPoint) { |
| case XGL_PIPELINE_BIND_POINT_COMPUTE: |
| cmd_bind_compute_pipeline(cmd, intel_pipeline(pipeline)); |
| break; |
| case XGL_PIPELINE_BIND_POINT_GRAPHICS: |
| cmd_bind_graphics_pipeline(cmd, intel_pipeline(pipeline)); |
| break; |
| default: |
| cmd->result = XGL_ERROR_INVALID_VALUE; |
| break; |
| } |
| } |
| |
| XGL_VOID XGLAPI intelCmdBindPipelineDelta( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_PIPELINE_BIND_POINT pipelineBindPoint, |
| XGL_PIPELINE_DELTA delta) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| switch (pipelineBindPoint) { |
| case XGL_PIPELINE_BIND_POINT_COMPUTE: |
| cmd_bind_compute_delta(cmd, delta); |
| break; |
| case XGL_PIPELINE_BIND_POINT_GRAPHICS: |
| cmd_bind_graphics_delta(cmd, delta); |
| break; |
| default: |
| cmd->result = XGL_ERROR_INVALID_VALUE; |
| break; |
| } |
| } |
| |
| XGL_VOID XGLAPI intelCmdBindStateObject( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_STATE_BIND_POINT stateBindPoint, |
| XGL_STATE_OBJECT state) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| switch (stateBindPoint) { |
| case XGL_STATE_BIND_VIEWPORT: |
| cmd_bind_viewport_state(cmd, |
| intel_viewport_state((XGL_VIEWPORT_STATE_OBJECT) state)); |
| break; |
| case XGL_STATE_BIND_RASTER: |
| cmd_bind_raster_state(cmd, |
| intel_raster_state((XGL_RASTER_STATE_OBJECT) state)); |
| break; |
| case XGL_STATE_BIND_DEPTH_STENCIL: |
| cmd_bind_ds_state(cmd, |
| intel_ds_state((XGL_DEPTH_STENCIL_STATE_OBJECT) state)); |
| break; |
| case XGL_STATE_BIND_COLOR_BLEND: |
| cmd_bind_blend_state(cmd, |
| intel_blend_state((XGL_COLOR_BLEND_STATE_OBJECT) state)); |
| break; |
| case XGL_STATE_BIND_MSAA: |
| cmd_bind_msaa_state(cmd, |
| intel_msaa_state((XGL_MSAA_STATE_OBJECT) state)); |
| break; |
| default: |
| cmd->result = XGL_ERROR_INVALID_VALUE; |
| break; |
| } |
| } |
| |
| XGL_VOID XGLAPI intelCmdBindDescriptorSet( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_PIPELINE_BIND_POINT pipelineBindPoint, |
| XGL_UINT index, |
| XGL_DESCRIPTOR_SET descriptorSet, |
| XGL_UINT slotOffset) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| struct intel_dset *dset = intel_dset(descriptorSet); |
| |
| assert(!index); |
| |
| switch (pipelineBindPoint) { |
| case XGL_PIPELINE_BIND_POINT_COMPUTE: |
| cmd_bind_compute_dset(cmd, dset, slotOffset); |
| break; |
| case XGL_PIPELINE_BIND_POINT_GRAPHICS: |
| cmd_bind_graphics_dset(cmd, dset, slotOffset); |
| break; |
| default: |
| cmd->result = XGL_ERROR_INVALID_VALUE; |
| break; |
| } |
| } |
| |
| XGL_VOID XGLAPI intelCmdBindDynamicMemoryView( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_PIPELINE_BIND_POINT pipelineBindPoint, |
| const XGL_MEMORY_VIEW_ATTACH_INFO* pMemView) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| switch (pipelineBindPoint) { |
| case XGL_PIPELINE_BIND_POINT_COMPUTE: |
| cmd_bind_compute_dyn_view(cmd, pMemView); |
| break; |
| case XGL_PIPELINE_BIND_POINT_GRAPHICS: |
| cmd_bind_graphics_dyn_view(cmd, pMemView); |
| break; |
| default: |
| cmd->result = XGL_ERROR_INVALID_VALUE; |
| break; |
| } |
| } |
| |
| XGL_VOID XGLAPI intelCmdBindIndexData( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_GPU_MEMORY mem_, |
| XGL_GPU_SIZE offset, |
| XGL_INDEX_TYPE indexType) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| struct intel_mem *mem = intel_mem(mem_); |
| |
| cmd_bind_index_data(cmd, mem, offset, indexType); |
| } |
| |
| XGL_VOID XGLAPI intelCmdBindAttachments( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_UINT colorAttachmentCount, |
| const XGL_COLOR_ATTACHMENT_BIND_INFO* pColorAttachments, |
| const XGL_DEPTH_STENCIL_BIND_INFO* pDepthStencilAttachment) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| cmd_bind_rt(cmd, pColorAttachments, colorAttachmentCount); |
| cmd_bind_ds(cmd, pDepthStencilAttachment); |
| } |
| |
| XGL_VOID XGLAPI intelCmdDraw( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_UINT firstVertex, |
| XGL_UINT vertexCount, |
| XGL_UINT firstInstance, |
| XGL_UINT instanceCount) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| cmd_draw(cmd, firstVertex, vertexCount, |
| firstInstance, instanceCount, false, 0); |
| } |
| |
| XGL_VOID XGLAPI intelCmdDrawIndexed( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_UINT firstIndex, |
| XGL_UINT indexCount, |
| XGL_INT vertexOffset, |
| XGL_UINT firstInstance, |
| XGL_UINT instanceCount) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| cmd_draw(cmd, firstIndex, indexCount, |
| firstInstance, instanceCount, true, vertexOffset); |
| } |
| |
| XGL_VOID XGLAPI intelCmdDrawIndirect( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_GPU_MEMORY mem, |
| XGL_GPU_SIZE offset, |
| XGL_UINT32 count, |
| XGL_UINT32 stride) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| cmd->result = XGL_ERROR_UNKNOWN; |
| } |
| |
| XGL_VOID XGLAPI intelCmdDrawIndexedIndirect( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_GPU_MEMORY mem, |
| XGL_GPU_SIZE offset, |
| XGL_UINT32 count, |
| XGL_UINT32 stride) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| cmd->result = XGL_ERROR_UNKNOWN; |
| } |
| |
| XGL_VOID XGLAPI intelCmdDispatch( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_UINT x, |
| XGL_UINT y, |
| XGL_UINT z) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| cmd->result = XGL_ERROR_UNKNOWN; |
| } |
| |
| XGL_VOID XGLAPI intelCmdDispatchIndirect( |
| XGL_CMD_BUFFER cmdBuffer, |
| XGL_GPU_MEMORY mem, |
| XGL_GPU_SIZE offset) |
| { |
| struct intel_cmd *cmd = intel_cmd(cmdBuffer); |
| |
| cmd->result = XGL_ERROR_UNKNOWN; |
| } |