/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Christian König <christian.koenig@amd.com>
 */
| 26 | |
Andreas Hartmetz | 786af2f | 2014-01-04 18:44:33 +0100 | [diff] [blame] | 27 | #include "si_pipe.h" |
Emil Velikov | a131263 | 2014-08-16 17:58:25 +0100 | [diff] [blame] | 28 | #include "radeon/r600_cs.h" |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 29 | #include "sid.h" |
| 30 | |
Marek Olšák | 7209703 | 2014-01-22 18:50:36 +0100 | [diff] [blame] | 31 | #include "util/u_index_modify.h" |
Marek Olšák | 7209703 | 2014-01-22 18:50:36 +0100 | [diff] [blame] | 32 | #include "util/u_upload_mgr.h" |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 33 | #include "util/u_prim.h" |
Marek Olšák | 7209703 | 2014-01-22 18:50:36 +0100 | [diff] [blame] | 34 | |
Bas Nieuwenhuizen | 0ef1b4d | 2016-12-24 13:08:00 +0100 | [diff] [blame] | 35 | #include "ac_debug.h" |
| 36 | |
Marek Olšák | 508c1ca | 2014-12-07 16:02:07 +0100 | [diff] [blame] | 37 | static unsigned si_conv_pipe_prim(unsigned mode) |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 38 | { |
| 39 | static const unsigned prim_conv[] = { |
| 40 | [PIPE_PRIM_POINTS] = V_008958_DI_PT_POINTLIST, |
| 41 | [PIPE_PRIM_LINES] = V_008958_DI_PT_LINELIST, |
| 42 | [PIPE_PRIM_LINE_LOOP] = V_008958_DI_PT_LINELOOP, |
| 43 | [PIPE_PRIM_LINE_STRIP] = V_008958_DI_PT_LINESTRIP, |
| 44 | [PIPE_PRIM_TRIANGLES] = V_008958_DI_PT_TRILIST, |
| 45 | [PIPE_PRIM_TRIANGLE_STRIP] = V_008958_DI_PT_TRISTRIP, |
| 46 | [PIPE_PRIM_TRIANGLE_FAN] = V_008958_DI_PT_TRIFAN, |
| 47 | [PIPE_PRIM_QUADS] = V_008958_DI_PT_QUADLIST, |
| 48 | [PIPE_PRIM_QUAD_STRIP] = V_008958_DI_PT_QUADSTRIP, |
| 49 | [PIPE_PRIM_POLYGON] = V_008958_DI_PT_POLYGON, |
Michel Dänzer | 2863071 | 2014-01-09 16:35:46 +0900 | [diff] [blame] | 50 | [PIPE_PRIM_LINES_ADJACENCY] = V_008958_DI_PT_LINELIST_ADJ, |
| 51 | [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_008958_DI_PT_LINESTRIP_ADJ, |
| 52 | [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_008958_DI_PT_TRILIST_ADJ, |
Marek Olšák | db51ab6 | 2014-08-18 00:55:40 +0200 | [diff] [blame] | 53 | [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_008958_DI_PT_TRISTRIP_ADJ, |
Marek Olšák | d9d0de4 | 2014-09-18 23:39:44 +0200 | [diff] [blame] | 54 | [PIPE_PRIM_PATCHES] = V_008958_DI_PT_PATCH, |
Marek Olšák | db51ab6 | 2014-08-18 00:55:40 +0200 | [diff] [blame] | 55 | [R600_PRIM_RECTANGLE_LIST] = V_008958_DI_PT_RECTLIST |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 56 | }; |
Jan Vesely | 47b390f | 2016-05-17 09:25:44 -0400 | [diff] [blame] | 57 | assert(mode < ARRAY_SIZE(prim_conv)); |
Marek Olšák | 508c1ca | 2014-12-07 16:02:07 +0100 | [diff] [blame] | 58 | return prim_conv[mode]; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 59 | } |
| 60 | |
Andreas Hartmetz | b902298 | 2014-01-07 03:18:25 +0100 | [diff] [blame] | 61 | static unsigned si_conv_prim_to_gs_out(unsigned mode) |
Marek Olšák | e4c5d3e | 2013-08-18 03:05:34 +0200 | [diff] [blame] | 62 | { |
| 63 | static const int prim_conv[] = { |
| 64 | [PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST, |
| 65 | [PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, |
| 66 | [PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, |
| 67 | [PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, |
| 68 | [PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
| 69 | [PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
| 70 | [PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
| 71 | [PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
| 72 | [PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
| 73 | [PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
| 74 | [PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, |
| 75 | [PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP, |
| 76 | [PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
Marek Olšák | db51ab6 | 2014-08-18 00:55:40 +0200 | [diff] [blame] | 77 | [PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP, |
Marek Olšák | d9d0de4 | 2014-09-18 23:39:44 +0200 | [diff] [blame] | 78 | [PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST, |
Marek Olšák | db51ab6 | 2014-08-18 00:55:40 +0200 | [diff] [blame] | 79 | [R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP |
Marek Olšák | e4c5d3e | 2013-08-18 03:05:34 +0200 | [diff] [blame] | 80 | }; |
Jan Vesely | 47b390f | 2016-05-17 09:25:44 -0400 | [diff] [blame] | 81 | assert(mode < ARRAY_SIZE(prim_conv)); |
Marek Olšák | e4c5d3e | 2013-08-18 03:05:34 +0200 | [diff] [blame] | 82 | |
| 83 | return prim_conv[mode]; |
| 84 | } |
| 85 | |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 86 | /** |
| 87 | * This calculates the LDS size for tessellation shaders (VS, TCS, TES). |
| 88 | * LS.LDS_SIZE is shared by all 3 shader stages. |
| 89 | * |
| 90 | * The information about LDS and other non-compile-time parameters is then |
| 91 | * written to userdata SGPRs. |
| 92 | */ |
| 93 | static void si_emit_derived_tess_state(struct si_context *sctx, |
| 94 | const struct pipe_draw_info *info, |
| 95 | unsigned *num_patches) |
| 96 | { |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 97 | struct radeon_winsys_cs *cs = sctx->b.gfx.cs; |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 98 | struct si_shader_ctx_state *ls = &sctx->vs_shader; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 99 | /* The TES pointer will only be used for sctx->last_tcs. |
| 100 | * It would be wrong to think that TCS = TES. */ |
| 101 | struct si_shader_selector *tcs = |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 102 | sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 103 | unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL]; |
| 104 | unsigned num_tcs_input_cp = info->vertices_per_patch; |
| 105 | unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs; |
| 106 | unsigned num_tcs_patch_outputs; |
| 107 | unsigned input_vertex_size, output_vertex_size, pervertex_output_patch_size; |
| 108 | unsigned input_patch_size, output_patch_size, output_patch0_offset; |
| 109 | unsigned perpatch_output_offset, lds_size, ls_rsrc2; |
| 110 | unsigned tcs_in_layout, tcs_out_layout, tcs_out_offsets; |
Marek Olšák | 3ee9be4 | 2016-09-30 22:37:14 +0200 | [diff] [blame] | 111 | unsigned offchip_layout, hardware_lds_size, ls_hs_config; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 112 | |
Marek Olšák | fa476e0 | 2016-11-11 22:36:17 +0100 | [diff] [blame] | 113 | if (sctx->last_ls == ls->current && |
| 114 | sctx->last_tcs == tcs && |
| 115 | sctx->last_tes_sh_base == tes_sh_base && |
| 116 | sctx->last_num_tcs_input_cp == num_tcs_input_cp) { |
| 117 | *num_patches = sctx->last_num_patches; |
| 118 | return; |
| 119 | } |
| 120 | |
| 121 | sctx->last_ls = ls->current; |
| 122 | sctx->last_tcs = tcs; |
| 123 | sctx->last_tes_sh_base = tes_sh_base; |
| 124 | sctx->last_num_tcs_input_cp = num_tcs_input_cp; |
| 125 | |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 126 | /* This calculates how shader inputs and outputs among VS, TCS, and TES |
| 127 | * are laid out in LDS. */ |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 128 | num_tcs_inputs = util_last_bit64(ls->cso->outputs_written); |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 129 | |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 130 | if (sctx->tcs_shader.cso) { |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 131 | num_tcs_outputs = util_last_bit64(tcs->outputs_written); |
| 132 | num_tcs_output_cp = tcs->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; |
| 133 | num_tcs_patch_outputs = util_last_bit64(tcs->patch_outputs_written); |
| 134 | } else { |
| 135 | /* No TCS. Route varyings from LS to TES. */ |
| 136 | num_tcs_outputs = num_tcs_inputs; |
| 137 | num_tcs_output_cp = num_tcs_input_cp; |
| 138 | num_tcs_patch_outputs = 2; /* TESSINNER + TESSOUTER */ |
| 139 | } |
| 140 | |
| 141 | input_vertex_size = num_tcs_inputs * 16; |
| 142 | output_vertex_size = num_tcs_outputs * 16; |
| 143 | |
| 144 | input_patch_size = num_tcs_input_cp * input_vertex_size; |
| 145 | |
| 146 | pervertex_output_patch_size = num_tcs_output_cp * output_vertex_size; |
| 147 | output_patch_size = pervertex_output_patch_size + num_tcs_patch_outputs * 16; |
| 148 | |
Bas Nieuwenhuizen | f91c85b | 2016-05-02 15:00:21 +0200 | [diff] [blame] | 149 | /* Ensure that we only need one wave per SIMD so we don't need to check |
| 150 | * resource usage. Also ensures that the number of tcs in and out |
| 151 | * vertices per threadgroup are at most 256. |
| 152 | */ |
| 153 | *num_patches = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp) * 4; |
| 154 | |
| 155 | /* Make sure that the data fits in LDS. This assumes the shaders only |
| 156 | * use LDS for the inputs and outputs. |
| 157 | */ |
| 158 | hardware_lds_size = sctx->b.chip_class >= CIK ? 65536 : 32768; |
| 159 | *num_patches = MIN2(*num_patches, hardware_lds_size / (input_patch_size + |
| 160 | output_patch_size)); |
| 161 | |
| 162 | /* Make sure the output data fits in the offchip buffer */ |
Marek Olšák | dd56d04 | 2016-06-28 14:11:12 +0200 | [diff] [blame] | 163 | *num_patches = MIN2(*num_patches, |
| 164 | (sctx->screen->tess_offchip_block_dw_size * 4) / |
| 165 | output_patch_size); |
Bas Nieuwenhuizen | f91c85b | 2016-05-02 15:00:21 +0200 | [diff] [blame] | 166 | |
| 167 | /* Not necessary for correctness, but improves performance. The |
| 168 | * specific value is taken from the proprietary driver. |
| 169 | */ |
| 170 | *num_patches = MIN2(*num_patches, 40); |
Marek Olšák | 78c4528 | 2016-11-29 20:41:23 +0100 | [diff] [blame] | 171 | |
| 172 | /* SI bug workaround - limit LS-HS threadgroups to only one wave. */ |
| 173 | if (sctx->b.chip_class == SI) { |
| 174 | unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp); |
| 175 | *num_patches = MIN2(*num_patches, one_wave); |
| 176 | } |
| 177 | |
Marek Olšák | fa476e0 | 2016-11-11 22:36:17 +0100 | [diff] [blame] | 178 | sctx->last_num_patches = *num_patches; |
Bas Nieuwenhuizen | f91c85b | 2016-05-02 15:00:21 +0200 | [diff] [blame] | 179 | |
| 180 | output_patch0_offset = input_patch_size * *num_patches; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 181 | perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size; |
| 182 | |
| 183 | lds_size = output_patch0_offset + output_patch_size * *num_patches; |
Marek Olšák | 20b9b5d | 2015-12-28 00:14:05 +0100 | [diff] [blame] | 184 | ls_rsrc2 = ls->current->config.rsrc2; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 185 | |
| 186 | if (sctx->b.chip_class >= CIK) { |
| 187 | assert(lds_size <= 65536); |
Marek Olšák | 72d48fc | 2016-11-29 19:25:03 +0100 | [diff] [blame] | 188 | lds_size = align(lds_size, 512) / 512; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 189 | } else { |
| 190 | assert(lds_size <= 32768); |
Marek Olšák | 72d48fc | 2016-11-29 19:25:03 +0100 | [diff] [blame] | 191 | lds_size = align(lds_size, 256) / 256; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 192 | } |
Marek Olšák | 72d48fc | 2016-11-29 19:25:03 +0100 | [diff] [blame] | 193 | si_multiwave_lds_size_workaround(sctx->screen, &lds_size); |
| 194 | ls_rsrc2 |= S_00B52C_LDS_SIZE(lds_size); |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 195 | |
| 196 | /* Due to a hw bug, RSRC2_LS must be written twice with another |
| 197 | * LS register written in between. */ |
| 198 | if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII) |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 199 | radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2); |
| 200 | radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); |
Marek Olšák | 20b9b5d | 2015-12-28 00:14:05 +0100 | [diff] [blame] | 201 | radeon_emit(cs, ls->current->config.rsrc1); |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 202 | radeon_emit(cs, ls_rsrc2); |
| 203 | |
| 204 | /* Compute userdata SGPRs. */ |
| 205 | assert(((input_vertex_size / 4) & ~0xff) == 0); |
| 206 | assert(((output_vertex_size / 4) & ~0xff) == 0); |
| 207 | assert(((input_patch_size / 4) & ~0x1fff) == 0); |
| 208 | assert(((output_patch_size / 4) & ~0x1fff) == 0); |
| 209 | assert(((output_patch0_offset / 16) & ~0xffff) == 0); |
| 210 | assert(((perpatch_output_offset / 16) & ~0xffff) == 0); |
| 211 | assert(num_tcs_input_cp <= 32); |
| 212 | assert(num_tcs_output_cp <= 32); |
| 213 | |
| 214 | tcs_in_layout = (input_patch_size / 4) | |
| 215 | ((input_vertex_size / 4) << 13); |
| 216 | tcs_out_layout = (output_patch_size / 4) | |
| 217 | ((output_vertex_size / 4) << 13); |
| 218 | tcs_out_offsets = (output_patch0_offset / 16) | |
| 219 | ((perpatch_output_offset / 16) << 16); |
Bas Nieuwenhuizen | c49e68d | 2016-05-10 00:48:55 +0200 | [diff] [blame] | 220 | offchip_layout = (pervertex_output_patch_size * *num_patches << 16) | |
| 221 | (num_tcs_output_cp << 9) | *num_patches; |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 222 | |
| 223 | /* Set them for LS. */ |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 224 | radeon_set_sh_reg(cs, |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 225 | R_00B530_SPI_SHADER_USER_DATA_LS_0 + SI_SGPR_LS_OUT_LAYOUT * 4, |
| 226 | tcs_in_layout); |
| 227 | |
| 228 | /* Set them for TCS. */ |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 229 | radeon_set_sh_reg_seq(cs, |
Bas Nieuwenhuizen | c49e68d | 2016-05-10 00:48:55 +0200 | [diff] [blame] | 230 | R_00B430_SPI_SHADER_USER_DATA_HS_0 + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 4); |
| 231 | radeon_emit(cs, offchip_layout); |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 232 | radeon_emit(cs, tcs_out_offsets); |
| 233 | radeon_emit(cs, tcs_out_layout | (num_tcs_input_cp << 26)); |
| 234 | radeon_emit(cs, tcs_in_layout); |
| 235 | |
| 236 | /* Set them for TES. */ |
Bas Nieuwenhuizen | 26f4361 | 2016-05-10 01:05:32 +0200 | [diff] [blame] | 237 | radeon_set_sh_reg_seq(cs, tes_sh_base + SI_SGPR_TCS_OFFCHIP_LAYOUT * 4, 1); |
Bas Nieuwenhuizen | c49e68d | 2016-05-10 00:48:55 +0200 | [diff] [blame] | 238 | radeon_emit(cs, offchip_layout); |
Marek Olšák | 3ee9be4 | 2016-09-30 22:37:14 +0200 | [diff] [blame] | 239 | |
| 240 | ls_hs_config = S_028B58_NUM_PATCHES(*num_patches) | |
| 241 | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | |
| 242 | S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); |
| 243 | |
| 244 | if (sctx->b.chip_class >= CIK) |
| 245 | radeon_set_context_reg_idx(cs, R_028B58_VGT_LS_HS_CONFIG, 2, |
| 246 | ls_hs_config); |
| 247 | else |
| 248 | radeon_set_context_reg(cs, R_028B58_VGT_LS_HS_CONFIG, |
| 249 | ls_hs_config); |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 250 | } |
| 251 | |
Marek Olšák | 0f9519b | 2015-12-09 22:14:32 +0100 | [diff] [blame] | 252 | static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info) |
| 253 | { |
| 254 | switch (info->mode) { |
| 255 | case PIPE_PRIM_PATCHES: |
| 256 | return info->count / info->vertices_per_patch; |
| 257 | case R600_PRIM_RECTANGLE_LIST: |
| 258 | return info->count / 3; |
| 259 | default: |
| 260 | return u_prims_for_vertices(info->mode, info->count); |
| 261 | } |
| 262 | } |
| 263 | |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 264 | static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 265 | const struct pipe_draw_info *info, |
| 266 | unsigned num_patches) |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 267 | { |
| 268 | struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; |
| 269 | unsigned prim = info->mode; |
Marek Olšák | f62f882 | 2014-08-18 23:14:34 +0200 | [diff] [blame] | 270 | unsigned primgroup_size = 128; /* recommended without a GS */ |
Marek Olšák | 2070af2 | 2015-10-18 22:07:01 +0200 | [diff] [blame] | 271 | unsigned max_primgroup_in_wave = 2; |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 272 | |
| 273 | /* SWITCH_ON_EOP(0) is always preferable. */ |
| 274 | bool wd_switch_on_eop = false; |
| 275 | bool ia_switch_on_eop = false; |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 276 | bool ia_switch_on_eoi = false; |
Marek Olšák | 4be7ff5 | 2014-08-15 22:45:10 +0200 | [diff] [blame] | 277 | bool partial_vs_wave = false; |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 278 | bool partial_es_wave = false; |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 279 | |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 280 | if (sctx->gs_shader.cso) |
Marek Olšák | f62f882 | 2014-08-18 23:14:34 +0200 | [diff] [blame] | 281 | primgroup_size = 64; /* recommended with a GS */ |
| 282 | |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 283 | if (sctx->tes_shader.cso) { |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 284 | /* primgroup_size must be set to a multiple of NUM_PATCHES */ |
Marek Olšák | 2802310 | 2016-06-03 16:44:00 +0200 | [diff] [blame] | 285 | primgroup_size = num_patches; |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 286 | |
Marek Olšák | ca18f12 | 2015-10-18 22:17:04 +0200 | [diff] [blame] | 287 | /* SWITCH_ON_EOI must be set if PrimID is used. */ |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 288 | if ((sctx->tcs_shader.cso && sctx->tcs_shader.cso->info.uses_primid) || |
Marek Olšák | ca18f12 | 2015-10-18 22:17:04 +0200 | [diff] [blame] | 289 | sctx->tes_shader.cso->info.uses_primid) |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 290 | ia_switch_on_eoi = true; |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 291 | |
| 292 | /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */ |
| 293 | if ((sctx->b.family == CHIP_TAHITI || |
| 294 | sctx->b.family == CHIP_PITCAIRN || |
| 295 | sctx->b.family == CHIP_BONAIRE) && |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 296 | sctx->gs_shader.cso) |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 297 | partial_vs_wave = true; |
Bas Nieuwenhuizen | 43d7305 | 2016-04-12 20:28:46 +0200 | [diff] [blame] | 298 | |
| 299 | /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ |
Marek Olšák | eff81cb | 2016-06-28 14:19:04 +0200 | [diff] [blame] | 300 | if (sctx->screen->has_distributed_tess) { |
Marek Olšák | a816c7f | 2016-11-29 21:19:52 +0100 | [diff] [blame] | 301 | if (sctx->gs_shader.cso) { |
Bas Nieuwenhuizen | 43d7305 | 2016-04-12 20:28:46 +0200 | [diff] [blame] | 302 | partial_es_wave = true; |
Marek Olšák | a816c7f | 2016-11-29 21:19:52 +0100 | [diff] [blame] | 303 | |
| 304 | /* GPU hang workaround. */ |
| 305 | if (sctx->b.family == CHIP_TONGA || |
| 306 | sctx->b.family == CHIP_FIJI || |
| 307 | sctx->b.family == CHIP_POLARIS10 || |
| 308 | sctx->b.family == CHIP_POLARIS11) |
| 309 | partial_vs_wave = true; |
| 310 | } else { |
Bas Nieuwenhuizen | 43d7305 | 2016-04-12 20:28:46 +0200 | [diff] [blame] | 311 | partial_vs_wave = true; |
Marek Olšák | a816c7f | 2016-11-29 21:19:52 +0100 | [diff] [blame] | 312 | } |
Bas Nieuwenhuizen | 43d7305 | 2016-04-12 20:28:46 +0200 | [diff] [blame] | 313 | } |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 314 | } |
| 315 | |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 316 | /* This is a hardware requirement. */ |
| 317 | if ((rs && rs->line_stipple_enable) || |
| 318 | (sctx->b.screen->debug_flags & DBG_SWITCH_ON_EOP)) { |
| 319 | ia_switch_on_eop = true; |
| 320 | wd_switch_on_eop = true; |
| 321 | } |
| 322 | |
| 323 | if (sctx->b.chip_class >= CIK) { |
| 324 | /* WD_SWITCH_ON_EOP has no effect on GPUs with less than |
| 325 | * 4 shader engines. Set 1 to pass the assertion below. |
Marek Olšák | f6ff483 | 2016-04-08 12:57:43 +0200 | [diff] [blame] | 326 | * The other cases are hardware requirements. |
| 327 | * |
| 328 | * Polaris supports primitive restart with WD_SWITCH_ON_EOP=0 |
| 329 | * for points, line strips, and tri strips. |
| 330 | */ |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 331 | if (sctx->b.screen->info.max_se < 4 || |
| 332 | prim == PIPE_PRIM_POLYGON || |
| 333 | prim == PIPE_PRIM_LINE_LOOP || |
| 334 | prim == PIPE_PRIM_TRIANGLE_FAN || |
| 335 | prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY || |
Marek Olšák | f6ff483 | 2016-04-08 12:57:43 +0200 | [diff] [blame] | 336 | (info->primitive_restart && |
| 337 | (sctx->b.family < CHIP_POLARIS10 || |
| 338 | (prim != PIPE_PRIM_POINTS && |
| 339 | prim != PIPE_PRIM_LINE_STRIP && |
| 340 | prim != PIPE_PRIM_TRIANGLE_STRIP))) || |
Marek Olšák | 0d2cb35 | 2015-10-18 22:22:22 +0200 | [diff] [blame] | 341 | info->count_from_stream_output) |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 342 | wd_switch_on_eop = true; |
| 343 | |
| 344 | /* Hawaii hangs if instancing is enabled and WD_SWITCH_ON_EOP is 0. |
| 345 | * We don't know that for indirect drawing, so treat it as |
| 346 | * always problematic. */ |
| 347 | if (sctx->b.family == CHIP_HAWAII && |
| 348 | (info->indirect || info->instance_count > 1)) |
| 349 | wd_switch_on_eop = true; |
| 350 | |
Marek Olšák | 991cbfc | 2016-06-03 16:20:17 +0200 | [diff] [blame] | 351 | /* Performance recommendation for 4 SE Gfx7-8 parts if |
Marek Olšák | a5a2cc5 | 2016-09-08 01:42:06 +0200 | [diff] [blame] | 352 | * instances are smaller than a primgroup. |
| 353 | * Assume indirect draws always use small instances. |
| 354 | * This is needed for good VS wave utilization. |
Marek Olšák | 991cbfc | 2016-06-03 16:20:17 +0200 | [diff] [blame] | 355 | */ |
| 356 | if (sctx->b.chip_class <= VI && |
| 357 | sctx->b.screen->info.max_se >= 4 && |
Marek Olšák | a5a2cc5 | 2016-09-08 01:42:06 +0200 | [diff] [blame] | 358 | (info->indirect || |
| 359 | (info->instance_count > 1 && |
| 360 | si_num_prims_for_vertices(info) < primgroup_size))) |
Marek Olšák | 991cbfc | 2016-06-03 16:20:17 +0200 | [diff] [blame] | 361 | wd_switch_on_eop = true; |
| 362 | |
Marek Olšák | 96d5879 | 2015-10-18 21:43:30 +0200 | [diff] [blame] | 363 | /* Required on CIK and later. */ |
| 364 | if (sctx->b.screen->info.max_se > 2 && !wd_switch_on_eop) |
| 365 | ia_switch_on_eoi = true; |
| 366 | |
Marek Olšák | 2070af2 | 2015-10-18 22:07:01 +0200 | [diff] [blame] | 367 | /* Required by Hawaii and, for some special cases, by VI. */ |
| 368 | if (ia_switch_on_eoi && |
| 369 | (sctx->b.family == CHIP_HAWAII || |
| 370 | (sctx->b.chip_class == VI && |
| 371 | (sctx->gs_shader.cso || max_primgroup_in_wave != 2)))) |
| 372 | partial_vs_wave = true; |
| 373 | |
Marek Olšák | a6b5684 | 2015-10-18 21:51:41 +0200 | [diff] [blame] | 374 | /* Instancing bug on Bonaire. */ |
| 375 | if (sctx->b.family == CHIP_BONAIRE && ia_switch_on_eoi && |
| 376 | (info->indirect || info->instance_count > 1)) |
| 377 | partial_vs_wave = true; |
| 378 | |
Marek Olšák | 1469c70 | 2016-08-23 17:58:22 +0200 | [diff] [blame] | 379 | /* GS hw bug with single-primitive instances and SWITCH_ON_EOI. |
| 380 | * The hw doc says all multi-SE chips are affected, but Vulkan |
| 381 | * only applies it to Hawaii. Do what Vulkan does. |
| 382 | */ |
| 383 | if (sctx->b.family == CHIP_HAWAII && |
| 384 | sctx->gs_shader.cso && |
| 385 | ia_switch_on_eoi && |
| 386 | (info->indirect || |
| 387 | (info->instance_count > 1 && |
| 388 | si_num_prims_for_vertices(info) <= 1))) |
| 389 | sctx->b.flags |= SI_CONTEXT_VGT_FLUSH; |
| 390 | |
| 391 | |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 392 | /* If the WD switch is false, the IA switch must be false too. */ |
| 393 | assert(wd_switch_on_eop || !ia_switch_on_eop); |
| 394 | } |
| 395 | |
Marek Olšák | ca18f12 | 2015-10-18 22:17:04 +0200 | [diff] [blame] | 396 | /* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */ |
| 397 | if (ia_switch_on_eoi) |
| 398 | partial_es_wave = true; |
| 399 | |
Marek Olšák | 0608304 | 2015-10-19 02:45:56 +0200 | [diff] [blame] | 400 | /* GS requirement. */ |
| 401 | if (SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3) |
| 402 | partial_es_wave = true; |
| 403 | |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 404 | return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) | |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 405 | S_028AA8_SWITCH_ON_EOI(ia_switch_on_eoi) | |
Marek Olšák | 4be7ff5 | 2014-08-15 22:45:10 +0200 | [diff] [blame] | 406 | S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 407 | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 408 | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | |
Marek Olšák | 2d1952e | 2015-04-16 20:44:54 +0200 | [diff] [blame] | 409 | S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) | |
Marek Olšák | 2070af2 | 2015-10-18 22:07:01 +0200 | [diff] [blame] | 410 | S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? |
| 411 | max_primgroup_in_wave : 0); |
Marek Olšák | 94e474f | 2014-08-15 16:32:03 +0200 | [diff] [blame] | 412 | } |
| 413 | |
Marek Olšák | dc39413 | 2015-03-15 20:13:52 +0100 | [diff] [blame] | 414 | static void si_emit_scratch_reloc(struct si_context *sctx) |
| 415 | { |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 416 | struct radeon_winsys_cs *cs = sctx->b.gfx.cs; |
Marek Olšák | dc39413 | 2015-03-15 20:13:52 +0100 | [diff] [blame] | 417 | |
| 418 | if (!sctx->emit_scratch_reloc) |
| 419 | return; |
| 420 | |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 421 | radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE, |
Marek Olšák | dc39413 | 2015-03-15 20:13:52 +0100 | [diff] [blame] | 422 | sctx->spi_tmpring_size); |
| 423 | |
| 424 | if (sctx->scratch_buffer) { |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 425 | radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, |
Marek Olšák | dc39413 | 2015-03-15 20:13:52 +0100 | [diff] [blame] | 426 | sctx->scratch_buffer, RADEON_USAGE_READWRITE, |
Marek Olšák | 2edb060 | 2015-09-26 23:18:55 +0200 | [diff] [blame] | 427 | RADEON_PRIO_SCRATCH_BUFFER); |
Marek Olšák | dc39413 | 2015-03-15 20:13:52 +0100 | [diff] [blame] | 428 | |
| 429 | } |
| 430 | sctx->emit_scratch_reloc = false; |
| 431 | } |
| 432 | |
Marek Olšák | 1fe7ba8 | 2015-01-31 20:09:46 +0100 | [diff] [blame] | 433 | /* rast_prim is the primitive type after GS. */ |
Marek Olšák | fdf2c04 | 2015-02-22 17:42:20 +0100 | [diff] [blame] | 434 | static void si_emit_rasterizer_prim_state(struct si_context *sctx) |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 435 | { |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 436 | struct radeon_winsys_cs *cs = sctx->b.gfx.cs; |
Marek Olšák | fdf2c04 | 2015-02-22 17:42:20 +0100 | [diff] [blame] | 437 | unsigned rast_prim = sctx->current_rast_prim; |
Marek Olšák | 1f4bb38 | 2015-03-15 19:21:31 +0100 | [diff] [blame] | 438 | struct si_state_rasterizer *rs = sctx->emitted.named.rasterizer; |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 439 | |
Marek Olšák | 567c8d7 | 2015-03-15 19:24:13 +0100 | [diff] [blame] | 440 | /* Skip this if not rendering lines. */ |
| 441 | if (rast_prim != PIPE_PRIM_LINES && |
| 442 | rast_prim != PIPE_PRIM_LINE_LOOP && |
| 443 | rast_prim != PIPE_PRIM_LINE_STRIP && |
| 444 | rast_prim != PIPE_PRIM_LINES_ADJACENCY && |
| 445 | rast_prim != PIPE_PRIM_LINE_STRIP_ADJACENCY) |
| 446 | return; |
| 447 | |
Marek Olšák | 1f4bb38 | 2015-03-15 19:21:31 +0100 | [diff] [blame] | 448 | if (rast_prim == sctx->last_rast_prim && |
| 449 | rs->pa_sc_line_stipple == sctx->last_sc_line_stipple) |
Marek Olšák | 3291eed | 2014-12-08 13:35:36 +0100 | [diff] [blame] | 450 | return; |
| 451 | |
Marek Olšák | 4b11ef2 | 2016-06-28 13:04:07 +0200 | [diff] [blame] | 452 | /* For lines, reset the stipple pattern at each primitive. Otherwise, |
| 453 | * reset the stipple pattern at each packet (line strips, line loops). |
| 454 | */ |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 455 | radeon_set_context_reg(cs, R_028A0C_PA_SC_LINE_STIPPLE, |
Marek Olšák | 1f4bb38 | 2015-03-15 19:21:31 +0100 | [diff] [blame] | 456 | rs->pa_sc_line_stipple | |
Marek Olšák | 4b11ef2 | 2016-06-28 13:04:07 +0200 | [diff] [blame] | 457 | S_028A0C_AUTO_RESET_CNTL(rast_prim == PIPE_PRIM_LINES ? 1 : 2)); |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 458 | |
Marek Olšák | 1fe7ba8 | 2015-01-31 20:09:46 +0100 | [diff] [blame] | 459 | sctx->last_rast_prim = rast_prim; |
Marek Olšák | 1f4bb38 | 2015-03-15 19:21:31 +0100 | [diff] [blame] | 460 | sctx->last_sc_line_stipple = rs->pa_sc_line_stipple; |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 461 | } |
| 462 | |
| 463 | static void si_emit_draw_registers(struct si_context *sctx, |
Marek Olšák | fdf2c04 | 2015-02-22 17:42:20 +0100 | [diff] [blame] | 464 | const struct pipe_draw_info *info) |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 465 | { |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 466 | struct radeon_winsys_cs *cs = sctx->b.gfx.cs; |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 467 | unsigned prim = si_conv_pipe_prim(info->mode); |
Marek Olšák | fdf2c04 | 2015-02-22 17:42:20 +0100 | [diff] [blame] | 468 | unsigned gs_out_prim = si_conv_prim_to_gs_out(sctx->current_rast_prim); |
Marek Olšák | 3ee9be4 | 2016-09-30 22:37:14 +0200 | [diff] [blame] | 469 | unsigned ia_multi_vgt_param, num_patches = 0; |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 470 | |
Marek Olšák | 28d0d0c | 2016-06-24 02:17:38 +0200 | [diff] [blame] | 471 | /* Polaris needs different VTX_REUSE_DEPTH settings depending on |
| 472 | * whether the "fractional odd" tessellation spacing is used. |
| 473 | */ |
| 474 | if (sctx->b.family >= CHIP_POLARIS10) { |
| 475 | struct si_shader_selector *tes = sctx->tes_shader.cso; |
| 476 | unsigned vtx_reuse_depth = 30; |
| 477 | |
| 478 | if (tes && |
| 479 | tes->info.properties[TGSI_PROPERTY_TES_SPACING] == |
| 480 | PIPE_TESS_SPACING_FRACTIONAL_ODD) |
| 481 | vtx_reuse_depth = 14; |
| 482 | |
| 483 | if (vtx_reuse_depth != sctx->last_vtx_reuse_depth) { |
| 484 | radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, |
| 485 | vtx_reuse_depth); |
| 486 | sctx->last_vtx_reuse_depth = vtx_reuse_depth; |
| 487 | } |
| 488 | } |
| 489 | |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 490 | if (sctx->tes_shader.cso) |
Marek Olšák | 74c1001 | 2015-02-22 18:01:18 +0100 | [diff] [blame] | 491 | si_emit_derived_tess_state(sctx, info, &num_patches); |
| 492 | |
Marek Olšák | 09d02fa | 2015-02-22 18:06:34 +0100 | [diff] [blame] | 493 | ia_multi_vgt_param = si_get_ia_multi_vgt_param(sctx, info, num_patches); |
| 494 | |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 495 | /* Draw state. */ |
Marek Olšák | 82e51e8 | 2016-09-30 22:47:20 +0200 | [diff] [blame] | 496 | if (ia_multi_vgt_param != sctx->last_multi_vgt_param) { |
| 497 | if (sctx->b.chip_class >= CIK) |
Nicolai Hähnle | 0da890e | 2016-06-24 01:11:09 +0200 | [diff] [blame] | 498 | radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); |
Marek Olšák | 82e51e8 | 2016-09-30 22:47:20 +0200 | [diff] [blame] | 499 | else |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 500 | radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); |
Marek Olšák | 82e51e8 | 2016-09-30 22:47:20 +0200 | [diff] [blame] | 501 | |
| 502 | sctx->last_multi_vgt_param = ia_multi_vgt_param; |
| 503 | } |
| 504 | if (prim != sctx->last_prim) { |
| 505 | if (sctx->b.chip_class >= CIK) |
| 506 | radeon_set_uconfig_reg_idx(cs, R_030908_VGT_PRIMITIVE_TYPE, 1, prim); |
| 507 | else |
| 508 | radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, prim); |
Nicolai Hähnle | 0da890e | 2016-06-24 01:11:09 +0200 | [diff] [blame] | 509 | |
Marek Olšák | 834bee4 | 2014-12-07 20:23:56 +0100 | [diff] [blame] | 510 | sctx->last_prim = prim; |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 511 | } |
| 512 | |
Marek Olšák | 6fde194 | 2014-12-07 20:15:49 +0100 | [diff] [blame] | 513 | if (gs_out_prim != sctx->last_gs_out_prim) { |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 514 | radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim); |
Marek Olšák | 6fde194 | 2014-12-07 20:15:49 +0100 | [diff] [blame] | 515 | sctx->last_gs_out_prim = gs_out_prim; |
| 516 | } |
Marek Olšák | 3435013 | 2014-12-07 20:14:41 +0100 | [diff] [blame] | 517 | |
| 518 | /* Primitive restart. */ |
| 519 | if (info->primitive_restart != sctx->last_primitive_restart_en) { |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 520 | radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart); |
Marek Olšák | 3435013 | 2014-12-07 20:14:41 +0100 | [diff] [blame] | 521 | sctx->last_primitive_restart_en = info->primitive_restart; |
| 522 | |
James Legg | e33f31d | 2016-10-04 14:30:11 +0100 | [diff] [blame] | 523 | } |
| 524 | if (info->primitive_restart && |
| 525 | (info->restart_index != sctx->last_restart_index || |
| 526 | sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN)) { |
| 527 | radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, |
| 528 | info->restart_index); |
| 529 | sctx->last_restart_index = info->restart_index; |
Marek Olšák | 3435013 | 2014-12-07 20:14:41 +0100 | [diff] [blame] | 530 | } |
Marek Olšák | ca7f1cf | 2014-12-07 16:40:09 +0100 | [diff] [blame] | 531 | } |
| 532 | |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 533 | static void si_emit_draw_packets(struct si_context *sctx, |
| 534 | const struct pipe_draw_info *info, |
| 535 | const struct pipe_index_buffer *ib) |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 536 | { |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 537 | struct radeon_winsys_cs *cs = sctx->b.gfx.cs; |
Marek Olšák | 3ce91c7 | 2014-09-15 23:34:28 +0200 | [diff] [blame] | 538 | unsigned sh_base_reg = sctx->shader_userdata.sh_base[PIPE_SHADER_VERTEX]; |
Marek Olšák | eb0d3e8 | 2015-11-07 16:30:01 +0100 | [diff] [blame] | 539 | bool render_cond_bit = sctx->b.render_cond && !sctx->b.render_cond_force_off; |
Nicolai Hähnle | 64ff23a | 2016-07-29 17:56:21 +0100 | [diff] [blame] | 540 | uint32_t index_max_size = 0; |
| 541 | uint64_t index_va = 0; |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 542 | |
Marek Olšák | 9d16e70 | 2013-08-26 18:17:09 +0200 | [diff] [blame] | 543 | if (info->count_from_stream_output) { |
| 544 | struct r600_so_target *t = |
| 545 | (struct r600_so_target*)info->count_from_stream_output; |
Marek Olšák | 1c03a69 | 2014-08-06 22:29:27 +0200 | [diff] [blame] | 546 | uint64_t va = t->buf_filled_size->gpu_address + |
| 547 | t->buf_filled_size_offset; |
Marek Olšák | 9d16e70 | 2013-08-26 18:17:09 +0200 | [diff] [blame] | 548 | |
Marek Olšák | d2e63ac | 2015-08-30 01:54:00 +0200 | [diff] [blame] | 549 | radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 550 | t->stride_in_dw); |
Marek Olšák | 9d16e70 | 2013-08-26 18:17:09 +0200 | [diff] [blame] | 551 | |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 552 | radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); |
| 553 | radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) | |
| 554 | COPY_DATA_DST_SEL(COPY_DATA_REG) | |
| 555 | COPY_DATA_WR_CONFIRM); |
| 556 | radeon_emit(cs, va); /* src address lo */ |
| 557 | radeon_emit(cs, va >> 32); /* src address hi */ |
| 558 | radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); |
| 559 | radeon_emit(cs, 0); /* unused */ |
| 560 | |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 561 | radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 562 | t->buf_filled_size, RADEON_USAGE_READ, |
Marek Olšák | 2edb060 | 2015-09-26 23:18:55 +0200 | [diff] [blame] | 563 | RADEON_PRIO_SO_FILLED_SIZE); |
Marek Olšák | 9d16e70 | 2013-08-26 18:17:09 +0200 | [diff] [blame] | 564 | } |
| 565 | |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 566 | /* draw packet */ |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 567 | if (info->indexed) { |
Marek Olšák | fe40a65 | 2016-09-06 00:35:12 +0200 | [diff] [blame] | 568 | if (ib->index_size != sctx->last_index_size) { |
| 569 | radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 570 | |
Marek Olšák | fe40a65 | 2016-09-06 00:35:12 +0200 | [diff] [blame] | 571 | /* index type */ |
| 572 | switch (ib->index_size) { |
| 573 | case 1: |
| 574 | radeon_emit(cs, V_028A7C_VGT_INDEX_8); |
| 575 | break; |
| 576 | case 2: |
| 577 | radeon_emit(cs, V_028A7C_VGT_INDEX_16 | |
| 578 | (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ? |
| 579 | V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); |
| 580 | break; |
| 581 | case 4: |
| 582 | radeon_emit(cs, V_028A7C_VGT_INDEX_32 | |
| 583 | (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ? |
| 584 | V_028A7C_VGT_DMA_SWAP_32_BIT : 0)); |
| 585 | break; |
| 586 | default: |
| 587 | assert(!"unreachable"); |
| 588 | return; |
| 589 | } |
| 590 | |
| 591 | sctx->last_index_size = ib->index_size; |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 592 | } |
Nicolai Hähnle | 64ff23a | 2016-07-29 17:56:21 +0100 | [diff] [blame] | 593 | |
| 594 | index_max_size = (ib->buffer->width0 - ib->offset) / |
| 595 | ib->index_size; |
| 596 | index_va = r600_resource(ib->buffer)->gpu_address + ib->offset; |
| 597 | |
Nicolai Hähnle | 64ff23a | 2016-07-29 17:56:21 +0100 | [diff] [blame] | 598 | radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, |
| 599 | (struct r600_resource *)ib->buffer, |
| 600 | RADEON_USAGE_READ, RADEON_PRIO_INDEX_BUFFER); |
Marek Olšák | fe40a65 | 2016-09-06 00:35:12 +0200 | [diff] [blame] | 601 | } else { |
| 602 | /* On CI and later, non-indexed draws overwrite VGT_INDEX_TYPE, |
| 603 | * so the state must be re-emitted before the next indexed draw. |
| 604 | */ |
| 605 | if (sctx->b.chip_class >= CIK) |
| 606 | sctx->last_index_size = -1; |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 607 | } |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 608 | |
Marek Olšák | 09056b3 | 2014-04-23 16:15:36 +0200 | [diff] [blame] | 609 | if (!info->indirect) { |
Marek Olšák | 3382036 | 2014-12-07 20:04:40 +0100 | [diff] [blame] | 610 | int base_vertex; |
| 611 | |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 612 | radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, 0)); |
| 613 | radeon_emit(cs, info->instance_count); |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 614 | |
Marek Olšák | 3382036 | 2014-12-07 20:04:40 +0100 | [diff] [blame] | 615 | /* Base vertex and start instance. */ |
| 616 | base_vertex = info->indexed ? info->index_bias : info->start; |
| 617 | |
| 618 | if (base_vertex != sctx->last_base_vertex || |
| 619 | sctx->last_base_vertex == SI_BASE_VERTEX_UNKNOWN || |
| 620 | info->start_instance != sctx->last_start_instance || |
Nicolai Hähnle | b6c71d3 | 2016-08-08 15:54:50 +0200 | [diff] [blame] | 621 | info->drawid != sctx->last_drawid || |
Marek Olšák | 3382036 | 2014-12-07 20:04:40 +0100 | [diff] [blame] | 622 | sh_base_reg != sctx->last_sh_base_reg) { |
Nicolai Hähnle | b6c71d3 | 2016-08-08 15:54:50 +0200 | [diff] [blame] | 623 | radeon_set_sh_reg_seq(cs, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, 3); |
Marek Olšák | 3382036 | 2014-12-07 20:04:40 +0100 | [diff] [blame] | 624 | radeon_emit(cs, base_vertex); |
| 625 | radeon_emit(cs, info->start_instance); |
Nicolai Hähnle | b6c71d3 | 2016-08-08 15:54:50 +0200 | [diff] [blame] | 626 | radeon_emit(cs, info->drawid); |
Marek Olšák | 3382036 | 2014-12-07 20:04:40 +0100 | [diff] [blame] | 627 | |
| 628 | sctx->last_base_vertex = base_vertex; |
| 629 | sctx->last_start_instance = info->start_instance; |
Nicolai Hähnle | b6c71d3 | 2016-08-08 15:54:50 +0200 | [diff] [blame] | 630 | sctx->last_drawid = info->drawid; |
Marek Olšák | 3382036 | 2014-12-07 20:04:40 +0100 | [diff] [blame] | 631 | sctx->last_sh_base_reg = sh_base_reg; |
| 632 | } |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 633 | } else { |
Nicolai Hähnle | cf7d18b | 2016-07-29 17:51:23 +0100 | [diff] [blame] | 634 | uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; |
| 635 | |
| 636 | assert(indirect_va % 8 == 0); |
| 637 | |
Marek Olšák | 3382036 | 2014-12-07 20:04:40 +0100 | [diff] [blame] | 638 | si_invalidate_draw_sh_constants(sctx); |
| 639 | |
Nicolai Hähnle | cf7d18b | 2016-07-29 17:51:23 +0100 | [diff] [blame] | 640 | radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); |
| 641 | radeon_emit(cs, 1); |
| 642 | radeon_emit(cs, indirect_va); |
| 643 | radeon_emit(cs, indirect_va >> 32); |
| 644 | |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 645 | radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 646 | (struct r600_resource *)info->indirect, |
Marek Olšák | 2edb060 | 2015-09-26 23:18:55 +0200 | [diff] [blame] | 647 | RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); |
Marek Olšák | 09056b3 | 2014-04-23 16:15:36 +0200 | [diff] [blame] | 648 | } |
| 649 | |
Nicolai Hähnle | 5c343cc | 2016-07-29 18:05:30 +0100 | [diff] [blame] | 650 | if (info->indirect) { |
| 651 | unsigned di_src_sel = info->indexed ? V_0287F0_DI_SRC_SEL_DMA |
| 652 | : V_0287F0_DI_SRC_SEL_AUTO_INDEX; |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 653 | |
Nicolai Hähnle | 5c343cc | 2016-07-29 18:05:30 +0100 | [diff] [blame] | 654 | assert(info->indirect_offset % 4 == 0); |
| 655 | |
| 656 | if (info->indexed) { |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 657 | radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); |
| 658 | radeon_emit(cs, index_va); |
| 659 | radeon_emit(cs, index_va >> 32); |
| 660 | |
| 661 | radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); |
| 662 | radeon_emit(cs, index_max_size); |
Nicolai Hähnle | 5c343cc | 2016-07-29 18:05:30 +0100 | [diff] [blame] | 663 | } |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 664 | |
Nicolai Hähnle | 96bbb62 | 2016-07-29 17:59:11 +0100 | [diff] [blame] | 665 | if (!sctx->screen->has_draw_indirect_multi) { |
Nicolai Hähnle | 5c343cc | 2016-07-29 18:05:30 +0100 | [diff] [blame] | 666 | radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT |
| 667 | : PKT3_DRAW_INDIRECT, |
| 668 | 3, render_cond_bit)); |
| 669 | radeon_emit(cs, info->indirect_offset); |
| 670 | radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); |
| 671 | radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); |
| 672 | radeon_emit(cs, di_src_sel); |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 673 | } else { |
Nicolai Hähnle | 6d7177f | 2016-08-08 16:00:29 +0200 | [diff] [blame] | 674 | uint64_t count_va = 0; |
| 675 | |
| 676 | if (info->indirect_params) { |
| 677 | struct r600_resource *params_buf = |
| 678 | (struct r600_resource *)info->indirect_params; |
| 679 | |
| 680 | radeon_add_to_buffer_list( |
| 681 | &sctx->b, &sctx->b.gfx, params_buf, |
| 682 | RADEON_USAGE_READ, RADEON_PRIO_DRAW_INDIRECT); |
| 683 | |
| 684 | count_va = params_buf->gpu_address + info->indirect_params_offset; |
| 685 | } |
| 686 | |
Nicolai Hähnle | 5c343cc | 2016-07-29 18:05:30 +0100 | [diff] [blame] | 687 | radeon_emit(cs, PKT3(info->indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : |
| 688 | PKT3_DRAW_INDIRECT_MULTI, |
| 689 | 8, render_cond_bit)); |
| 690 | radeon_emit(cs, info->indirect_offset); |
| 691 | radeon_emit(cs, (sh_base_reg + SI_SGPR_BASE_VERTEX * 4 - SI_SH_REG_OFFSET) >> 2); |
| 692 | radeon_emit(cs, (sh_base_reg + SI_SGPR_START_INSTANCE * 4 - SI_SH_REG_OFFSET) >> 2); |
Nicolai Hähnle | b6c71d3 | 2016-08-08 15:54:50 +0200 | [diff] [blame] | 693 | radeon_emit(cs, ((sh_base_reg + SI_SGPR_DRAWID * 4 - SI_SH_REG_OFFSET) >> 2) | |
Nicolai Hähnle | 6d7177f | 2016-08-08 16:00:29 +0200 | [diff] [blame] | 694 | S_2C3_DRAW_INDEX_ENABLE(1) | |
| 695 | S_2C3_COUNT_INDIRECT_ENABLE(!!info->indirect_params)); |
| 696 | radeon_emit(cs, info->indirect_count); |
| 697 | radeon_emit(cs, count_va); |
| 698 | radeon_emit(cs, count_va >> 32); |
| 699 | radeon_emit(cs, info->indirect_stride); |
Nicolai Hähnle | 5c343cc | 2016-07-29 18:05:30 +0100 | [diff] [blame] | 700 | radeon_emit(cs, di_src_sel); |
| 701 | } |
| 702 | } else { |
| 703 | if (info->indexed) { |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 704 | index_va += info->start * ib->index_size; |
| 705 | |
Marek Olšák | 6eff541 | 2015-11-07 14:45:58 +0100 | [diff] [blame] | 706 | radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit)); |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 707 | radeon_emit(cs, index_max_size); |
| 708 | radeon_emit(cs, index_va); |
| 709 | radeon_emit(cs, (index_va >> 32UL) & 0xFF); |
| 710 | radeon_emit(cs, info->count); |
| 711 | radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 712 | } else { |
Marek Olšák | 6eff541 | 2015-11-07 14:45:58 +0100 | [diff] [blame] | 713 | radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, render_cond_bit)); |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 714 | radeon_emit(cs, info->count); |
| 715 | radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | |
Nicolai Hähnle | 5c343cc | 2016-07-29 18:05:30 +0100 | [diff] [blame] | 716 | S_0287F0_USE_OPAQUE(!!info->count_from_stream_output)); |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 717 | } |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 718 | } |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 719 | } |
| 720 | |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 721 | static void si_emit_surface_sync(struct r600_common_context *rctx, |
| 722 | unsigned cp_coher_cntl) |
| 723 | { |
| 724 | struct radeon_winsys_cs *cs = rctx->gfx.cs; |
| 725 | |
| 726 | /* ACQUIRE_MEM is only required on a compute ring. */ |
| 727 | radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0)); |
| 728 | radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */ |
| 729 | radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */ |
| 730 | radeon_emit(cs, 0); /* CP_COHER_BASE */ |
| 731 | radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */ |
| 732 | } |
| 733 | |
Marek Olšák | a67d815 | 2016-09-08 00:59:55 +0200 | [diff] [blame] | 734 | void si_emit_cache_flush(struct si_context *sctx) |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 735 | { |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 736 | struct r600_common_context *rctx = &sctx->b; |
| 737 | struct radeon_winsys_cs *cs = rctx->gfx.cs; |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 738 | uint32_t cp_coher_cntl = 0; |
| 739 | |
Marek Olšák | 5871ebd | 2016-12-25 19:48:55 +0100 | [diff] [blame] | 740 | if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER) |
| 741 | sctx->b.num_fb_cache_flushes++; |
| 742 | |
Marek Olšák | 73c2b0d | 2014-12-28 23:11:38 +0100 | [diff] [blame] | 743 | /* SI has a bug that it always flushes ICACHE and KCACHE if either |
Marek Olšák | 7692704 | 2015-02-19 13:03:54 +0100 | [diff] [blame] | 744 | * bit is set. An alternative way is to write SQC_CACHES, but that |
| 745 | * doesn't seem to work reliably. Since the bug doesn't affect |
| 746 | * correctness (it only does more work than necessary) and |
| 747 | * the performance impact is likely negligible, there is no plan |
Marek Olšák | 3faecdd | 2016-04-17 15:34:24 +0200 | [diff] [blame] | 748 | * to add a workaround for it. |
Marek Olšák | 7692704 | 2015-02-19 13:03:54 +0100 | [diff] [blame] | 749 | */ |
| 750 | |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 751 | if (rctx->flags & SI_CONTEXT_INV_ICACHE) |
Marek Olšák | 7692704 | 2015-02-19 13:03:54 +0100 | [diff] [blame] | 752 | cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1); |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 753 | if (rctx->flags & SI_CONTEXT_INV_SMEM_L1) |
Marek Olšák | 7692704 | 2015-02-19 13:03:54 +0100 | [diff] [blame] | 754 | cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1); |
Marek Olšák | 2bfe9d4 | 2014-12-29 14:02:46 +0100 | [diff] [blame] | 755 | |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 756 | if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) { |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 757 | cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | |
| 758 | S_0085F0_CB0_DEST_BASE_ENA(1) | |
| 759 | S_0085F0_CB1_DEST_BASE_ENA(1) | |
| 760 | S_0085F0_CB2_DEST_BASE_ENA(1) | |
| 761 | S_0085F0_CB3_DEST_BASE_ENA(1) | |
| 762 | S_0085F0_CB4_DEST_BASE_ENA(1) | |
| 763 | S_0085F0_CB5_DEST_BASE_ENA(1) | |
| 764 | S_0085F0_CB6_DEST_BASE_ENA(1) | |
| 765 | S_0085F0_CB7_DEST_BASE_ENA(1); |
Bas Nieuwenhuizen | 81ebd6a | 2015-10-21 00:10:38 +0200 | [diff] [blame] | 766 | |
| 767 | /* Necessary for DCC */ |
Marek Olšák | dc6bbe2 | 2016-10-03 15:37:19 +0200 | [diff] [blame] | 768 | if (rctx->chip_class == VI) |
| 769 | r600_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS, |
| 770 | 0, 0, NULL, 0, 0, 0); |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 771 | } |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 772 | if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) { |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 773 | cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) | |
| 774 | S_0085F0_DB_DEST_BASE_ENA(1); |
| 775 | } |
| 776 | |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 777 | if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB_META) { |
Bas Nieuwenhuizen | 41d79bc | 2016-04-02 11:37:06 +0200 | [diff] [blame] | 778 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 779 | radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0)); |
Marek Olšák | 58494b42 | 2016-04-17 16:14:32 +0200 | [diff] [blame] | 780 | /* needed for wait for idle in SURFACE_SYNC */ |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 781 | assert(rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB); |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 782 | } |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 783 | if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB_META) { |
Bas Nieuwenhuizen | 41d79bc | 2016-04-02 11:37:06 +0200 | [diff] [blame] | 784 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 785 | radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0)); |
Marek Olšák | 58494b42 | 2016-04-17 16:14:32 +0200 | [diff] [blame] | 786 | /* needed for wait for idle in SURFACE_SYNC */ |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 787 | assert(rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB); |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 788 | } |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 789 | |
Marek Olšák | 1db5678 | 2016-04-17 16:18:54 +0200 | [diff] [blame] | 790 | /* Wait for shader engines to go idle. |
| 791 | * VS and PS waits are unnecessary if SURFACE_SYNC is going to wait |
| 792 | * for everything including CB/DB cache flushes. |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 793 | */ |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 794 | if (!(rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_CB | |
Marek Olšák | 1db5678 | 2016-04-17 16:18:54 +0200 | [diff] [blame] | 795 | SI_CONTEXT_FLUSH_AND_INV_DB))) { |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 796 | if (rctx->flags & SI_CONTEXT_PS_PARTIAL_FLUSH) { |
Bas Nieuwenhuizen | 41d79bc | 2016-04-02 11:37:06 +0200 | [diff] [blame] | 797 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
Marek Olšák | 1db5678 | 2016-04-17 16:18:54 +0200 | [diff] [blame] | 798 | radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4)); |
Marek Olšák | addca75 | 2016-08-23 15:17:35 +0200 | [diff] [blame] | 799 | /* Only count explicit shader flushes, not implicit ones |
| 800 | * done by SURFACE_SYNC. |
| 801 | */ |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 802 | rctx->num_vs_flushes++; |
| 803 | rctx->num_ps_flushes++; |
| 804 | } else if (rctx->flags & SI_CONTEXT_VS_PARTIAL_FLUSH) { |
Bas Nieuwenhuizen | 41d79bc | 2016-04-02 11:37:06 +0200 | [diff] [blame] | 805 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
Marek Olšák | 1db5678 | 2016-04-17 16:18:54 +0200 | [diff] [blame] | 806 | radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4)); |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 807 | rctx->num_vs_flushes++; |
Marek Olšák | 1db5678 | 2016-04-17 16:18:54 +0200 | [diff] [blame] | 808 | } |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 809 | } |
Marek Olšák | addca75 | 2016-08-23 15:17:35 +0200 | [diff] [blame] | 810 | |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 811 | if (rctx->flags & SI_CONTEXT_CS_PARTIAL_FLUSH && |
| 812 | sctx->compute_is_busy) { |
Bas Nieuwenhuizen | 41d79bc | 2016-04-02 11:37:06 +0200 | [diff] [blame] | 813 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 814 | radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH | EVENT_INDEX(4))); |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 815 | rctx->num_cs_flushes++; |
| 816 | sctx->compute_is_busy = false; |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 817 | } |
Marek Olšák | 1db5678 | 2016-04-17 16:18:54 +0200 | [diff] [blame] | 818 | |
| 819 | /* VGT state synchronization. */ |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 820 | if (rctx->flags & SI_CONTEXT_VGT_FLUSH) { |
Bas Nieuwenhuizen | 41d79bc | 2016-04-02 11:37:06 +0200 | [diff] [blame] | 821 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 822 | radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0)); |
| 823 | } |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 824 | if (rctx->flags & SI_CONTEXT_VGT_STREAMOUT_SYNC) { |
Bas Nieuwenhuizen | 41d79bc | 2016-04-02 11:37:06 +0200 | [diff] [blame] | 825 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 826 | radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_STREAMOUT_SYNC) | EVENT_INDEX(0)); |
| 827 | } |
| 828 | |
Marek Olšák | dd9ca77 | 2016-04-17 17:28:25 +0200 | [diff] [blame] | 829 | /* Make sure ME is idle (it executes most packets) before continuing. |
| 830 | * This prevents read-after-write hazards between PFP and ME. |
| 831 | */ |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 832 | if (cp_coher_cntl || |
| 833 | (rctx->flags & (SI_CONTEXT_CS_PARTIAL_FLUSH | |
| 834 | SI_CONTEXT_INV_VMEM_L1 | |
| 835 | SI_CONTEXT_INV_GLOBAL_L2 | |
| 836 | SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { |
Marek Olšák | dd9ca77 | 2016-04-17 17:28:25 +0200 | [diff] [blame] | 837 | radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); |
| 838 | radeon_emit(cs, 0); |
| 839 | } |
| 840 | |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 841 | /* When one of the CP_COHER_CNTL.DEST_BASE flags is set, SURFACE_SYNC |
| 842 | * waits for idle. Therefore, it should be last. SURFACE_SYNC is done |
| 843 | * in PFP. |
| 844 | * |
| 845 | * cp_coher_cntl should contain all necessary flags except TC flags |
| 846 | * at this point. |
| 847 | * |
| 848 | * SI-CIK don't support L2 write-back. |
Marek Olšák | d8185aa | 2014-12-30 18:41:25 +0100 | [diff] [blame] | 849 | */ |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 850 | if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 || |
| 851 | (rctx->chip_class <= CIK && |
| 852 | (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) { |
Marek Olšák | e405d0d | 2017-01-20 01:13:39 +0100 | [diff] [blame] | 853 | /* Invalidate L1 & L2. (L1 is always invalidated on SI) |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 854 | * WB must be set on VI+ when TC_ACTION is set. |
| 855 | */ |
| 856 | si_emit_surface_sync(rctx, cp_coher_cntl | |
| 857 | S_0085F0_TC_ACTION_ENA(1) | |
Marek Olšák | e405d0d | 2017-01-20 01:13:39 +0100 | [diff] [blame] | 858 | S_0085F0_TCL1_ACTION_ENA(1) | |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 859 | S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI)); |
| 860 | cp_coher_cntl = 0; |
Marek Olšák | 5871ebd | 2016-12-25 19:48:55 +0100 | [diff] [blame] | 861 | sctx->b.num_L2_invalidates++; |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 862 | } else { |
| 863 | /* L1 invalidation and L2 writeback must be done separately, |
| 864 | * because both operations can't be done together. |
| 865 | */ |
| 866 | if (rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2) { |
| 867 | /* WB = write-back |
| 868 | * NC = apply to non-coherent MTYPEs |
| 869 | * (i.e. MTYPE <= 1, which is what we use everywhere) |
| 870 | * |
| 871 | * WB doesn't work without NC. |
| 872 | */ |
| 873 | si_emit_surface_sync(rctx, cp_coher_cntl | |
| 874 | S_0301F0_TC_WB_ACTION_ENA(1) | |
| 875 | S_0301F0_TC_NC_ACTION_ENA(1)); |
| 876 | cp_coher_cntl = 0; |
Marek Olšák | 5871ebd | 2016-12-25 19:48:55 +0100 | [diff] [blame] | 877 | sctx->b.num_L2_writebacks++; |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 878 | } |
| 879 | if (rctx->flags & SI_CONTEXT_INV_VMEM_L1) { |
| 880 | /* Invalidate per-CU VMEM L1. */ |
| 881 | si_emit_surface_sync(rctx, cp_coher_cntl | |
| 882 | S_0085F0_TCL1_ACTION_ENA(1)); |
| 883 | cp_coher_cntl = 0; |
| 884 | } |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 885 | } |
| 886 | |
Marek Olšák | 8cdce30 | 2016-10-10 18:49:22 +0200 | [diff] [blame] | 887 | /* If TC flushes haven't cleared this... */ |
| 888 | if (cp_coher_cntl) |
| 889 | si_emit_surface_sync(rctx, cp_coher_cntl); |
| 890 | |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 891 | if (rctx->flags & R600_CONTEXT_START_PIPELINE_STATS) { |
Marek Olšák | f3eebb84e | 2016-04-07 02:59:09 +0200 | [diff] [blame] | 892 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
| 893 | radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_START) | |
| 894 | EVENT_INDEX(0)); |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 895 | } else if (rctx->flags & R600_CONTEXT_STOP_PIPELINE_STATS) { |
Marek Olšák | f3eebb84e | 2016-04-07 02:59:09 +0200 | [diff] [blame] | 896 | radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); |
| 897 | radeon_emit(cs, EVENT_TYPE(V_028A90_PIPELINESTAT_STOP) | |
| 898 | EVENT_INDEX(0)); |
| 899 | } |
| 900 | |
Marek Olšák | 22cb5ae | 2016-08-25 14:08:24 +0200 | [diff] [blame] | 901 | rctx->flags = 0; |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 902 | } |
| 903 | |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 904 | static void si_get_draw_start_count(struct si_context *sctx, |
| 905 | const struct pipe_draw_info *info, |
| 906 | unsigned *start, unsigned *count) |
| 907 | { |
| 908 | if (info->indirect) { |
Nicolai Hähnle | 7cb3535 | 2017-02-20 10:46:13 +0100 | [diff] [blame^] | 909 | unsigned indirect_count; |
| 910 | struct pipe_transfer *transfer; |
| 911 | unsigned begin, end; |
| 912 | unsigned map_size; |
| 913 | unsigned *data; |
| 914 | |
| 915 | if (info->indirect_params) { |
| 916 | data = pipe_buffer_map_range(&sctx->b.b, |
| 917 | info->indirect_params, |
| 918 | info->indirect_params_offset, |
| 919 | sizeof(unsigned), |
| 920 | PIPE_TRANSFER_READ, &transfer); |
| 921 | |
| 922 | indirect_count = *data; |
| 923 | |
| 924 | pipe_buffer_unmap(&sctx->b.b, transfer); |
| 925 | } else { |
| 926 | indirect_count = info->indirect_count; |
| 927 | } |
| 928 | |
| 929 | if (!indirect_count) { |
| 930 | *start = *count = 0; |
| 931 | return; |
| 932 | } |
| 933 | |
| 934 | map_size = (indirect_count - 1) * info->indirect_stride + 3 * sizeof(unsigned); |
| 935 | data = pipe_buffer_map_range(&sctx->b.b, info->indirect, |
| 936 | info->indirect_offset, map_size, |
| 937 | PIPE_TRANSFER_READ, &transfer); |
| 938 | |
| 939 | begin = UINT_MAX; |
| 940 | end = 0; |
| 941 | |
| 942 | for (unsigned i = 0; i < indirect_count; ++i) { |
| 943 | unsigned count = data[0]; |
| 944 | unsigned start = data[2]; |
| 945 | |
| 946 | if (count > 0) { |
| 947 | begin = MIN2(begin, start); |
| 948 | end = MAX2(end, start + count); |
| 949 | } |
| 950 | |
| 951 | data += info->indirect_stride / sizeof(unsigned); |
| 952 | } |
| 953 | |
| 954 | pipe_buffer_unmap(&sctx->b.b, transfer); |
| 955 | |
| 956 | if (begin < end) { |
| 957 | *start = begin; |
| 958 | *count = end - begin; |
| 959 | } else { |
| 960 | *start = *count = 0; |
| 961 | } |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 962 | } else { |
| 963 | *start = info->start; |
| 964 | *count = info->count; |
| 965 | } |
| 966 | } |
| 967 | |
Bas Nieuwenhuizen | 6c833ba | 2016-04-19 13:52:32 +0200 | [diff] [blame] | 968 | void si_ce_pre_draw_synchronization(struct si_context *sctx) |
Bas Nieuwenhuizen | 86c71ff | 2016-03-10 21:01:39 +0100 | [diff] [blame] | 969 | { |
| 970 | if (sctx->ce_need_synchronization) { |
| 971 | radeon_emit(sctx->ce_ib, PKT3(PKT3_INCREMENT_CE_COUNTER, 0, 0)); |
| 972 | radeon_emit(sctx->ce_ib, 1); |
| 973 | |
| 974 | radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_WAIT_ON_CE_COUNTER, 0, 0)); |
| 975 | radeon_emit(sctx->b.gfx.cs, 1); |
| 976 | } |
| 977 | } |
| 978 | |
Bas Nieuwenhuizen | 6c833ba | 2016-04-19 13:52:32 +0200 | [diff] [blame] | 979 | void si_ce_post_draw_synchronization(struct si_context *sctx) |
Bas Nieuwenhuizen | 86c71ff | 2016-03-10 21:01:39 +0100 | [diff] [blame] | 980 | { |
| 981 | if (sctx->ce_need_synchronization) { |
| 982 | radeon_emit(sctx->b.gfx.cs, PKT3(PKT3_INCREMENT_DE_COUNTER, 0, 0)); |
| 983 | radeon_emit(sctx->b.gfx.cs, 0); |
| 984 | |
| 985 | sctx->ce_need_synchronization = false; |
| 986 | } |
| 987 | } |
| 988 | |
Marek Olšák | ece6e1f | 2016-12-25 18:11:59 +0100 | [diff] [blame] | 989 | static void cik_prefetch_shader_async(struct si_context *sctx, |
| 990 | struct si_pm4_state *state) |
| 991 | { |
| 992 | if (state) { |
| 993 | struct pipe_resource *bo = &state->bo[0]->b.b; |
| 994 | assert(state->nbo == 1); |
| 995 | |
| 996 | cik_prefetch_TC_L2_async(sctx, bo, 0, bo->width0); |
| 997 | } |
| 998 | } |
| 999 | |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 1000 | void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1001 | { |
Andreas Hartmetz | 8662e66 | 2014-01-11 16:00:50 +0100 | [diff] [blame] | 1002 | struct si_context *sctx = (struct si_context *)ctx; |
Marek Olšák | 50bb2de | 2015-10-22 22:18:49 +0200 | [diff] [blame] | 1003 | struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1004 | struct pipe_index_buffer ib = {}; |
Marek Olšák | c15a9de | 2016-08-02 11:51:21 +0200 | [diff] [blame] | 1005 | unsigned mask, dirty_fb_counter, dirty_tex_counter, rast_prim; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1006 | |
Marek Olšák | a8e7ea6 | 2016-09-06 00:09:17 +0200 | [diff] [blame] | 1007 | if (likely(!info->indirect)) { |
| 1008 | /* SI-CI treat instance_count==0 as instance_count==1. There is |
| 1009 | * no workaround for indirect draws, but we can at least skip |
| 1010 | * direct draws. |
| 1011 | */ |
| 1012 | if (unlikely(!info->instance_count)) |
| 1013 | return; |
| 1014 | |
| 1015 | /* Handle count == 0. */ |
| 1016 | if (unlikely(!info->count && |
| 1017 | (info->indexed || !info->count_from_stream_output))) |
| 1018 | return; |
| 1019 | } |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1020 | |
Marek Olšák | bdf767d | 2016-09-06 00:10:38 +0200 | [diff] [blame] | 1021 | if (unlikely(!sctx->vs_shader.cso)) { |
Marek Olšák | 50bb2de | 2015-10-22 22:18:49 +0200 | [diff] [blame] | 1022 | assert(0); |
| 1023 | return; |
| 1024 | } |
Marek Olšák | bdf767d | 2016-09-06 00:10:38 +0200 | [diff] [blame] | 1025 | if (unlikely(!sctx->ps_shader.cso && (!rs || !rs->rasterizer_discard))) { |
Marek Olšák | 99bf47f | 2015-02-22 18:10:38 +0100 | [diff] [blame] | 1026 | assert(0); |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1027 | return; |
Marek Olšák | 99bf47f | 2015-02-22 18:10:38 +0100 | [diff] [blame] | 1028 | } |
Marek Olšák | bdf767d | 2016-09-06 00:10:38 +0200 | [diff] [blame] | 1029 | if (unlikely(!!sctx->tes_shader.cso != (info->mode == PIPE_PRIM_PATCHES))) { |
Marek Olšák | 99bf47f | 2015-02-22 18:10:38 +0100 | [diff] [blame] | 1030 | assert(0); |
| 1031 | return; |
| 1032 | } |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1033 | |
Marek Olšák | 60c08aa | 2016-02-24 22:04:47 +0100 | [diff] [blame] | 1034 | /* Re-emit the framebuffer state if needed. */ |
| 1035 | dirty_fb_counter = p_atomic_read(&sctx->b.screen->dirty_fb_counter); |
Marek Olšák | bdf767d | 2016-09-06 00:10:38 +0200 | [diff] [blame] | 1036 | if (unlikely(dirty_fb_counter != sctx->b.last_dirty_fb_counter)) { |
Marek Olšák | 60c08aa | 2016-02-24 22:04:47 +0100 | [diff] [blame] | 1037 | sctx->b.last_dirty_fb_counter = dirty_fb_counter; |
| 1038 | sctx->framebuffer.dirty_cbufs |= |
| 1039 | ((1 << sctx->framebuffer.state.nr_cbufs) - 1); |
| 1040 | sctx->framebuffer.dirty_zsbuf = true; |
| 1041 | si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); |
| 1042 | } |
| 1043 | |
Marek Olšák | 30b2b86 | 2016-05-17 21:45:50 +0200 | [diff] [blame] | 1044 | /* Invalidate & recompute texture descriptors if needed. */ |
| 1045 | dirty_tex_counter = p_atomic_read(&sctx->b.screen->dirty_tex_descriptor_counter); |
Marek Olšák | bdf767d | 2016-09-06 00:10:38 +0200 | [diff] [blame] | 1046 | if (unlikely(dirty_tex_counter != sctx->b.last_dirty_tex_descriptor_counter)) { |
Marek Olšák | 30b2b86 | 2016-05-17 21:45:50 +0200 | [diff] [blame] | 1047 | sctx->b.last_dirty_tex_descriptor_counter = dirty_tex_counter; |
| 1048 | si_update_all_texture_descriptors(sctx); |
| 1049 | } |
| 1050 | |
Bas Nieuwenhuizen | 061ce93 | 2016-03-19 18:41:20 +0100 | [diff] [blame] | 1051 | si_decompress_graphics_textures(sctx); |
Marek Olšák | 0b1f31a | 2015-02-22 19:14:42 +0100 | [diff] [blame] | 1052 | |
| 1053 | /* Set the rasterization primitive type. |
| 1054 | * |
| 1055 | * This must be done after si_decompress_textures, which can call |
| 1056 | * draw_vbo recursively, and before si_update_shaders, which uses |
| 1057 | * current_rast_prim for this draw_vbo call. */ |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 1058 | if (sctx->gs_shader.cso) |
Marek Olšák | c15a9de | 2016-08-02 11:51:21 +0200 | [diff] [blame] | 1059 | rast_prim = sctx->gs_shader.cso->gs_output_prim; |
Marek Olšák | 9b54ce3 | 2015-10-07 01:48:18 +0200 | [diff] [blame] | 1060 | else if (sctx->tes_shader.cso) |
Marek Olšák | c15a9de | 2016-08-02 11:51:21 +0200 | [diff] [blame] | 1061 | rast_prim = sctx->tes_shader.cso->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]; |
Marek Olšák | 1fe7ba8 | 2015-01-31 20:09:46 +0100 | [diff] [blame] | 1062 | else |
Marek Olšák | c15a9de | 2016-08-02 11:51:21 +0200 | [diff] [blame] | 1063 | rast_prim = info->mode; |
Marek Olšák | 1fe7ba8 | 2015-01-31 20:09:46 +0100 | [diff] [blame] | 1064 | |
Marek Olšák | c15a9de | 2016-08-02 11:51:21 +0200 | [diff] [blame] | 1065 | if (rast_prim != sctx->current_rast_prim) { |
| 1066 | sctx->current_rast_prim = rast_prim; |
| 1067 | sctx->do_update_shaders = true; |
| 1068 | } |
| 1069 | |
Nicolai Hähnle | 908f92a | 2016-10-31 12:50:09 +0100 | [diff] [blame] | 1070 | if (sctx->gs_shader.cso) { |
| 1071 | /* Determine whether the GS triangle strip adjacency fix should |
| 1072 | * be applied. Rotate every other triangle if |
| 1073 | * - triangle strips with adjacency are fed to the GS and |
| 1074 | * - primitive restart is disabled (the rotation doesn't help |
| 1075 | * when the restart occurs after an odd number of triangles). |
| 1076 | */ |
| 1077 | bool gs_tri_strip_adj_fix = |
| 1078 | !sctx->tes_shader.cso && |
| 1079 | info->mode == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY && |
| 1080 | !info->primitive_restart; |
| 1081 | |
| 1082 | if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) { |
| 1083 | sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix; |
| 1084 | sctx->do_update_shaders = true; |
| 1085 | } |
| 1086 | } |
| 1087 | |
Marek Olšák | c15a9de | 2016-08-02 11:51:21 +0200 | [diff] [blame] | 1088 | if (sctx->do_update_shaders && !si_update_shaders(sctx)) |
| 1089 | return; |
| 1090 | |
| 1091 | if (!si_upload_graphics_shader_descriptors(sctx)) |
Marek Olšák | b052811 | 2015-07-25 00:53:16 +0200 | [diff] [blame] | 1092 | return; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1093 | |
Christian König | 9f5ff59 | 2012-08-03 10:26:01 +0200 | [diff] [blame] | 1094 | if (info->indexed) { |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1095 | /* Initialize the index buffer struct. */ |
Andreas Hartmetz | 8662e66 | 2014-01-11 16:00:50 +0100 | [diff] [blame] | 1096 | pipe_resource_reference(&ib.buffer, sctx->index_buffer.buffer); |
| 1097 | ib.user_buffer = sctx->index_buffer.user_buffer; |
| 1098 | ib.index_size = sctx->index_buffer.index_size; |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1099 | ib.offset = sctx->index_buffer.offset; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1100 | |
| 1101 | /* Translate or upload, if needed. */ |
Marek Olšák | 2d1952e | 2015-04-16 20:44:54 +0200 | [diff] [blame] | 1102 | /* 8-bit indices are supported on VI. */ |
| 1103 | if (sctx->b.chip_class <= CIK && ib.index_size == 1) { |
Marek Olšák | 9f5c037 | 2014-01-22 03:05:21 +0100 | [diff] [blame] | 1104 | struct pipe_resource *out_buffer = NULL; |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1105 | unsigned out_offset, start, count, start_offset; |
Marek Olšák | 9f5c037 | 2014-01-22 03:05:21 +0100 | [diff] [blame] | 1106 | void *ptr; |
| 1107 | |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 1108 | si_get_draw_start_count(sctx, info, &start, &count); |
Marek Olšák | 9c80a81 | 2017-02-15 17:24:38 +0100 | [diff] [blame] | 1109 | start_offset = start * 2; |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1110 | |
Marek Olšák | 020009f | 2015-12-19 17:15:02 +0100 | [diff] [blame] | 1111 | u_upload_alloc(sctx->b.uploader, start_offset, count * 2, 256, |
Marek Olšák | 9f5c037 | 2014-01-22 03:05:21 +0100 | [diff] [blame] | 1112 | &out_offset, &out_buffer, &ptr); |
Marek Olšák | 29dff6f | 2015-09-10 17:42:31 +0200 | [diff] [blame] | 1113 | if (!out_buffer) { |
| 1114 | pipe_resource_reference(&ib.buffer, NULL); |
| 1115 | return; |
| 1116 | } |
Marek Olšák | 9f5c037 | 2014-01-22 03:05:21 +0100 | [diff] [blame] | 1117 | |
Marek Olšák | bc8d047 | 2017-02-17 12:55:24 +0100 | [diff] [blame] | 1118 | util_shorten_ubyte_elts_to_userptr(&sctx->b.b, &ib, 0, 0, |
Marek Olšák | 9c80a81 | 2017-02-15 17:24:38 +0100 | [diff] [blame] | 1119 | ib.offset + start, |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1120 | count, ptr); |
Marek Olšák | 9f5c037 | 2014-01-22 03:05:21 +0100 | [diff] [blame] | 1121 | |
| 1122 | pipe_resource_reference(&ib.buffer, NULL); |
| 1123 | ib.user_buffer = NULL; |
| 1124 | ib.buffer = out_buffer; |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1125 | /* info->start will be added by the drawing code */ |
| 1126 | ib.offset = out_offset - start_offset; |
Marek Olšák | 9f5c037 | 2014-01-22 03:05:21 +0100 | [diff] [blame] | 1127 | ib.index_size = 2; |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1128 | } else if (ib.user_buffer && !ib.buffer) { |
| 1129 | unsigned start, count, start_offset; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1130 | |
Marek Olšák | 2a7b57a | 2014-04-24 03:03:43 +0200 | [diff] [blame] | 1131 | si_get_draw_start_count(sctx, info, &start, &count); |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1132 | start_offset = start * ib.index_size; |
| 1133 | |
| 1134 | u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size, |
Marek Olšák | e0f9328 | 2015-12-19 17:15:02 +0100 | [diff] [blame] | 1135 | 256, (char*)ib.user_buffer + start_offset, |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1136 | &ib.offset, &ib.buffer); |
Marek Olšák | 29dff6f | 2015-09-10 17:42:31 +0200 | [diff] [blame] | 1137 | if (!ib.buffer) |
| 1138 | return; |
Marek Olšák | 887b69a | 2014-04-24 16:13:54 +0200 | [diff] [blame] | 1139 | /* info->start will be added by the drawing code */ |
| 1140 | ib.offset -= start_offset; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1141 | } |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1142 | } |
| 1143 | |
Marek Olšák | 5749676 | 2015-09-06 15:43:23 +0200 | [diff] [blame] | 1144 | /* VI reads index buffers through TC L2. */ |
| 1145 | if (info->indexed && sctx->b.chip_class <= CIK && |
| 1146 | r600_resource(ib.buffer)->TC_L2_dirty) { |
Marek Olšák | 40e1f7e | 2016-10-10 18:51:24 +0200 | [diff] [blame] | 1147 | sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; |
Marek Olšák | 18a30c9 | 2014-12-29 14:53:11 +0100 | [diff] [blame] | 1148 | r600_resource(ib.buffer)->TC_L2_dirty = false; |
| 1149 | } |
| 1150 | |
Nicolai Hähnle | 2852ded | 2016-08-08 17:06:22 +0200 | [diff] [blame] | 1151 | if (info->indirect && r600_resource(info->indirect)->TC_L2_dirty) { |
Marek Olšák | 40e1f7e | 2016-10-10 18:51:24 +0200 | [diff] [blame] | 1152 | sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; |
Nicolai Hähnle | 2852ded | 2016-08-08 17:06:22 +0200 | [diff] [blame] | 1153 | r600_resource(info->indirect)->TC_L2_dirty = false; |
| 1154 | } |
| 1155 | |
Nicolai Hähnle | 6d7177f | 2016-08-08 16:00:29 +0200 | [diff] [blame] | 1156 | if (info->indirect_params && |
| 1157 | r600_resource(info->indirect_params)->TC_L2_dirty) { |
Marek Olšák | 40e1f7e | 2016-10-10 18:51:24 +0200 | [diff] [blame] | 1158 | sctx->b.flags |= SI_CONTEXT_WRITEBACK_GLOBAL_L2; |
Nicolai Hähnle | 6d7177f | 2016-08-08 16:00:29 +0200 | [diff] [blame] | 1159 | r600_resource(info->indirect_params)->TC_L2_dirty = false; |
| 1160 | } |
| 1161 | |
Marek Olšák | c56ecb6 | 2016-07-29 16:33:50 +0200 | [diff] [blame] | 1162 | /* Add buffer sizes for memory checking in need_cs_space. */ |
| 1163 | if (sctx->emit_scratch_reloc && sctx->scratch_buffer) |
| 1164 | r600_context_add_resource_size(ctx, &sctx->scratch_buffer->b.b); |
| 1165 | if (info->indirect) |
| 1166 | r600_context_add_resource_size(ctx, info->indirect); |
| 1167 | |
Marek Olšák | 28b34b4 | 2015-08-30 03:56:13 +0200 | [diff] [blame] | 1168 | si_need_cs_space(sctx); |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1169 | |
Marek Olšák | d82cfab | 2016-07-29 17:28:43 +0200 | [diff] [blame] | 1170 | /* Since we've called r600_context_add_resource_size for vertex buffers, |
| 1171 | * this must be called after si_need_cs_space, because we must let |
| 1172 | * need_cs_space flush before we add buffers to the buffer list. |
| 1173 | */ |
| 1174 | if (!si_upload_vertex_buffer_descriptors(sctx)) |
| 1175 | return; |
| 1176 | |
Marek Olšák | ece6e1f | 2016-12-25 18:11:59 +0100 | [diff] [blame] | 1177 | /* Flushed caches prior to prefetching shaders. */ |
Marek Olšák | a67d815 | 2016-09-08 00:59:55 +0200 | [diff] [blame] | 1178 | if (sctx->b.flags) |
| 1179 | si_emit_cache_flush(sctx); |
| 1180 | |
Marek Olšák | ece6e1f | 2016-12-25 18:11:59 +0100 | [diff] [blame] | 1181 | /* Prefetch shaders and VBO descriptors to TC L2. */ |
| 1182 | if (sctx->b.chip_class >= CIK) { |
| 1183 | if (si_pm4_state_changed(sctx, ls)) |
| 1184 | cik_prefetch_shader_async(sctx, sctx->queued.named.ls); |
| 1185 | if (si_pm4_state_changed(sctx, hs)) |
| 1186 | cik_prefetch_shader_async(sctx, sctx->queued.named.hs); |
| 1187 | if (si_pm4_state_changed(sctx, es)) |
| 1188 | cik_prefetch_shader_async(sctx, sctx->queued.named.es); |
| 1189 | if (si_pm4_state_changed(sctx, gs)) |
| 1190 | cik_prefetch_shader_async(sctx, sctx->queued.named.gs); |
| 1191 | if (si_pm4_state_changed(sctx, vs)) |
| 1192 | cik_prefetch_shader_async(sctx, sctx->queued.named.vs); |
| 1193 | |
| 1194 | /* Vertex buffer descriptors are uploaded uncached, so prefetch |
| 1195 | * them right after the VS binary. */ |
Marek Olšák | cf24892 | 2017-01-17 21:30:23 +0100 | [diff] [blame] | 1196 | if (sctx->vertex_buffer_pointer_dirty) { |
Marek Olšák | ece6e1f | 2016-12-25 18:11:59 +0100 | [diff] [blame] | 1197 | cik_prefetch_TC_L2_async(sctx, &sctx->vertex_buffers.buffer->b.b, |
| 1198 | sctx->vertex_buffers.buffer_offset, |
| 1199 | sctx->vertex_elements->count * 16); |
| 1200 | } |
| 1201 | if (si_pm4_state_changed(sctx, ps)) |
| 1202 | cik_prefetch_shader_async(sctx, sctx->queued.named.ps); |
| 1203 | } |
| 1204 | |
Marek Olšák | a77ee8b | 2013-08-26 17:19:39 +0200 | [diff] [blame] | 1205 | /* Emit states. */ |
Marek Olšák | 87c1e9e | 2015-08-29 00:49:40 +0200 | [diff] [blame] | 1206 | mask = sctx->dirty_atoms; |
| 1207 | while (mask) { |
| 1208 | struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)]; |
| 1209 | |
| 1210 | atom->emit(&sctx->b, atom); |
Marek Olšák | c8e70e6 | 2013-08-06 06:42:22 +0200 | [diff] [blame] | 1211 | } |
Marek Olšák | 87c1e9e | 2015-08-29 00:49:40 +0200 | [diff] [blame] | 1212 | sctx->dirty_atoms = 0; |
Marek Olšák | c8e70e6 | 2013-08-06 06:42:22 +0200 | [diff] [blame] | 1213 | |
Andreas Hartmetz | 8662e66 | 2014-01-11 16:00:50 +0100 | [diff] [blame] | 1214 | si_pm4_emit_dirty(sctx); |
Marek Olšák | dc39413 | 2015-03-15 20:13:52 +0100 | [diff] [blame] | 1215 | si_emit_scratch_reloc(sctx); |
Marek Olšák | fdf2c04 | 2015-02-22 17:42:20 +0100 | [diff] [blame] | 1216 | si_emit_rasterizer_prim_state(sctx); |
| 1217 | si_emit_draw_registers(sctx, info); |
Bas Nieuwenhuizen | 86c71ff | 2016-03-10 21:01:39 +0100 | [diff] [blame] | 1218 | |
| 1219 | si_ce_pre_draw_synchronization(sctx); |
| 1220 | |
Marek Olšák | 384213c | 2014-12-07 15:52:15 +0100 | [diff] [blame] | 1221 | si_emit_draw_packets(sctx, info, &ib); |
| 1222 | |
Bas Nieuwenhuizen | 86c71ff | 2016-03-10 21:01:39 +0100 | [diff] [blame] | 1223 | si_ce_post_draw_synchronization(sctx); |
| 1224 | |
Marek Olšák | 2c14a6d | 2015-08-19 11:53:25 +0200 | [diff] [blame] | 1225 | if (sctx->trace_buf) |
Andreas Hartmetz | 8662e66 | 2014-01-11 16:00:50 +0100 | [diff] [blame] | 1226 | si_trace_emit(sctx); |
Jerome Glisse | 3f7d971 | 2013-03-25 11:46:38 -0400 | [diff] [blame] | 1227 | |
Marek Olšák | 0e7f563 | 2014-07-26 03:16:22 +0200 | [diff] [blame] | 1228 | /* Workaround for a VGT hang when streamout is enabled. |
| 1229 | * It must be done after drawing. */ |
Marek Olšák | 787ada6 | 2015-12-04 21:24:21 +0100 | [diff] [blame] | 1230 | if ((sctx->b.family == CHIP_HAWAII || |
| 1231 | sctx->b.family == CHIP_TONGA || |
| 1232 | sctx->b.family == CHIP_FIJI) && |
Marek Olšák | a4c288d | 2016-04-07 03:24:06 +0200 | [diff] [blame] | 1233 | r600_get_strmout_en(&sctx->b)) { |
Marek Olšák | 2bfe9d4 | 2014-12-29 14:02:46 +0100 | [diff] [blame] | 1234 | sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC; |
Marek Olšák | 0e7f563 | 2014-07-26 03:16:22 +0200 | [diff] [blame] | 1235 | } |
| 1236 | |
Marek Olšák | 6f6112a | 2013-01-17 19:36:41 +0100 | [diff] [blame] | 1237 | /* Set the depth buffer as dirty. */ |
Marek Olšák | 6a5499b | 2014-03-04 17:49:39 +0100 | [diff] [blame] | 1238 | if (sctx->framebuffer.state.zsbuf) { |
| 1239 | struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; |
Marek Olšák | 363b280 | 2013-08-05 03:42:11 +0200 | [diff] [blame] | 1240 | struct r600_texture *rtex = (struct r600_texture *)surf->texture; |
Marek Olšák | 6f6112a | 2013-01-17 19:36:41 +0100 | [diff] [blame] | 1241 | |
Marek Olšák | d4d9ec5 | 2016-10-11 23:19:46 +0200 | [diff] [blame] | 1242 | if (!rtex->tc_compatible_htile) |
| 1243 | rtex->dirty_level_mask |= 1 << surf->u.tex.level; |
Marek Olšák | 5804c6a | 2015-09-06 17:35:06 +0200 | [diff] [blame] | 1244 | |
| 1245 | if (rtex->surface.flags & RADEON_SURF_SBUFFER) |
| 1246 | rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1247 | } |
Marek Olšák | 6a5499b | 2014-03-04 17:49:39 +0100 | [diff] [blame] | 1248 | if (sctx->framebuffer.compressed_cb_mask) { |
Marek Olšák | 3c3feb3 | 2013-08-06 08:48:07 +0200 | [diff] [blame] | 1249 | struct pipe_surface *surf; |
| 1250 | struct r600_texture *rtex; |
Marek Olšák | 6a5499b | 2014-03-04 17:49:39 +0100 | [diff] [blame] | 1251 | unsigned mask = sctx->framebuffer.compressed_cb_mask; |
Marek Olšák | 3c3feb3 | 2013-08-06 08:48:07 +0200 | [diff] [blame] | 1252 | |
| 1253 | do { |
| 1254 | unsigned i = u_bit_scan(&mask); |
Marek Olšák | 6a5499b | 2014-03-04 17:49:39 +0100 | [diff] [blame] | 1255 | surf = sctx->framebuffer.state.cbufs[i]; |
Marek Olšák | 3c3feb3 | 2013-08-06 08:48:07 +0200 | [diff] [blame] | 1256 | rtex = (struct r600_texture*)surf->texture; |
| 1257 | |
Marek Olšák | 49e3c74 | 2016-06-21 18:18:46 +0200 | [diff] [blame] | 1258 | if (rtex->fmask.size) |
| 1259 | rtex->dirty_level_mask |= 1 << surf->u.tex.level; |
| 1260 | if (rtex->dcc_gather_statistics) |
| 1261 | rtex->separate_dcc_dirty = true; |
Marek Olšák | 3c3feb3 | 2013-08-06 08:48:07 +0200 | [diff] [blame] | 1262 | } while (mask); |
| 1263 | } |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1264 | |
| 1265 | pipe_resource_reference(&ib.buffer, NULL); |
Marek Olšák | ba0c16f | 2014-01-22 01:29:18 +0100 | [diff] [blame] | 1266 | sctx->b.num_draw_calls++; |
Marek Olšák | 4140afd | 2016-06-09 23:16:43 +0200 | [diff] [blame] | 1267 | if (G_0286E8_WAVESIZE(sctx->spi_tmpring_size)) |
| 1268 | sctx->b.num_spill_draw_calls++; |
Christian König | ca9cf61 | 2012-07-19 15:20:45 +0200 | [diff] [blame] | 1269 | } |
Marek Olšák | 837907b | 2014-09-05 11:59:10 +0200 | [diff] [blame] | 1270 | |
Marek Olšák | 837907b | 2014-09-05 11:59:10 +0200 | [diff] [blame] | 1271 | void si_trace_emit(struct si_context *sctx) |
| 1272 | { |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 1273 | struct radeon_winsys_cs *cs = sctx->b.gfx.cs; |
Marek Olšák | 837907b | 2014-09-05 11:59:10 +0200 | [diff] [blame] | 1274 | |
Marek Olšák | 2c14a6d | 2015-08-19 11:53:25 +0200 | [diff] [blame] | 1275 | sctx->trace_id++; |
Marek Olšák | 6cc8f6c | 2015-11-07 14:00:30 +0100 | [diff] [blame] | 1276 | radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, sctx->trace_buf, |
Marek Olšák | 2edb060 | 2015-09-26 23:18:55 +0200 | [diff] [blame] | 1277 | RADEON_USAGE_READWRITE, RADEON_PRIO_TRACE); |
Marek Olšák | 2c14a6d | 2015-08-19 11:53:25 +0200 | [diff] [blame] | 1278 | radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); |
Marek Olšák | 16e5d8a | 2015-08-19 18:45:11 +0200 | [diff] [blame] | 1279 | radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) | |
| 1280 | S_370_WR_CONFIRM(1) | |
| 1281 | S_370_ENGINE_SEL(V_370_ME)); |
Marek Olšák | 2c14a6d | 2015-08-19 11:53:25 +0200 | [diff] [blame] | 1282 | radeon_emit(cs, sctx->trace_buf->gpu_address); |
| 1283 | radeon_emit(cs, sctx->trace_buf->gpu_address >> 32); |
| 1284 | radeon_emit(cs, sctx->trace_id); |
| 1285 | radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); |
Bas Nieuwenhuizen | 0ef1b4d | 2016-12-24 13:08:00 +0100 | [diff] [blame] | 1286 | radeon_emit(cs, AC_ENCODE_TRACE_POINT(sctx->trace_id)); |
Marek Olšák | 837907b | 2014-09-05 11:59:10 +0200 | [diff] [blame] | 1287 | } |