| /* |
| * Copyright 2020 Advanced Micro Devices, Inc. |
| * All Rights Reserved. |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * on the rights to use, copy, modify, merge, publish, distribute, sub |
| * license, and/or sell copies of the Software, and to permit persons to whom |
| * the Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, |
| * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR |
| * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE |
| * USE OR OTHER DEALINGS IN THE SOFTWARE. |
| */ |
| |
| #include "si_pipe.h" |
| #include "si_shader_internal.h" |
| #include "sid.h" |
| |
| static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx) |
| { |
| switch (ctx->type) { |
| case PIPE_SHADER_TESS_CTRL: |
| return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8); |
| |
| case PIPE_SHADER_TESS_EVAL: |
| return ac_get_arg(&ctx->ac, ctx->tes_rel_patch_id); |
| |
| default: |
| assert(0); |
| return NULL; |
| } |
| } |
| |
| /* Tessellation shaders pass outputs to the next shader using LDS. |
| * |
| * LS outputs = TCS inputs |
| * TCS outputs = TES inputs |
| * |
| * The LDS layout is: |
| * - TCS inputs for patch 0 |
| * - TCS inputs for patch 1 |
| * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2) |
| * - ... |
| * - TCS outputs for patch 0 = get_tcs_out_patch0_offset |
| * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset |
| * - TCS outputs for patch 1 |
| * - Per-patch TCS outputs for patch 1 |
| * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2) |
| * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2) |
| * - ... |
| * |
| * All three shaders VS(LS), TCS, TES share the same LDS space. |
| */ |
| |
| static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx) |
| { |
| return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13); |
| } |
| |
| static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx) |
| { |
| assert(ctx->type == PIPE_SHADER_TESS_CTRL); |
| |
| if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) |
| return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4; |
| |
| return util_last_bit64(ctx->shader->selector->outputs_written) * 4; |
| } |
| |
| static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx) |
| { |
| unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx); |
| |
| return LLVMConstInt(ctx->ac.i32, stride, 0); |
| } |
| |
| static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx) |
| { |
| if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) |
| return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13); |
| |
| const struct si_shader_info *info = &ctx->shader->selector->info; |
| unsigned tcs_out_vertices = info->properties[TGSI_PROPERTY_TCS_VERTICES_OUT]; |
| unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx); |
| unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written); |
| unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4; |
| return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0); |
| } |
| |
| static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx) |
| { |
| return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16), |
| LLVMConstInt(ctx->ac.i32, 4, 0), ""); |
| } |
| |
| static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx) |
| { |
| return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16), |
| LLVMConstInt(ctx->ac.i32, 4, 0), ""); |
| } |
| |
| static LLVMValueRef get_tcs_in_current_patch_offset(struct si_shader_context *ctx) |
| { |
| LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx); |
| LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); |
| |
| return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, ""); |
| } |
| |
| static LLVMValueRef get_tcs_out_current_patch_offset(struct si_shader_context *ctx) |
| { |
| LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx); |
| LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); |
| LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); |
| |
| return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset); |
| } |
| |
| static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx) |
| { |
| LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx); |
| LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx); |
| LLVMValueRef rel_patch_id = get_rel_patch_id(ctx); |
| |
| return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset); |
| } |
| |
| static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx) |
| { |
| unsigned tcs_out_vertices = |
| ctx->shader->selector ? ctx->shader->selector->info.properties[TGSI_PROPERTY_TCS_VERTICES_OUT] |
| : 0; |
| |
| /* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */ |
| if (ctx->type == PIPE_SHADER_TESS_CTRL && tcs_out_vertices) |
| return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0); |
| |
| return si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 6); |
| } |
| |
| static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx) |
| { |
| unsigned stride; |
| |
| switch (ctx->type) { |
| case PIPE_SHADER_VERTEX: |
| stride = ctx->shader->selector->lshs_vertex_stride / 4; |
| return LLVMConstInt(ctx->ac.i32, stride, 0); |
| |
| case PIPE_SHADER_TESS_CTRL: |
| if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) { |
| stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4; |
| return LLVMConstInt(ctx->ac.i32, stride, 0); |
| } |
| return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8); |
| |
| default: |
| assert(0); |
| return NULL; |
| } |
| } |
| |
| static LLVMValueRef |
| get_dw_address_from_generic_indices(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride, |
| LLVMValueRef base_addr, LLVMValueRef vertex_index, |
| LLVMValueRef param_index, ubyte name, ubyte index) |
| { |
| if (vertex_dw_stride) { |
| base_addr = ac_build_imad(&ctx->ac, vertex_index, vertex_dw_stride, base_addr); |
| } |
| |
| if (param_index) { |
| base_addr = ac_build_imad(&ctx->ac, param_index, LLVMConstInt(ctx->ac.i32, 4, 0), base_addr); |
| } |
| |
| int param = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER || |
| name == TGSI_SEMANTIC_TESSOUTER |
| ? si_shader_io_get_unique_index_patch(name, index) |
| : si_shader_io_get_unique_index(name, index, false); |
| |
| /* Add the base address of the element. */ |
| return LLVMBuildAdd(ctx->ac.builder, base_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), ""); |
| } |
| |
| /* The offchip buffer layout for TCS->TES is |
| * |
| * - attribute 0 of patch 0 vertex 0 |
| * - attribute 0 of patch 0 vertex 1 |
| * - attribute 0 of patch 0 vertex 2 |
| * ... |
| * - attribute 0 of patch 1 vertex 0 |
| * - attribute 0 of patch 1 vertex 1 |
| * ... |
| * - attribute 1 of patch 0 vertex 0 |
| * - attribute 1 of patch 0 vertex 1 |
| * ... |
| * - per patch attribute 0 of patch 0 |
| * - per patch attribute 0 of patch 1 |
| * ... |
| * |
| * Note that every attribute has 4 components. |
| */ |
| static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx, |
| LLVMValueRef rel_patch_id, LLVMValueRef vertex_index, |
| LLVMValueRef param_index) |
| { |
| LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices; |
| LLVMValueRef param_stride, constant16; |
| |
| vertices_per_patch = get_num_tcs_out_vertices(ctx); |
| num_patches = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6); |
| total_vertices = LLVMBuildMul(ctx->ac.builder, vertices_per_patch, num_patches, ""); |
| |
| constant16 = LLVMConstInt(ctx->ac.i32, 16, 0); |
| if (vertex_index) { |
| base_addr = ac_build_imad(&ctx->ac, rel_patch_id, vertices_per_patch, vertex_index); |
| param_stride = total_vertices; |
| } else { |
| base_addr = rel_patch_id; |
| param_stride = num_patches; |
| } |
| |
| base_addr = ac_build_imad(&ctx->ac, param_index, param_stride, base_addr); |
| base_addr = LLVMBuildMul(ctx->ac.builder, base_addr, constant16, ""); |
| |
| if (!vertex_index) { |
| LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->tcs_offchip_layout, 12, 20); |
| |
| base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr, patch_data_offset, ""); |
| } |
| return base_addr; |
| } |
| |
| static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context *ctx, |
| LLVMValueRef vertex_index, |
| LLVMValueRef param_index, |
| ubyte name, ubyte index) |
| { |
| unsigned param_index_base; |
| |
| param_index_base = name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER || |
| name == TGSI_SEMANTIC_TESSOUTER |
| ? si_shader_io_get_unique_index_patch(name, index) |
| : si_shader_io_get_unique_index(name, index, false); |
| |
| if (param_index) { |
| param_index = LLVMBuildAdd(ctx->ac.builder, param_index, |
| LLVMConstInt(ctx->ac.i32, param_index_base, 0), ""); |
| } else { |
| param_index = LLVMConstInt(ctx->ac.i32, param_index_base, 0); |
| } |
| |
| return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), vertex_index, param_index); |
| } |
| |
| static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle, |
| LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base, |
| bool can_speculate) |
| { |
| LLVMValueRef value, value2; |
| LLVMTypeRef vec_type = LLVMVectorType(type, 4); |
| |
| if (swizzle == ~0) { |
| value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc, |
| can_speculate, false); |
| |
| return LLVMBuildBitCast(ctx->ac.builder, value, vec_type, ""); |
| } |
| |
| if (ac_get_type_size(type) != 8) { |
| value = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, ac_glc, |
| can_speculate, false); |
| |
| value = LLVMBuildBitCast(ctx->ac.builder, value, vec_type, ""); |
| return LLVMBuildExtractElement(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, swizzle, 0), |
| ""); |
| } |
| |
| value = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4, ac_glc, |
| can_speculate, false); |
| |
| value2 = ac_build_buffer_load(&ctx->ac, buffer, 1, NULL, base, offset, swizzle * 4 + 4, ac_glc, |
| can_speculate, false); |
| |
| return si_build_gather_64bit(ctx, type, value, value2); |
| } |
| |
| /** |
| * Load from LSHS LDS storage. |
| * |
| * \param type output value type |
| * \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4 |
| * \param dw_addr address in dwords |
| */ |
| static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle, |
| LLVMValueRef dw_addr) |
| { |
| LLVMValueRef value; |
| |
| if (swizzle == ~0) { |
| LLVMValueRef values[4]; |
| |
| for (unsigned chan = 0; chan < 4; chan++) |
| values[chan] = lshs_lds_load(ctx, type, chan, dw_addr); |
| |
| return ac_build_gather_values(&ctx->ac, values, 4); |
| } |
| |
| /* Split 64-bit loads. */ |
| if (ac_get_type_size(type) == 8) { |
| LLVMValueRef lo, hi; |
| |
| lo = lshs_lds_load(ctx, ctx->ac.i32, swizzle, dw_addr); |
| hi = lshs_lds_load(ctx, ctx->ac.i32, swizzle + 1, dw_addr); |
| return si_build_gather_64bit(ctx, type, lo, hi); |
| } |
| |
| dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), ""); |
| |
| value = ac_lds_load(&ctx->ac, dw_addr); |
| |
| return LLVMBuildBitCast(ctx->ac.builder, value, type, ""); |
| } |
| |
| /** |
| * Store to LSHS LDS storage. |
| * |
| * \param swizzle offset (typically 0..3) |
| * \param dw_addr address in dwords |
| * \param value value to store |
| */ |
| static void lshs_lds_store(struct si_shader_context *ctx, unsigned dw_offset_imm, |
| LLVMValueRef dw_addr, LLVMValueRef value) |
| { |
| dw_addr = |
| LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, dw_offset_imm, 0), ""); |
| |
| ac_lds_store(&ctx->ac, dw_addr, value); |
| } |
| |
/* Selects which ring get_tess_ring_descriptor() builds a descriptor for. */
enum si_tess_ring {
   /* TCS-side address plus the screen's tess_offchip_ring_size. */
   TCS_FACTOR_RING,
   /* Address derived from the tcs_out_lds_layout argument. */
   TESS_OFFCHIP_RING_TCS,
   /* Address taken from the tes_offchip_addr argument. */
   TESS_OFFCHIP_RING_TES,
};
| |
| static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring) |
| { |
| LLVMBuilderRef builder = ctx->ac.builder; |
| LLVMValueRef addr = ac_get_arg( |
| &ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->tes_offchip_addr : ctx->tcs_out_lds_layout); |
| |
| /* TCS only receives high 13 bits of the address. */ |
| if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) { |
| addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 0), ""); |
| } |
| |
| if (ring == TCS_FACTOR_RING) { |
| unsigned tf_offset = ctx->screen->tess_offchip_ring_size; |
| addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), ""); |
| } |
| |
| uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | |
| S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); |
| |
| if (ctx->screen->info.chip_class >= GFX10) |
| rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) | |
| S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); |
| else |
| rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | |
| S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); |
| |
| LLVMValueRef desc[4]; |
| desc[0] = addr; |
| desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0); |
| desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0); |
| desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false); |
| |
| return ac_build_gather_values(&ctx->ac, desc, 4); |
| } |
| |
| void si_llvm_preload_tes_rings(struct si_shader_context *ctx) |
| { |
| ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES); |
| } |
| |
| static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type, |
| LLVMValueRef vertex_index, LLVMValueRef param_index, |
| unsigned const_index, unsigned location, |
| unsigned driver_location, unsigned component, |
| unsigned num_components, bool is_patch, |
| bool is_compact, bool load_input) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| struct si_shader_info *info = &ctx->shader->selector->info; |
| LLVMValueRef dw_addr, stride; |
| ubyte name, index; |
| |
| driver_location = driver_location / 4; |
| |
| if (load_input) { |
| name = info->input_semantic_name[driver_location]; |
| index = info->input_semantic_index[driver_location]; |
| } else { |
| name = info->output_semantic_name[driver_location]; |
| index = info->output_semantic_index[driver_location]; |
| } |
| |
| assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER || |
| name == TGSI_SEMANTIC_TESSOUTER) == is_patch); |
| |
| if (load_input) { |
| stride = get_tcs_in_vertex_dw_stride(ctx); |
| dw_addr = get_tcs_in_current_patch_offset(ctx); |
| } else { |
| if (is_patch) { |
| stride = NULL; |
| dw_addr = get_tcs_out_current_patch_data_offset(ctx); |
| } else { |
| stride = get_tcs_out_vertex_dw_stride(ctx); |
| dw_addr = get_tcs_out_current_patch_offset(ctx); |
| } |
| } |
| |
| if (!param_index) { |
| param_index = LLVMConstInt(ctx->ac.i32, const_index, 0); |
| } |
| |
| dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index, |
| name, index); |
| |
| LLVMValueRef value[4]; |
| for (unsigned i = 0; i < num_components; i++) { |
| unsigned offset = i; |
| if (ac_get_type_size(type) == 8) |
| offset *= 2; |
| |
| offset += component; |
| value[i + component] = lshs_lds_load(ctx, type, offset, dw_addr); |
| } |
| |
| return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); |
| } |
| |
| static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type, |
| LLVMValueRef vertex_index, LLVMValueRef param_index, |
| unsigned const_index, unsigned location, |
| unsigned driver_location, unsigned component, |
| unsigned num_components, bool is_patch, bool is_compact, |
| bool load_input) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| struct si_shader_info *info = &ctx->shader->selector->info; |
| LLVMValueRef base, addr; |
| |
| driver_location = driver_location / 4; |
| ubyte name = info->input_semantic_name[driver_location]; |
| ubyte index = info->input_semantic_index[driver_location]; |
| |
| assert((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER || |
| name == TGSI_SEMANTIC_TESSOUTER) == is_patch); |
| |
| base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); |
| |
| if (!param_index) { |
| param_index = LLVMConstInt(ctx->ac.i32, const_index, 0); |
| } |
| |
| addr = |
| get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index); |
| |
| /* TODO: This will generate rather ordinary llvm code, although it |
| * should be easy for the optimiser to fix up. In future we might want |
| * to refactor buffer_load(). |
| */ |
| LLVMValueRef value[4]; |
| for (unsigned i = 0; i < num_components; i++) { |
| unsigned offset = i; |
| if (ac_get_type_size(type) == 8) { |
| offset *= 2; |
| if (offset == 4) { |
| ubyte name = info->input_semantic_name[driver_location + 1]; |
| ubyte index = info->input_semantic_index[driver_location + 1]; |
| addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, |
| name, index); |
| } |
| |
| offset = offset % 4; |
| } |
| |
| offset += component; |
| value[i + component] = |
| buffer_load(ctx, type, offset, ctx->tess_offchip_ring, base, addr, true); |
| } |
| |
| return ac_build_varying_gather_values(&ctx->ac, value, num_components, component); |
| } |
| |
| static void si_nir_store_output_tcs(struct ac_shader_abi *abi, const struct nir_variable *var, |
| LLVMValueRef vertex_index, LLVMValueRef param_index, |
| unsigned const_index, LLVMValueRef src, unsigned writemask) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| struct si_shader_info *info = &ctx->shader->selector->info; |
| const unsigned component = var->data.location_frac; |
| unsigned driver_location = var->data.driver_location; |
| LLVMValueRef dw_addr, stride; |
| LLVMValueRef buffer, base, addr; |
| LLVMValueRef values[8]; |
| bool skip_lds_store; |
| bool is_tess_factor = false, is_tess_inner = false; |
| |
| driver_location = driver_location / 4; |
| ubyte name = info->output_semantic_name[driver_location]; |
| ubyte index = info->output_semantic_index[driver_location]; |
| |
| bool is_const = !param_index; |
| if (!param_index) |
| param_index = LLVMConstInt(ctx->ac.i32, const_index, 0); |
| |
| const bool is_patch = var->data.patch || var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || |
| var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER; |
| |
| /* Invalid SPIR-V can cause this. */ |
| if ((name == TGSI_SEMANTIC_PATCH || name == TGSI_SEMANTIC_TESSINNER || |
| name == TGSI_SEMANTIC_TESSOUTER) != is_patch) |
| return; |
| |
| if (!is_patch) { |
| stride = get_tcs_out_vertex_dw_stride(ctx); |
| dw_addr = get_tcs_out_current_patch_offset(ctx); |
| dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index, |
| name, index); |
| |
| skip_lds_store = !info->reads_pervertex_outputs; |
| } else { |
| dw_addr = get_tcs_out_current_patch_data_offset(ctx); |
| dw_addr = get_dw_address_from_generic_indices(ctx, NULL, dw_addr, vertex_index, param_index, |
| name, index); |
| |
| skip_lds_store = !info->reads_perpatch_outputs; |
| |
| if (is_const && const_index == 0) { |
| int name = info->output_semantic_name[driver_location]; |
| |
| /* Always write tess factors into LDS for the TCS epilog. */ |
| if (name == TGSI_SEMANTIC_TESSINNER || name == TGSI_SEMANTIC_TESSOUTER) { |
| /* The epilog doesn't read LDS if invocation 0 defines tess factors. */ |
| skip_lds_store = !info->reads_tessfactor_outputs && |
| ctx->shader->selector->info.tessfactors_are_def_in_all_invocs; |
| is_tess_factor = true; |
| is_tess_inner = name == TGSI_SEMANTIC_TESSINNER; |
| } |
| } |
| } |
| |
| buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); |
| |
| base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); |
| |
| addr = |
| get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, name, index); |
| |
| for (unsigned chan = component; chan < 8; chan++) { |
| if (!(writemask & (1 << chan))) |
| continue; |
| LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component); |
| |
| unsigned buffer_store_offset = chan % 4; |
| if (chan == 4) { |
| ubyte name = info->output_semantic_name[driver_location + 1]; |
| ubyte index = info->output_semantic_index[driver_location + 1]; |
| addr = get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, |
| name, index); |
| } |
| |
| /* Skip LDS stores if there is no LDS read of this output. */ |
| if (!skip_lds_store) |
| lshs_lds_store(ctx, chan, dw_addr, value); |
| |
| value = ac_to_integer(&ctx->ac, value); |
| values[chan] = value; |
| |
| if (writemask != 0xF && !is_tess_factor) { |
| ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base, |
| 4 * buffer_store_offset, ac_glc); |
| } |
| |
| /* Write tess factors into VGPRs for the epilog. */ |
| if (is_tess_factor && ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) { |
| if (!is_tess_inner) { |
| LLVMBuildStore(ctx->ac.builder, value, /* outer */ |
| ctx->invoc0_tess_factors[chan]); |
| } else if (chan < 2) { |
| LLVMBuildStore(ctx->ac.builder, value, /* inner */ |
| ctx->invoc0_tess_factors[4 + chan]); |
| } |
| } |
| } |
| |
| if (writemask == 0xF && !is_tess_factor) { |
| LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4); |
| ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr, base, 0, ac_glc); |
| } |
| } |
| |
| static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->tes_u), ac_get_arg(&ctx->ac, ctx->tes_v), |
| ctx->ac.f32_0, ctx->ac.f32_0}; |
| |
| /* For triangles, the vector should be (u, v, 1-u-v). */ |
| if (ctx->shader->selector->info.properties[TGSI_PROPERTY_TES_PRIM_MODE] == PIPE_PRIM_TRIANGLES) { |
| coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1, |
| LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), ""); |
| } |
| return ac_build_gather_values(&ctx->ac, coord, 4); |
| } |
| |
| static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic_name) |
| { |
| LLVMValueRef base, addr; |
| |
| int param = si_shader_io_get_unique_index_patch(semantic_name, 0); |
| |
| base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); |
| addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL, |
| LLVMConstInt(ctx->ac.i32, param, 0)); |
| |
| return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true); |
| } |
| |
| static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned semantic_name) |
| { |
| LLVMValueRef buf, slot, val[4]; |
| int i, offset; |
| |
| slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0); |
| buf = ac_get_arg(&ctx->ac, ctx->rw_buffers); |
| buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot); |
| offset = semantic_name == TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL ? 4 : 0; |
| |
| for (i = 0; i < 4; i++) |
| val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0)); |
| return ac_build_gather_values(&ctx->ac, val, 4); |
| } |
| |
| static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id, |
| bool load_default_state) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| unsigned semantic_name; |
| |
| if (load_default_state) { |
| switch (varying_id) { |
| case VARYING_SLOT_TESS_LEVEL_INNER: |
| semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_INNER_LEVEL; |
| break; |
| case VARYING_SLOT_TESS_LEVEL_OUTER: |
| semantic_name = TGSI_SEMANTIC_TESS_DEFAULT_OUTER_LEVEL; |
| break; |
| default: |
| unreachable("unknown tess level"); |
| } |
| return load_tess_level_default(ctx, semantic_name); |
| } |
| |
| switch (varying_id) { |
| case VARYING_SLOT_TESS_LEVEL_INNER: |
| semantic_name = TGSI_SEMANTIC_TESSINNER; |
| break; |
| case VARYING_SLOT_TESS_LEVEL_OUTER: |
| semantic_name = TGSI_SEMANTIC_TESSOUTER; |
| break; |
| default: |
| unreachable("unknown tess level"); |
| } |
| |
| return load_tess_level(ctx, semantic_name); |
| } |
| |
| static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| if (ctx->type == PIPE_SHADER_TESS_CTRL) |
| return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6); |
| else if (ctx->type == PIPE_SHADER_TESS_EVAL) |
| return get_num_tcs_out_vertices(ctx); |
| else |
| unreachable("invalid shader stage for TGSI_SEMANTIC_VERTICESIN"); |
| } |
| |
| /** |
| * Forward all outputs from the vertex shader to the TES. This is only used |
| * for the fixed function TCS. |
| */ |
| static void si_copy_tcs_inputs(struct si_shader_context *ctx) |
| { |
| LLVMValueRef invocation_id, buffer, buffer_offset; |
| LLVMValueRef lds_vertex_stride, lds_base; |
| uint64_t inputs; |
| |
| invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5); |
| buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); |
| buffer_offset = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); |
| |
| lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx); |
| lds_base = get_tcs_in_current_patch_offset(ctx); |
| lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base); |
| |
| inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy; |
| while (inputs) { |
| unsigned i = u_bit_scan64(&inputs); |
| |
| LLVMValueRef lds_ptr = |
| LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), ""); |
| |
| LLVMValueRef buffer_addr = get_tcs_tes_buffer_address( |
| ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0)); |
| |
| LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr); |
| |
| ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0, |
| ac_glc); |
| } |
| } |
| |
| static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef rel_patch_id, |
| LLVMValueRef invocation_id, |
| LLVMValueRef tcs_out_current_patch_data_offset, |
| LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2]) |
| { |
| struct si_shader *shader = ctx->shader; |
| unsigned tess_inner_index, tess_outer_index; |
| LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer; |
| LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4]; |
| unsigned stride, outer_comps, inner_comps, i, offset; |
| |
| /* Add a barrier before loading tess factors from LDS. */ |
| if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) |
| si_llvm_emit_barrier(ctx); |
| |
| /* Do this only for invocation 0, because the tess levels are per-patch, |
| * not per-vertex. |
| * |
| * This can't jump, because invocation 0 executes this. It should |
| * at least mask out the loads and stores for other invocations. |
| */ |
| ac_build_ifcc(&ctx->ac, |
| LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503); |
| |
| /* Determine the layout of one tess factor element in the buffer. */ |
| switch (shader->key.part.tcs.epilog.prim_mode) { |
| case PIPE_PRIM_LINES: |
| stride = 2; /* 2 dwords, 1 vec2 store */ |
| outer_comps = 2; |
| inner_comps = 0; |
| break; |
| case PIPE_PRIM_TRIANGLES: |
| stride = 4; /* 4 dwords, 1 vec4 store */ |
| outer_comps = 3; |
| inner_comps = 1; |
| break; |
| case PIPE_PRIM_QUADS: |
| stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */ |
| outer_comps = 4; |
| inner_comps = 2; |
| break; |
| default: |
| assert(0); |
| return; |
| } |
| |
| for (i = 0; i < 4; i++) { |
| inner[i] = LLVMGetUndef(ctx->ac.i32); |
| outer[i] = LLVMGetUndef(ctx->ac.i32); |
| } |
| |
| if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) { |
| /* Tess factors are in VGPRs. */ |
| for (i = 0; i < outer_comps; i++) |
| outer[i] = out[i] = invoc0_tf_outer[i]; |
| for (i = 0; i < inner_comps; i++) |
| inner[i] = out[outer_comps + i] = invoc0_tf_inner[i]; |
| } else { |
| /* Load tess_inner and tess_outer from LDS. |
| * Any invocation can write them, so we can't get them from a temporary. |
| */ |
| tess_inner_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0); |
| tess_outer_index = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0); |
| |
| lds_base = tcs_out_current_patch_data_offset; |
| lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base, |
| LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), ""); |
| lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base, |
| LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), ""); |
| |
| for (i = 0; i < outer_comps; i++) { |
| outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer); |
| } |
| for (i = 0; i < inner_comps; i++) { |
| inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner); |
| } |
| } |
| |
| if (shader->key.part.tcs.epilog.prim_mode == PIPE_PRIM_LINES) { |
| /* For isolines, the hardware expects tess factors in the |
| * reverse order from what NIR specifies. |
| */ |
| LLVMValueRef tmp = out[0]; |
| out[0] = out[1]; |
| out[1] = tmp; |
| } |
| |
| /* Convert the outputs to vectors for stores. */ |
| vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4)); |
| vec1 = NULL; |
| |
| if (stride > 4) |
| vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4); |
| |
| /* Get the buffer. */ |
| buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING); |
| |
| /* Get the offset. */ |
| tf_base = ac_get_arg(&ctx->ac, ctx->tcs_factor_offset); |
| byteoffset = |
| LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), ""); |
| |
| ac_build_ifcc(&ctx->ac, |
| LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504); |
| |
| /* Store the dynamic HS control word. */ |
| offset = 0; |
| if (ctx->screen->info.chip_class <= GFX8) { |
| ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1, |
| ctx->ac.i32_0, tf_base, offset, ac_glc); |
| offset += 4; |
| } |
| |
| ac_build_endif(&ctx->ac, 6504); |
| |
| /* Store the tessellation factors. */ |
| ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, offset, |
| ac_glc); |
| offset += 16; |
| if (vec1) |
| ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, offset, |
| ac_glc); |
| |
| /* Store the tess factors into the offchip buffer if TES reads them. */ |
| if (shader->key.part.tcs.epilog.tes_reads_tess_factors) { |
| LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset; |
| LLVMValueRef tf_inner_offset; |
| unsigned param_outer, param_inner; |
| |
| buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS); |
| base = ac_get_arg(&ctx->ac, ctx->tcs_offchip_offset); |
| |
| param_outer = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSOUTER, 0); |
| tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, |
| LLVMConstInt(ctx->ac.i32, param_outer, 0)); |
| |
| unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false) |
| ? outer_comps |
| : util_next_power_of_two(outer_comps); |
| outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size); |
| |
| ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, base, 0, |
| ac_glc); |
| if (inner_comps) { |
| param_inner = si_shader_io_get_unique_index_patch(TGSI_SEMANTIC_TESSINNER, 0); |
| tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL, |
| LLVMConstInt(ctx->ac.i32, param_inner, 0)); |
| |
| inner_vec = |
| inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps); |
| ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, base, |
| 0, ac_glc); |
| } |
| } |
| |
| ac_build_endif(&ctx->ac, 6503); |
| } |
| |
| /* This only writes the tessellation factor levels. */ |
| static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, |
| LLVMValueRef *addrs) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| LLVMBuilderRef builder = ctx->ac.builder; |
| LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset; |
| |
| si_copy_tcs_inputs(ctx); |
| |
| rel_patch_id = get_rel_patch_id(ctx); |
| invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5); |
| tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx); |
| |
| if (ctx->screen->info.chip_class >= GFX9) { |
| LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block}; |
| LLVMValueRef values[2]; |
| |
| ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label); |
| |
| values[0] = rel_patch_id; |
| values[1] = LLVMGetUndef(ctx->ac.i32); |
| rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks); |
| |
| values[0] = tf_lds_offset; |
| values[1] = LLVMGetUndef(ctx->ac.i32); |
| tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks); |
| |
| values[0] = invocation_id; |
| values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */ |
| invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks); |
| } |
| |
| /* Return epilog parameters from this function. */ |
| LLVMValueRef ret = ctx->return_value; |
| unsigned vgpr; |
| |
| if (ctx->screen->info.chip_class >= GFX9) { |
| ret = |
| si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); |
| /* Tess offchip and tess factor offsets are at the beginning. */ |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4); |
| vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1; |
| } else { |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT); |
| /* Tess offchip and tess factor offsets are after user SGPRs. */ |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, GFX6_TCS_NUM_USER_SGPR); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1); |
| vgpr = GFX6_TCS_NUM_USER_SGPR + 2; |
| } |
| |
| /* VGPRs */ |
| rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id); |
| invocation_id = ac_to_float(&ctx->ac, invocation_id); |
| tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset); |
| |
| /* Leave a hole corresponding to the two input VGPRs. This ensures that |
| * the invocation_id output does not alias the tcs_rel_ids input, |
| * which saves a V_MOV on gfx9. |
| */ |
| vgpr += 2; |
| |
| ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, ""); |
| ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, ""); |
| |
| if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) { |
| vgpr++; /* skip the tess factor LDS offset */ |
| for (unsigned i = 0; i < 6; i++) { |
| LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], ""); |
| value = ac_to_float(&ctx->ac, value); |
| ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, ""); |
| } |
| } else { |
| ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, ""); |
| } |
| ctx->return_value = ret; |
| } |
| |
| /* Pass TCS inputs from LS to TCS on GFX9. */ |
| static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx) |
| { |
| LLVMValueRef ret = ctx->return_value; |
| |
| ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0); |
| ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_offset, 2); |
| ret = si_insert_input_ret(ctx, ret, ctx->merged_wave_info, 3); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_factor_offset, 4); |
| ret = si_insert_input_ret(ctx, ret, ctx->merged_scratch_offset, 5); |
| |
| ret = si_insert_input_ptr(ctx, ret, ctx->rw_buffers, 8 + SI_SGPR_RW_BUFFERS); |
| ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images, |
| 8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES); |
| |
| ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS); |
| |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS); |
| ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT); |
| |
| unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR; |
| ret = LLVMBuildInsertValue(ctx->ac.builder, ret, |
| ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)), |
| vgpr++, ""); |
| ret = LLVMBuildInsertValue(ctx->ac.builder, ret, |
| ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)), |
| vgpr++, ""); |
| ctx->return_value = ret; |
| } |
| |
| void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs) |
| { |
| struct si_shader_context *ctx = si_shader_context_from_abi(abi); |
| struct si_shader *shader = ctx->shader; |
| struct si_shader_info *info = &shader->selector->info; |
| unsigned i, chan; |
| LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->rel_auto_id); |
| LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx); |
| LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->ac.builder, vertex_id, vertex_dw_stride, ""); |
| |
| /* Write outputs to LDS. The next shader (TCS aka HS) will read |
| * its inputs from it. */ |
| for (i = 0; i < info->num_outputs; i++) { |
| unsigned name = info->output_semantic_name[i]; |
| unsigned index = info->output_semantic_index[i]; |
| |
| /* The ARB_shader_viewport_layer_array spec contains the |
| * following issue: |
| * |
| * 2) What happens if gl_ViewportIndex or gl_Layer is |
| * written in the vertex shader and a geometry shader is |
| * present? |
| * |
| * RESOLVED: The value written by the last vertex processing |
| * stage is used. If the last vertex processing stage |
| * (vertex, tessellation evaluation or geometry) does not |
| * statically assign to gl_ViewportIndex or gl_Layer, index |
| * or layer zero is assumed. |
| * |
| * So writes to those outputs in VS-as-LS are simply ignored. |
| */ |
| if (name == TGSI_SEMANTIC_LAYER || name == TGSI_SEMANTIC_VIEWPORT_INDEX) |
| continue; |
| |
| int param = si_shader_io_get_unique_index(name, index, false); |
| LLVMValueRef dw_addr = |
| LLVMBuildAdd(ctx->ac.builder, base_dw_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), ""); |
| |
| for (chan = 0; chan < 4; chan++) { |
| if (!(info->output_usagemask[i] & (1 << chan))) |
| continue; |
| |
| lshs_lds_store(ctx, chan, dw_addr, |
| LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "")); |
| } |
| } |
| |
| if (ctx->screen->info.chip_class >= GFX9) |
| si_set_ls_return_value_for_tcs(ctx); |
| } |
| |
| /** |
| * Compile the TCS epilog function. This writes tesselation factors to memory |
| * based on the output primitive type of the tesselator (determined by TES). |
| */ |
| void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key) |
| { |
| memset(&ctx->args, 0, sizeof(ctx->args)); |
| |
| if (ctx->screen->info.chip_class >= GFX9) { |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */ |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout); |
| } else { |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_offset); |
| ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_factor_offset); |
| } |
| |
| ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */ |
| ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */ |
| struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */ |
| ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id); |
| struct ac_arg invocation_id; /* invocation ID within the patch */ |
| ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id); |
| struct ac_arg |
| tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */ |
| ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset); |
| |
| struct ac_arg tess_factors[6]; |
| for (unsigned i = 0; i < 6; i++) |
| ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]); |
| |
| /* Create the function. */ |
| si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.chip_class >= GFX7 ? 128 : 0); |
| ac_declare_lds_as_pointer(&ctx->ac); |
| |
| LLVMValueRef invoc0_tess_factors[6]; |
| for (unsigned i = 0; i < 6; i++) |
| invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]); |
| |
| si_write_tess_factors(ctx, ac_get_arg(&ctx->ac, rel_patch_id), |
| ac_get_arg(&ctx->ac, invocation_id), |
| ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset), |
| invoc0_tess_factors, invoc0_tess_factors + 4); |
| |
| LLVMBuildRetVoid(ctx->ac.builder); |
| } |
| |
| void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx) |
| { |
| ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings; |
| ctx->abi.load_tess_level = si_load_tess_level; |
| ctx->abi.store_tcs_outputs = si_nir_store_output_tcs; |
| ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue; |
| ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in; |
| } |
| |
| void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader) |
| { |
| ctx->abi.load_tess_varyings = si_nir_load_input_tes; |
| ctx->abi.load_tess_coord = si_load_tess_coord; |
| ctx->abi.load_tess_level = si_load_tess_level; |
| ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in; |
| |
| if (ctx->shader->key.as_es) |
| ctx->abi.emit_outputs = si_llvm_emit_es_epilogue; |
| else if (ngg_cull_shader) |
| ctx->abi.emit_outputs = gfx10_emit_ngg_culling_epilogue_4x_wave32; |
| else if (ctx->shader->key.as_ngg) |
| ctx->abi.emit_outputs = gfx10_emit_ngg_epilogue; |
| else |
| ctx->abi.emit_outputs = si_llvm_emit_vs_epilogue; |
| } |