| /* |
| * Copyright © 2012 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /** |
| * \file lower_ubo_reference.cpp |
| * |
| * IR lower pass to replace dereferences of variables in a uniform |
| * buffer object with usage of ir_binop_ubo_load expressions, each of |
| * which can read data up to the size of a vec4. |
| * |
| * This relieves drivers of the responsibility to deal with tricky UBO |
| * layout issues like std140 structures and row_major matrices on |
| * their own. |
| */ |
| |
| #include "lower_buffer_access.h" |
| #include "ir_builder.h" |
| #include "main/macros.h" |
| #include "glsl_parser_extras.h" |
| |
| using namespace ir_builder; |
| |
| namespace { |
| class lower_ubo_reference_visitor : |
| public lower_buffer_access::lower_buffer_access { |
| public: |
| lower_ubo_reference_visitor(struct gl_shader *shader) |
| : shader(shader) |
| { |
| } |
| |
| void handle_rvalue(ir_rvalue **rvalue); |
| ir_visitor_status visit_enter(ir_assignment *ir); |
| |
| void setup_for_load_or_store(void *mem_ctx, |
| ir_variable *var, |
| ir_rvalue *deref, |
| ir_rvalue **offset, |
| unsigned *const_offset, |
| bool *row_major, |
| int *matrix_columns, |
| unsigned packing); |
| ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, |
| ir_rvalue *offset); |
| ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, |
| ir_rvalue *offset); |
| |
| bool check_for_buffer_array_copy(ir_assignment *ir); |
| bool check_for_buffer_struct_copy(ir_assignment *ir); |
| void check_for_ssbo_store(ir_assignment *ir); |
| void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, |
| ir_variable *write_var, unsigned write_mask); |
| ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, |
| unsigned write_mask); |
| |
| enum { |
| ubo_load_access, |
| ssbo_load_access, |
| ssbo_store_access, |
| ssbo_unsized_array_length_access, |
| ssbo_atomic_access, |
| } buffer_access_type; |
| |
| void insert_buffer_access(void *mem_ctx, ir_dereference *deref, |
| const glsl_type *type, ir_rvalue *offset, |
| unsigned mask, int channel); |
| |
| ir_visitor_status visit_enter(class ir_expression *); |
| ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); |
| void check_ssbo_unsized_array_length_expression(class ir_expression *); |
| void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); |
| |
| ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, |
| ir_dereference *, |
| ir_variable *); |
| ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); |
| |
| unsigned calculate_unsized_array_stride(ir_dereference *deref, |
| unsigned packing); |
| |
| ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); |
| ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); |
| ir_visitor_status visit_enter(ir_call *ir); |
| |
| struct gl_shader *shader; |
| struct gl_uniform_buffer_variable *ubo_var; |
| ir_rvalue *uniform_block; |
| bool progress; |
| }; |
| |
| /** |
| * Determine the name of the interface block field |
| * |
| * This is the name of the specific member as it would appear in the |
| * \c gl_uniform_buffer_variable::Name field in the shader's |
| * \c UniformBlocks array. |
| */ |
| static const char * |
| interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, |
| ir_rvalue **nonconst_block_index) |
| { |
| *nonconst_block_index = NULL; |
| char *name_copy = NULL; |
| size_t base_length = 0; |
| |
| /* Loop back through the IR until we find the uniform block */ |
| ir_rvalue *ir = d; |
| while (ir != NULL) { |
| switch (ir->ir_type) { |
| case ir_type_dereference_variable: { |
| /* Exit loop */ |
| ir = NULL; |
| break; |
| } |
| |
| case ir_type_dereference_record: { |
| ir_dereference_record *r = (ir_dereference_record *) ir; |
| ir = r->record->as_dereference(); |
| |
| /* If we got here it means any previous array subscripts belong to |
| * block members and not the block itself so skip over them in the |
| * next pass. |
| */ |
| d = ir; |
| break; |
| } |
| |
| case ir_type_dereference_array: { |
| ir_dereference_array *a = (ir_dereference_array *) ir; |
| ir = a->array->as_dereference(); |
| break; |
| } |
| |
| case ir_type_swizzle: { |
| ir_swizzle *s = (ir_swizzle *) ir; |
| ir = s->val->as_dereference(); |
| /* Skip swizzle in the next pass */ |
| d = ir; |
| break; |
| } |
| |
| default: |
| assert(!"Should not get here."); |
| break; |
| } |
| } |
| |
| while (d != NULL) { |
| switch (d->ir_type) { |
| case ir_type_dereference_variable: { |
| ir_dereference_variable *v = (ir_dereference_variable *) d; |
| if (name_copy != NULL && |
| v->var->is_interface_instance() && |
| v->var->type->is_array()) { |
| return name_copy; |
| } else { |
| *nonconst_block_index = NULL; |
| return base_name; |
| } |
| |
| break; |
| } |
| |
| case ir_type_dereference_array: { |
| ir_dereference_array *a = (ir_dereference_array *) d; |
| size_t new_length; |
| |
| if (name_copy == NULL) { |
| name_copy = ralloc_strdup(mem_ctx, base_name); |
| base_length = strlen(name_copy); |
| } |
| |
| /* For arrays of arrays we start at the innermost array and work our |
| * way out so we need to insert the subscript at the base of the |
| * name string rather than just attaching it to the end. |
| */ |
| new_length = base_length; |
| ir_constant *const_index = a->array_index->as_constant(); |
| char *end = ralloc_strdup(NULL, &name_copy[new_length]); |
| if (!const_index) { |
| ir_rvalue *array_index = a->array_index; |
| if (array_index->type != glsl_type::uint_type) |
| array_index = i2u(array_index); |
| |
| if (a->array->type->is_array() && |
| a->array->type->fields.array->is_array()) { |
| ir_constant *base_size = new(mem_ctx) |
| ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); |
| array_index = mul(array_index, base_size); |
| } |
| |
| if (*nonconst_block_index) { |
| *nonconst_block_index = add(*nonconst_block_index, array_index); |
| } else { |
| *nonconst_block_index = array_index; |
| } |
| |
| ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", |
| end); |
| } else { |
| ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", |
| const_index->get_uint_component(0), |
| end); |
| } |
| ralloc_free(end); |
| |
| d = a->array->as_dereference(); |
| |
| break; |
| } |
| |
| default: |
| assert(!"Should not get here."); |
| break; |
| } |
| } |
| |
| assert(!"Should not get here."); |
| return NULL; |
| } |
| |
| void |
| lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, |
| ir_variable *var, |
| ir_rvalue *deref, |
| ir_rvalue **offset, |
| unsigned *const_offset, |
| bool *row_major, |
| int *matrix_columns, |
| unsigned packing) |
| { |
| /* Determine the name of the interface block */ |
| ir_rvalue *nonconst_block_index; |
| const char *const field_name = |
| interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, |
| deref, &nonconst_block_index); |
| |
| /* Locate the block by interface name */ |
| unsigned num_blocks; |
| struct gl_uniform_block **blocks; |
| if (this->buffer_access_type != ubo_load_access) { |
| num_blocks = shader->NumShaderStorageBlocks; |
| blocks = shader->ShaderStorageBlocks; |
| } else { |
| num_blocks = shader->NumUniformBlocks; |
| blocks = shader->UniformBlocks; |
| } |
| this->uniform_block = NULL; |
| for (unsigned i = 0; i < num_blocks; i++) { |
| if (strcmp(field_name, blocks[i]->Name) == 0) { |
| |
| ir_constant *index = new(mem_ctx) ir_constant(i); |
| |
| if (nonconst_block_index) { |
| this->uniform_block = add(nonconst_block_index, index); |
| } else { |
| this->uniform_block = index; |
| } |
| |
| this->ubo_var = var->is_interface_instance() |
| ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location]; |
| |
| break; |
| } |
| } |
| |
| assert(this->uniform_block); |
| |
| *const_offset = ubo_var->Offset; |
| |
| setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major, |
| matrix_columns, packing); |
| } |
| |
| void |
| lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) |
| { |
| if (!*rvalue) |
| return; |
| |
| ir_dereference *deref = (*rvalue)->as_dereference(); |
| if (!deref) |
| return; |
| |
| ir_variable *var = deref->variable_referenced(); |
| if (!var || !var->is_in_buffer_block()) |
| return; |
| |
| void *mem_ctx = ralloc_parent(shader->ir); |
| |
| ir_rvalue *offset = NULL; |
| unsigned const_offset; |
| bool row_major; |
| int matrix_columns; |
| unsigned packing = var->get_interface_type()->interface_packing; |
| |
| this->buffer_access_type = |
| var->is_in_shader_storage_block() ? |
| ssbo_load_access : ubo_load_access; |
| |
| /* Compute the offset to the start if the dereference as well as other |
| * information we need to configure the write |
| */ |
| setup_for_load_or_store(mem_ctx, var, deref, |
| &offset, &const_offset, |
| &row_major, &matrix_columns, |
| packing); |
| assert(offset); |
| |
| /* Now that we've calculated the offset to the start of the |
| * dereference, walk over the type and emit loads into a temporary. |
| */ |
| const glsl_type *type = (*rvalue)->type; |
| ir_variable *load_var = new(mem_ctx) ir_variable(type, |
| "ubo_load_temp", |
| ir_var_temporary); |
| base_ir->insert_before(load_var); |
| |
| ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, |
| "ubo_load_temp_offset", |
| ir_var_temporary); |
| base_ir->insert_before(load_offset); |
| base_ir->insert_before(assign(load_offset, offset)); |
| |
| deref = new(mem_ctx) ir_dereference_variable(load_var); |
| emit_access(mem_ctx, false, deref, load_offset, const_offset, |
| row_major, matrix_columns, packing, 0); |
| *rvalue = deref; |
| |
| progress = true; |
| } |
| |
| ir_expression * |
| lower_ubo_reference_visitor::ubo_load(void *mem_ctx, |
| const glsl_type *type, |
| ir_rvalue *offset) |
| { |
| ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); |
| return new(mem_ctx) |
| ir_expression(ir_binop_ubo_load, |
| type, |
| block_ref, |
| offset); |
| |
| } |
| |
| static bool |
| shader_storage_buffer_object(const _mesa_glsl_parse_state *state) |
| { |
| return state->ARB_shader_storage_buffer_object_enable; |
| } |
| |
| ir_call * |
| lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, |
| ir_rvalue *deref, |
| ir_rvalue *offset, |
| unsigned write_mask) |
| { |
| exec_list sig_params; |
| |
| ir_variable *block_ref = new(mem_ctx) |
| ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); |
| sig_params.push_tail(block_ref); |
| |
| ir_variable *offset_ref = new(mem_ctx) |
| ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); |
| sig_params.push_tail(offset_ref); |
| |
| ir_variable *val_ref = new(mem_ctx) |
| ir_variable(deref->type, "value" , ir_var_function_in); |
| sig_params.push_tail(val_ref); |
| |
| ir_variable *writemask_ref = new(mem_ctx) |
| ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); |
| sig_params.push_tail(writemask_ref); |
| |
| ir_function_signature *sig = new(mem_ctx) |
| ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); |
| assert(sig); |
| sig->replace_parameters(&sig_params); |
| sig->is_intrinsic = true; |
| |
| ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); |
| f->add_signature(sig); |
| |
| exec_list call_params; |
| call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); |
| call_params.push_tail(offset->clone(mem_ctx, NULL)); |
| call_params.push_tail(deref->clone(mem_ctx, NULL)); |
| call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); |
| return new(mem_ctx) ir_call(sig, NULL, &call_params); |
| } |
| |
| ir_call * |
| lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, |
| const struct glsl_type *type, |
| ir_rvalue *offset) |
| { |
| exec_list sig_params; |
| |
| ir_variable *block_ref = new(mem_ctx) |
| ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); |
| sig_params.push_tail(block_ref); |
| |
| ir_variable *offset_ref = new(mem_ctx) |
| ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); |
| sig_params.push_tail(offset_ref); |
| |
| ir_function_signature *sig = |
| new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); |
| assert(sig); |
| sig->replace_parameters(&sig_params); |
| sig->is_intrinsic = true; |
| |
| ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); |
| f->add_signature(sig); |
| |
| ir_variable *result = new(mem_ctx) |
| ir_variable(type, "ssbo_load_result", ir_var_temporary); |
| base_ir->insert_before(result); |
| ir_dereference_variable *deref_result = new(mem_ctx) |
| ir_dereference_variable(result); |
| |
| exec_list call_params; |
| call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); |
| call_params.push_tail(offset->clone(mem_ctx, NULL)); |
| |
| return new(mem_ctx) ir_call(sig, deref_result, &call_params); |
| } |
| |
| void |
| lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, |
| ir_dereference *deref, |
| const glsl_type *type, |
| ir_rvalue *offset, |
| unsigned mask, |
| int channel) |
| { |
| switch (this->buffer_access_type) { |
| case ubo_load_access: |
| base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), |
| ubo_load(mem_ctx, type, offset), |
| mask)); |
| break; |
| case ssbo_load_access: { |
| ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); |
| base_ir->insert_before(load_ssbo); |
| ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); |
| ir_assignment *assignment = |
| assign(deref->clone(mem_ctx, NULL), value, mask); |
| base_ir->insert_before(assignment); |
| break; |
| } |
| case ssbo_store_access: |
| if (channel >= 0) { |
| base_ir->insert_after(ssbo_store(mem_ctx, |
| swizzle(deref, channel, 1), |
| offset, 1)); |
| } else { |
| base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); |
| } |
| break; |
| default: |
| unreachable("invalid buffer_access_type in insert_buffer_access"); |
| } |
| } |
| |
| void |
| lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, |
| ir_dereference *deref, |
| ir_variable *var, |
| ir_variable *write_var, |
| unsigned write_mask) |
| { |
| ir_rvalue *offset = NULL; |
| unsigned const_offset; |
| bool row_major; |
| int matrix_columns; |
| unsigned packing = var->get_interface_type()->interface_packing; |
| |
| this->buffer_access_type = ssbo_store_access; |
| |
| /* Compute the offset to the start if the dereference as well as other |
| * information we need to configure the write |
| */ |
| setup_for_load_or_store(mem_ctx, var, deref, |
| &offset, &const_offset, |
| &row_major, &matrix_columns, |
| packing); |
| assert(offset); |
| |
| /* Now emit writes from the temporary to memory */ |
| ir_variable *write_offset = |
| new(mem_ctx) ir_variable(glsl_type::uint_type, |
| "ssbo_store_temp_offset", |
| ir_var_temporary); |
| |
| base_ir->insert_before(write_offset); |
| base_ir->insert_before(assign(write_offset, offset)); |
| |
| deref = new(mem_ctx) ir_dereference_variable(write_var); |
| emit_access(mem_ctx, true, deref, write_offset, const_offset, |
| row_major, matrix_columns, packing, write_mask); |
| } |
| |
| ir_visitor_status |
| lower_ubo_reference_visitor::visit_enter(ir_expression *ir) |
| { |
| check_ssbo_unsized_array_length_expression(ir); |
| return rvalue_visit(ir); |
| } |
| |
| ir_expression * |
| lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr) |
| { |
| if (expr->operation != |
| ir_expression_operation(ir_unop_ssbo_unsized_array_length)) |
| return NULL; |
| |
| ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); |
| if (!rvalue || |
| !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) |
| return NULL; |
| |
| ir_dereference *deref = expr->operands[0]->as_dereference(); |
| if (!deref) |
| return NULL; |
| |
| ir_variable *var = expr->operands[0]->variable_referenced(); |
| if (!var || !var->is_in_shader_storage_block()) |
| return NULL; |
| return process_ssbo_unsized_array_length(&rvalue, deref, var); |
| } |
| |
| void |
| lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir) |
| { |
| if (ir->operation == |
| ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { |
| /* Don't replace this unop if it is found alone. It is going to be |
| * removed by the optimization passes or replaced if it is part of |
| * an ir_assignment or another ir_expression. |
| */ |
| return; |
| } |
| |
| for (unsigned i = 0; i < ir->get_num_operands(); i++) { |
| if (ir->operands[i]->ir_type != ir_type_expression) |
| continue; |
| ir_expression *expr = (ir_expression *) ir->operands[i]; |
| ir_expression *temp = calculate_ssbo_unsized_array_length(expr); |
| if (!temp) |
| continue; |
| |
| delete expr; |
| ir->operands[i] = temp; |
| } |
| } |
| |
| void |
| lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) |
| { |
| if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) |
| return; |
| |
| ir_expression *expr = (ir_expression *) ir->rhs; |
| ir_expression *temp = calculate_ssbo_unsized_array_length(expr); |
| if (!temp) |
| return; |
| |
| delete expr; |
| ir->rhs = temp; |
| return; |
| } |
| |
| ir_expression * |
| lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) |
| { |
| ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); |
| return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, |
| glsl_type::int_type, |
| block_ref); |
| } |
| |
| unsigned |
| lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, |
| unsigned packing) |
| { |
| unsigned array_stride = 0; |
| |
| switch (deref->ir_type) { |
| case ir_type_dereference_variable: |
| { |
| ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; |
| const struct glsl_type *unsized_array_type = NULL; |
| /* An unsized array can be sized by other lowering passes, so pick |
| * the first field of the array which has the data type of the unsized |
| * array. |
| */ |
| unsized_array_type = deref_var->var->type->fields.array; |
| |
| /* Whether or not the field is row-major (because it might be a |
| * bvec2 or something) does not affect the array itself. We need |
| * to know whether an array element in its entirety is row-major. |
| */ |
| const bool array_row_major = |
| is_dereferenced_thing_row_major(deref_var); |
| |
| if (packing == GLSL_INTERFACE_PACKING_STD430) { |
| array_stride = unsized_array_type->std430_array_stride(array_row_major); |
| } else { |
| array_stride = unsized_array_type->std140_size(array_row_major); |
| array_stride = glsl_align(array_stride, 16); |
| } |
| break; |
| } |
| case ir_type_dereference_record: |
| { |
| ir_dereference_record *deref_record = (ir_dereference_record *) deref; |
| ir_dereference *interface_deref = |
| deref_record->record->as_dereference(); |
| assert(interface_deref != NULL); |
| const struct glsl_type *interface_type = interface_deref->type; |
| unsigned record_length = interface_type->length; |
| /* Unsized array is always the last element of the interface */ |
| const struct glsl_type *unsized_array_type = |
| interface_type->fields.structure[record_length - 1].type->fields.array; |
| |
| const bool array_row_major = |
| is_dereferenced_thing_row_major(deref_record); |
| |
| if (packing == GLSL_INTERFACE_PACKING_STD430) { |
| array_stride = unsized_array_type->std430_array_stride(array_row_major); |
| } else { |
| array_stride = unsized_array_type->std140_size(array_row_major); |
| array_stride = glsl_align(array_stride, 16); |
| } |
| break; |
| } |
| default: |
| unreachable("Unsupported dereference type"); |
| } |
| return array_stride; |
| } |
| |
| ir_expression * |
| lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, |
| ir_dereference *deref, |
| ir_variable *var) |
| { |
| void *mem_ctx = ralloc_parent(*rvalue); |
| |
| ir_rvalue *base_offset = NULL; |
| unsigned const_offset; |
| bool row_major; |
| int matrix_columns; |
| unsigned packing = var->get_interface_type()->interface_packing; |
| int unsized_array_stride = calculate_unsized_array_stride(deref, packing); |
| |
| this->buffer_access_type = ssbo_unsized_array_length_access; |
| |
| /* Compute the offset to the start if the dereference as well as other |
| * information we need to calculate the length. |
| */ |
| setup_for_load_or_store(mem_ctx, var, deref, |
| &base_offset, &const_offset, |
| &row_major, &matrix_columns, |
| packing); |
| /* array.length() = |
| * max((buffer_object_size - offset_of_array) / stride_of_array, 0) |
| */ |
| ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); |
| |
| ir_expression *offset_of_array = new(mem_ctx) |
| ir_expression(ir_binop_add, base_offset, |
| new(mem_ctx) ir_constant(const_offset)); |
| ir_expression *offset_of_array_int = new(mem_ctx) |
| ir_expression(ir_unop_u2i, offset_of_array); |
| |
| ir_expression *sub = new(mem_ctx) |
| ir_expression(ir_binop_sub, buffer_size, offset_of_array_int); |
| ir_expression *div = new(mem_ctx) |
| ir_expression(ir_binop_div, sub, |
| new(mem_ctx) ir_constant(unsized_array_stride)); |
| ir_expression *max = new(mem_ctx) |
| ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0)); |
| |
| return max; |
| } |
| |
| void |
| lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) |
| { |
| if (!ir || !ir->lhs) |
| return; |
| |
| ir_rvalue *rvalue = ir->lhs->as_rvalue(); |
| if (!rvalue) |
| return; |
| |
| ir_dereference *deref = ir->lhs->as_dereference(); |
| if (!deref) |
| return; |
| |
| ir_variable *var = ir->lhs->variable_referenced(); |
| if (!var || !var->is_in_shader_storage_block()) |
| return; |
| |
| /* We have a write to a buffer variable, so declare a temporary and rewrite |
| * the assignment so that the temporary is the LHS. |
| */ |
| void *mem_ctx = ralloc_parent(shader->ir); |
| |
| const glsl_type *type = rvalue->type; |
| ir_variable *write_var = new(mem_ctx) ir_variable(type, |
| "ssbo_store_temp", |
| ir_var_temporary); |
| base_ir->insert_before(write_var); |
| ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); |
| |
| /* Now we have to write the value assigned to the temporary back to memory */ |
| write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); |
| progress = true; |
| } |
| |
| static bool |
| is_buffer_backed_variable(ir_variable *var) |
| { |
| return var->is_in_buffer_block() || |
| var->data.mode == ir_var_shader_shared; |
| } |
| |
| bool |
| lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) |
| { |
| if (!ir || !ir->lhs || !ir->rhs) |
| return false; |
| |
| /* LHS and RHS must be arrays |
| * FIXME: arrays of arrays? |
| */ |
| if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) |
| return false; |
| |
| /* RHS must be a buffer-backed variable. This is what can cause the problem |
| * since it would lead to a series of loads that need to live until we |
| * see the writes to the LHS. |
| */ |
| ir_variable *rhs_var = ir->rhs->variable_referenced(); |
| if (!rhs_var || !is_buffer_backed_variable(rhs_var)) |
| return false; |
| |
| /* Split the array copy into individual element copies to reduce |
| * register pressure |
| */ |
| ir_dereference *rhs_deref = ir->rhs->as_dereference(); |
| if (!rhs_deref) |
| return false; |
| |
| ir_dereference *lhs_deref = ir->lhs->as_dereference(); |
| if (!lhs_deref) |
| return false; |
| |
| assert(lhs_deref->type->length == rhs_deref->type->length); |
| void *mem_ctx = ralloc_parent(shader->ir); |
| |
| for (unsigned i = 0; i < lhs_deref->type->length; i++) { |
| ir_dereference *lhs_i = |
| new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), |
| new(mem_ctx) ir_constant(i)); |
| |
| ir_dereference *rhs_i = |
| new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), |
| new(mem_ctx) ir_constant(i)); |
| ir->insert_after(assign(lhs_i, rhs_i)); |
| } |
| |
| ir->remove(); |
| progress = true; |
| return true; |
| } |
| |
| bool |
| lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) |
| { |
| if (!ir || !ir->lhs || !ir->rhs) |
| return false; |
| |
| /* LHS and RHS must be records */ |
| if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record()) |
| return false; |
| |
| /* RHS must be a buffer-backed variable. This is what can cause the problem |
| * since it would lead to a series of loads that need to live until we |
| * see the writes to the LHS. |
| */ |
| ir_variable *rhs_var = ir->rhs->variable_referenced(); |
| if (!rhs_var || !is_buffer_backed_variable(rhs_var)) |
| return false; |
| |
| /* Split the struct copy into individual element copies to reduce |
| * register pressure |
| */ |
| ir_dereference *rhs_deref = ir->rhs->as_dereference(); |
| if (!rhs_deref) |
| return false; |
| |
| ir_dereference *lhs_deref = ir->lhs->as_dereference(); |
| if (!lhs_deref) |
| return false; |
| |
| assert(lhs_deref->type->record_compare(rhs_deref->type)); |
| void *mem_ctx = ralloc_parent(shader->ir); |
| |
| for (unsigned i = 0; i < lhs_deref->type->length; i++) { |
| const char *field_name = lhs_deref->type->fields.structure[i].name; |
| ir_dereference *lhs_field = |
| new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), |
| field_name); |
| ir_dereference *rhs_field = |
| new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), |
| field_name); |
| ir->insert_after(assign(lhs_field, rhs_field)); |
| } |
| |
| ir->remove(); |
| progress = true; |
| return true; |
| } |
| |
| ir_visitor_status |
| lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) |
| { |
| /* Array and struct copies could involve large amounts of load/store |
| * operations. To improve register pressure we want to special-case |
| * these and split them into individual element copies. |
| * This way we avoid emitting all the loads for the RHS first and |
| * all the writes for the LHS second and register usage is more |
| * efficient. |
| */ |
| if (check_for_buffer_array_copy(ir)) |
| return visit_continue_with_parent; |
| |
| if (check_for_buffer_struct_copy(ir)) |
| return visit_continue_with_parent; |
| |
| check_ssbo_unsized_array_length_assignment(ir); |
| check_for_ssbo_store(ir); |
| return rvalue_visit(ir); |
| } |
| |
| /* Lowers the intrinsic call to a new internal intrinsic that swaps the |
| * access to the buffer variable in the first parameter by an offset |
| * and block index. This involves creating the new internal intrinsic |
| * (i.e. the new function signature). |
| */ |
| ir_call * |
| lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) |
| { |
| /* SSBO atomics usually have 2 parameters, the buffer variable and an |
| * integer argument. The exception is CompSwap, that has an additional |
| * integer parameter. |
| */ |
| int param_count = ir->actual_parameters.length(); |
| assert(param_count == 2 || param_count == 3); |
| |
| /* First argument must be a scalar integer buffer variable */ |
| exec_node *param = ir->actual_parameters.get_head(); |
| ir_instruction *inst = (ir_instruction *) param; |
| assert(inst->ir_type == ir_type_dereference_variable || |
| inst->ir_type == ir_type_dereference_array || |
| inst->ir_type == ir_type_dereference_record || |
| inst->ir_type == ir_type_swizzle); |
| |
| ir_rvalue *deref = (ir_rvalue *) inst; |
| assert(deref->type->is_scalar() && deref->type->is_integer()); |
| |
| ir_variable *var = deref->variable_referenced(); |
| assert(var); |
| |
| /* Compute the offset to the start if the dereference and the |
| * block index |
| */ |
| void *mem_ctx = ralloc_parent(shader->ir); |
| |
| ir_rvalue *offset = NULL; |
| unsigned const_offset; |
| bool row_major; |
| int matrix_columns; |
| unsigned packing = var->get_interface_type()->interface_packing; |
| |
| this->buffer_access_type = ssbo_atomic_access; |
| |
| setup_for_load_or_store(mem_ctx, var, deref, |
| &offset, &const_offset, |
| &row_major, &matrix_columns, |
| packing); |
| assert(offset); |
| assert(!row_major); |
| assert(matrix_columns == 1); |
| |
| ir_rvalue *deref_offset = |
| add(offset, new(mem_ctx) ir_constant(const_offset)); |
| ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL); |
| |
| /* Create the new internal function signature that will take a block |
| * index and offset instead of a buffer variable |
| */ |
| exec_list sig_params; |
| ir_variable *sig_param = new(mem_ctx) |
| ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); |
| sig_params.push_tail(sig_param); |
| |
| sig_param = new(mem_ctx) |
| ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); |
| sig_params.push_tail(sig_param); |
| |
| const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? |
| glsl_type::int_type : glsl_type::uint_type; |
| sig_param = new(mem_ctx) |
| ir_variable(type, "data1", ir_var_function_in); |
| sig_params.push_tail(sig_param); |
| |
| if (param_count == 3) { |
| sig_param = new(mem_ctx) |
| ir_variable(type, "data2", ir_var_function_in); |
| sig_params.push_tail(sig_param); |
| } |
| |
| ir_function_signature *sig = |
| new(mem_ctx) ir_function_signature(deref->type, |
| shader_storage_buffer_object); |
| assert(sig); |
| sig->replace_parameters(&sig_params); |
| sig->is_intrinsic = true; |
| |
| char func_name[64]; |
| sprintf(func_name, "%s_ssbo", ir->callee_name()); |
| ir_function *f = new(mem_ctx) ir_function(func_name); |
| f->add_signature(sig); |
| |
| /* Now, create the call to the internal intrinsic */ |
| exec_list call_params; |
| call_params.push_tail(block_index); |
| call_params.push_tail(deref_offset); |
| param = ir->actual_parameters.get_head()->get_next(); |
| ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); |
| call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); |
| if (param_count == 3) { |
| param = param->get_next(); |
| param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); |
| call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); |
| } |
| ir_dereference_variable *return_deref = |
| ir->return_deref->clone(mem_ctx, NULL); |
| return new(mem_ctx) ir_call(sig, return_deref, &call_params); |
| } |
| |
| ir_call * |
| lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) |
| { |
| exec_list& params = ir->actual_parameters; |
| |
| if (params.length() < 2 || params.length() > 3) |
| return ir; |
| |
| ir_rvalue *rvalue = |
| ((ir_instruction *) params.get_head())->as_rvalue(); |
| if (!rvalue) |
| return ir; |
| |
| ir_variable *var = rvalue->variable_referenced(); |
| if (!var || !var->is_in_shader_storage_block()) |
| return ir; |
| |
| const char *callee = ir->callee_name(); |
| if (!strcmp("__intrinsic_atomic_add", callee) || |
| !strcmp("__intrinsic_atomic_min", callee) || |
| !strcmp("__intrinsic_atomic_max", callee) || |
| !strcmp("__intrinsic_atomic_and", callee) || |
| !strcmp("__intrinsic_atomic_or", callee) || |
| !strcmp("__intrinsic_atomic_xor", callee) || |
| !strcmp("__intrinsic_atomic_exchange", callee) || |
| !strcmp("__intrinsic_atomic_comp_swap", callee)) { |
| return lower_ssbo_atomic_intrinsic(ir); |
| } |
| |
| return ir; |
| } |
| |
| |
| ir_visitor_status |
| lower_ubo_reference_visitor::visit_enter(ir_call *ir) |
| { |
| ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir); |
| if (new_ir != ir) { |
| progress = true; |
| base_ir->replace_with(new_ir); |
| return visit_continue_with_parent; |
| } |
| |
| return rvalue_visit(ir); |
| } |
| |
| |
| } /* unnamed namespace */ |
| |
| void |
| lower_ubo_reference(struct gl_shader *shader) |
| { |
| lower_ubo_reference_visitor v(shader); |
| |
| /* Loop over the instructions lowering references, because we take |
| * a deref of a UBO array using a UBO dereference as the index will |
| * produce a collection of instructions all of which have cloned |
| * UBO dereferences for that array index. |
| */ |
| do { |
| v.progress = false; |
| visit_list_elements(&v, shader->ir); |
| } while (v.progress); |
| } |