| /* |
| * Copyright © 2016 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
| /** |
| * \file lower_int64.cpp |
| * |
| * Lower 64-bit operations to 32-bit operations. Each 64-bit value is lowered |
| * to a uvec2. For each operation that can be lowered, there is a function |
| * called __builtin_foo with the same number of parameters that takes uvec2 |
| * sources and produces uvec2 results. An operation like |
| * |
| * uint64_t(x) * uint64_t(y) |
| * |
| * becomes |
| * |
| * packUint2x32(__builtin_umul64(unpackUint2x32(x), unpackUint2x32(y))); |
| */ |
| |
| #include "main/macros.h" |
| #include "compiler/glsl_types.h" |
| #include "ir.h" |
| #include "ir_rvalue_visitor.h" |
| #include "ir_builder.h" |
| #include "ir_optimization.h" |
| #include "util/hash_table.h" |
| #include "builtin_functions.h" |
| |
| typedef ir_function_signature *(*function_generator)(void *mem_ctx, |
| builtin_available_predicate avail); |
| |
| using namespace ir_builder; |
| |
| namespace lower_64bit { |
| void expand_source(ir_factory &, ir_rvalue *val, ir_variable **expanded_src); |
| |
| ir_dereference_variable *compact_destination(ir_factory &, |
| const glsl_type *type, |
| ir_variable *result[4]); |
| |
| ir_rvalue *lower_op_to_function_call(ir_instruction *base_ir, |
| ir_expression *ir, |
| ir_function_signature *callee); |
| }; |
| |
| using namespace lower_64bit; |
| |
| namespace { |
| |
| class lower_64bit_visitor : public ir_rvalue_visitor { |
| public: |
| lower_64bit_visitor(void *mem_ctx, exec_list *instructions, unsigned lower) |
| : progress(false), lower(lower), |
| function_list(), added_functions(&function_list, mem_ctx) |
| { |
| functions = _mesa_hash_table_create(mem_ctx, |
| _mesa_hash_string, |
| _mesa_key_string_equal); |
| |
| foreach_in_list(ir_instruction, node, instructions) { |
| ir_function *const f = node->as_function(); |
| |
| if (f == NULL || strncmp(f->name, "__builtin_", 10) != 0) |
| continue; |
| |
| add_function(f); |
| } |
| } |
| |
| ~lower_64bit_visitor() |
| { |
| _mesa_hash_table_destroy(functions, NULL); |
| } |
| |
| void handle_rvalue(ir_rvalue **rvalue); |
| |
| void add_function(ir_function *f) |
| { |
| _mesa_hash_table_insert(functions, f->name, f); |
| } |
| |
| ir_function *find_function(const char *name) |
| { |
| struct hash_entry *const entry = |
| _mesa_hash_table_search(functions, name); |
| |
| return entry != NULL ? (ir_function *) entry->data : NULL; |
| } |
| |
| bool progress; |
| |
| private: |
| unsigned lower; /** Bitfield of which operations to lower */ |
| |
| /** Hashtable containing all of the known functions in the IR */ |
| struct hash_table *functions; |
| |
| public: |
| exec_list function_list; |
| |
| private: |
| ir_factory added_functions; |
| |
| ir_rvalue *handle_op(ir_expression *ir, const char *function_name, |
| function_generator generator); |
| }; |
| |
| } /* anonymous namespace */ |
| |
/**
 * Determine if a particular type of lowering should occur
 *
 * Must be used inside a member function of an object that has a \c lower
 * bitfield.  The argument is parenthesized so that a compound expression such
 * as <tt>A | B</tt> is tested as a whole instead of being split by operator
 * precedence.
 */
#define lowering(x) (this->lower & (x))
| |
| bool |
| lower_64bit_integer_instructions(exec_list *instructions, |
| unsigned what_to_lower) |
| { |
| if (instructions->is_empty()) |
| return false; |
| |
| ir_instruction *first_inst = (ir_instruction *) instructions->get_head_raw(); |
| void *const mem_ctx = ralloc_parent(first_inst); |
| lower_64bit_visitor v(mem_ctx, instructions, what_to_lower); |
| |
| visit_list_elements(&v, instructions); |
| |
| if (v.progress && !v.function_list.is_empty()) { |
| /* Move all of the nodes from function_list to the head if the incoming |
| * instruction list. |
| */ |
| exec_node *const after = &instructions->head_sentinel; |
| exec_node *const before = instructions->head_sentinel.next; |
| exec_node *const head = v.function_list.head_sentinel.next; |
| exec_node *const tail = v.function_list.tail_sentinel.prev; |
| |
| before->next = head; |
| head->prev = before; |
| |
| after->prev = tail; |
| tail->next = after; |
| } |
| |
| return v.progress; |
| } |
| |
| |
| /** |
| * Expand individual 64-bit values to uvec2 values |
| * |
| * Each operation is in one of a few forms. |
| * |
| * vector op vector |
| * vector op scalar |
| * scalar op vector |
| * scalar op scalar |
| * |
| * In the 'vector op vector' case, the two vectors must have the same size. |
| * In a way, the 'scalar op scalar' form is special case of the 'vector op |
| * vector' form. |
| * |
| * This method generates a new set of uvec2 values for each element of a |
| * single operand. If the operand is a scalar, the uvec2 is replicated |
| * multiple times. A value like |
| * |
| * u64vec3(a) + u64vec3(b) |
| * |
| * becomes |
| * |
| * u64vec3 tmp0 = u64vec3(a) + u64vec3(b); |
| * uvec2 tmp1 = unpackUint2x32(tmp0.x); |
| * uvec2 tmp2 = unpackUint2x32(tmp0.y); |
| * uvec2 tmp3 = unpackUint2x32(tmp0.z); |
| * |
| * and the returned operands array contains ir_variable pointers to |
| * |
| * { tmp1, tmp2, tmp3, tmp1 } |
| */ |
| void |
| lower_64bit::expand_source(ir_factory &body, |
| ir_rvalue *val, |
| ir_variable **expanded_src) |
| { |
| assert(val->type->is_integer_64()); |
| |
| ir_variable *const temp = body.make_temp(val->type, "tmp"); |
| |
| body.emit(assign(temp, val)); |
| |
| const ir_expression_operation unpack_opcode = |
| val->type->base_type == GLSL_TYPE_UINT64 |
| ? ir_unop_unpack_uint_2x32 : ir_unop_unpack_int_2x32; |
| |
| const glsl_type *const type = |
| val->type->base_type == GLSL_TYPE_UINT64 |
| ? glsl_type::uvec2_type : glsl_type::ivec2_type; |
| |
| unsigned i; |
| for (i = 0; i < val->type->vector_elements; i++) { |
| expanded_src[i] = body.make_temp(type, "expanded_64bit_source"); |
| |
| body.emit(assign(expanded_src[i], |
| expr(unpack_opcode, swizzle(temp, i, 1)))); |
| } |
| |
| for (/* empty */; i < 4; i++) |
| expanded_src[i] = expanded_src[0]; |
| } |
| |
| /** |
| * Convert a series of uvec2 results into a single 64-bit integer vector |
| */ |
| ir_dereference_variable * |
| lower_64bit::compact_destination(ir_factory &body, |
| const glsl_type *type, |
| ir_variable *result[4]) |
| { |
| const ir_expression_operation pack_opcode = |
| type->base_type == GLSL_TYPE_UINT64 |
| ? ir_unop_pack_uint_2x32 : ir_unop_pack_int_2x32; |
| |
| ir_variable *const compacted_result = |
| body.make_temp(type, "compacted_64bit_result"); |
| |
| for (unsigned i = 0; i < type->vector_elements; i++) { |
| body.emit(assign(compacted_result, |
| expr(pack_opcode, result[i]), |
| 1U << i)); |
| } |
| |
| void *const mem_ctx = ralloc_parent(compacted_result); |
| return new(mem_ctx) ir_dereference_variable(compacted_result); |
| } |
| |
| ir_rvalue * |
| lower_64bit::lower_op_to_function_call(ir_instruction *base_ir, |
| ir_expression *ir, |
| ir_function_signature *callee) |
| { |
| const unsigned num_operands = ir->num_operands; |
| ir_variable *src[4][4]; |
| ir_variable *dst[4]; |
| void *const mem_ctx = ralloc_parent(ir); |
| exec_list instructions; |
| unsigned source_components = 0; |
| const glsl_type *const result_type = |
| ir->type->base_type == GLSL_TYPE_UINT64 |
| ? glsl_type::uvec2_type : glsl_type::ivec2_type; |
| |
| ir_factory body(&instructions, mem_ctx); |
| |
| for (unsigned i = 0; i < num_operands; i++) { |
| expand_source(body, ir->operands[i], src[i]); |
| |
| if (ir->operands[i]->type->vector_elements > source_components) |
| source_components = ir->operands[i]->type->vector_elements; |
| } |
| |
| for (unsigned i = 0; i < source_components; i++) { |
| dst[i] = body.make_temp(result_type, "expanded_64bit_result"); |
| |
| exec_list parameters; |
| |
| for (unsigned j = 0; j < num_operands; j++) |
| parameters.push_tail(new(mem_ctx) ir_dereference_variable(src[j][i])); |
| |
| ir_dereference_variable *const return_deref = |
| new(mem_ctx) ir_dereference_variable(dst[i]); |
| |
| ir_call *const c = new(mem_ctx) ir_call(callee, |
| return_deref, |
| ¶meters); |
| |
| body.emit(c); |
| } |
| |
| ir_rvalue *const rv = compact_destination(body, ir->type, dst); |
| |
| /* Move all of the nodes from instructions between base_ir and the |
| * instruction before it. |
| */ |
| exec_node *const after = base_ir; |
| exec_node *const before = after->prev; |
| exec_node *const head = instructions.head_sentinel.next; |
| exec_node *const tail = instructions.tail_sentinel.prev; |
| |
| before->next = head; |
| head->prev = before; |
| |
| after->prev = tail; |
| tail->next = after; |
| |
| return rv; |
| } |
| |
| ir_rvalue * |
| lower_64bit_visitor::handle_op(ir_expression *ir, |
| const char *function_name, |
| function_generator generator) |
| { |
| for (unsigned i = 0; i < ir->num_operands; i++) |
| if (!ir->operands[i]->type->is_integer_64()) |
| return ir; |
| |
| /* Get a handle to the correct ir_function_signature for the core |
| * operation. |
| */ |
| ir_function_signature *callee = NULL; |
| ir_function *f = find_function(function_name); |
| |
| if (f != NULL) { |
| callee = (ir_function_signature *) f->signatures.get_head(); |
| assert(callee != NULL && callee->ir_type == ir_type_function_signature); |
| } else { |
| f = new(base_ir) ir_function(function_name); |
| callee = generator(base_ir, NULL); |
| |
| f->add_signature(callee); |
| |
| add_function(f); |
| } |
| |
| this->progress = true; |
| return lower_op_to_function_call(this->base_ir, ir, callee); |
| } |
| |
| void |
| lower_64bit_visitor::handle_rvalue(ir_rvalue **rvalue) |
| { |
| if (*rvalue == NULL || (*rvalue)->ir_type != ir_type_expression) |
| return; |
| |
| ir_expression *const ir = (*rvalue)->as_expression(); |
| assert(ir != NULL); |
| |
| switch (ir->operation) { |
| case ir_unop_sign: |
| if (lowering(SIGN64)) { |
| *rvalue = handle_op(ir, "__builtin_sign64", generate_ir::sign64); |
| } |
| break; |
| |
| case ir_binop_div: |
| if (lowering(DIV64)) { |
| if (ir->type->base_type == GLSL_TYPE_UINT64) { |
| *rvalue = handle_op(ir, "__builtin_udiv64", generate_ir::udiv64); |
| } else { |
| *rvalue = handle_op(ir, "__builtin_idiv64", generate_ir::idiv64); |
| } |
| } |
| break; |
| |
| case ir_binop_mod: |
| if (lowering(MOD64)) { |
| if (ir->type->base_type == GLSL_TYPE_UINT64) { |
| *rvalue = handle_op(ir, "__builtin_umod64", generate_ir::umod64); |
| } else { |
| *rvalue = handle_op(ir, "__builtin_imod64", generate_ir::imod64); |
| } |
| } |
| break; |
| |
| case ir_binop_mul: |
| if (lowering(MUL64)) { |
| *rvalue = handle_op(ir, "__builtin_umul64", generate_ir::umul64); |
| } |
| break; |
| |
| default: |
| break; |
| } |
| } |