| /* |
| * Copyright © 2012 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
| #include "ir.h" |
| #include "ir_builder.h" |
| #include "ir_optimization.h" |
| #include "ir_rvalue_visitor.h" |
| |
| namespace { |
| |
| using namespace ir_builder; |
| |
| /** |
| * A visitor that lowers built-in floating-point pack/unpack expressions |
| * such as packSnorm2x16. |
| */ |
| class lower_packing_builtins_visitor : public ir_rvalue_visitor { |
| public: |
| /** |
| * \param op_mask is a bitmask of `enum lower_packing_builtins_op` |
| */ |
| explicit lower_packing_builtins_visitor(int op_mask) |
| : op_mask(op_mask), |
| progress(false) |
| { |
| /* Mutually exclusive options. */ |
| assert(!((op_mask & LOWER_PACK_HALF_2x16) && |
| (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); |
| |
| assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && |
| (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); |
| |
| factory.instructions = &factory_instructions; |
| } |
| |
| virtual ~lower_packing_builtins_visitor() |
| { |
| assert(factory_instructions.is_empty()); |
| } |
| |
| bool get_progress() { return progress; } |
| |
| void handle_rvalue(ir_rvalue **rvalue) |
| { |
| if (!*rvalue) |
| return; |
| |
| ir_expression *expr = (*rvalue)->as_expression(); |
| if (!expr) |
| return; |
| |
| enum lower_packing_builtins_op lowering_op = |
| choose_lowering_op(expr->operation); |
| |
| if (lowering_op == LOWER_PACK_UNPACK_NONE) |
| return; |
| |
| setup_factory(ralloc_parent(expr)); |
| |
| ir_rvalue *op0 = expr->operands[0]; |
| ralloc_steal(factory.mem_ctx, op0); |
| |
| switch (lowering_op) { |
| case LOWER_PACK_SNORM_2x16: |
| *rvalue = lower_pack_snorm_2x16(op0); |
| break; |
| case LOWER_PACK_SNORM_4x8: |
| *rvalue = lower_pack_snorm_4x8(op0); |
| break; |
| case LOWER_PACK_UNORM_2x16: |
| *rvalue = lower_pack_unorm_2x16(op0); |
| break; |
| case LOWER_PACK_UNORM_4x8: |
| *rvalue = lower_pack_unorm_4x8(op0); |
| break; |
| case LOWER_PACK_HALF_2x16: |
| *rvalue = lower_pack_half_2x16(op0); |
| break; |
| case LOWER_PACK_HALF_2x16_TO_SPLIT: |
| *rvalue = split_pack_half_2x16(op0); |
| break; |
| case LOWER_UNPACK_SNORM_2x16: |
| *rvalue = lower_unpack_snorm_2x16(op0); |
| break; |
| case LOWER_UNPACK_SNORM_4x8: |
| *rvalue = lower_unpack_snorm_4x8(op0); |
| break; |
| case LOWER_UNPACK_UNORM_2x16: |
| *rvalue = lower_unpack_unorm_2x16(op0); |
| break; |
| case LOWER_UNPACK_UNORM_4x8: |
| *rvalue = lower_unpack_unorm_4x8(op0); |
| break; |
| case LOWER_UNPACK_HALF_2x16: |
| *rvalue = lower_unpack_half_2x16(op0); |
| break; |
| case LOWER_UNPACK_HALF_2x16_TO_SPLIT: |
| *rvalue = split_unpack_half_2x16(op0); |
| break; |
| case LOWER_PACK_UNPACK_NONE: |
| case LOWER_PACK_USE_BFI: |
| case LOWER_PACK_USE_BFE: |
| assert(!"not reached"); |
| break; |
| } |
| |
| teardown_factory(); |
| progress = true; |
| } |
| |
| private: |
| const int op_mask; |
| bool progress; |
| ir_factory factory; |
| exec_list factory_instructions; |
| |
| /** |
| * Determine the needed lowering operation by filtering \a expr_op |
| * through \ref op_mask. |
| */ |
| enum lower_packing_builtins_op |
| choose_lowering_op(ir_expression_operation expr_op) |
| { |
| /* C++ regards int and enum as fundamentally different types. |
| * So, we can't simply return from each case; we must cast the return |
| * value. |
| */ |
| int result; |
| |
| switch (expr_op) { |
| case ir_unop_pack_snorm_2x16: |
| result = op_mask & LOWER_PACK_SNORM_2x16; |
| break; |
| case ir_unop_pack_snorm_4x8: |
| result = op_mask & LOWER_PACK_SNORM_4x8; |
| break; |
| case ir_unop_pack_unorm_2x16: |
| result = op_mask & LOWER_PACK_UNORM_2x16; |
| break; |
| case ir_unop_pack_unorm_4x8: |
| result = op_mask & LOWER_PACK_UNORM_4x8; |
| break; |
| case ir_unop_pack_half_2x16: |
| result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); |
| break; |
| case ir_unop_unpack_snorm_2x16: |
| result = op_mask & LOWER_UNPACK_SNORM_2x16; |
| break; |
| case ir_unop_unpack_snorm_4x8: |
| result = op_mask & LOWER_UNPACK_SNORM_4x8; |
| break; |
| case ir_unop_unpack_unorm_2x16: |
| result = op_mask & LOWER_UNPACK_UNORM_2x16; |
| break; |
| case ir_unop_unpack_unorm_4x8: |
| result = op_mask & LOWER_UNPACK_UNORM_4x8; |
| break; |
| case ir_unop_unpack_half_2x16: |
| result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); |
| break; |
| default: |
| result = LOWER_PACK_UNPACK_NONE; |
| break; |
| } |
| |
| return static_cast<enum lower_packing_builtins_op>(result); |
| } |
| |
| void |
| setup_factory(void *mem_ctx) |
| { |
| assert(factory.mem_ctx == NULL); |
| assert(factory.instructions->is_empty()); |
| |
| factory.mem_ctx = mem_ctx; |
| } |
| |
| void |
| teardown_factory() |
| { |
| base_ir->insert_before(factory.instructions); |
| assert(factory.instructions->is_empty()); |
| factory.mem_ctx = NULL; |
| } |
| |
| template <typename T> |
| ir_constant* |
| constant(T x) |
| { |
| return factory.constant(x); |
| } |
| |
| /** |
| * \brief Pack two uint16's into a single uint32. |
| * |
| * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 |
| * where the least significant bits specify the first element of the pair. |
| * Return the uint32. |
| */ |
| ir_rvalue* |
| pack_uvec2_to_uint(ir_rvalue *uvec2_rval) |
| { |
| assert(uvec2_rval->type == glsl_type::uvec2_type); |
| |
| /* uvec2 u = UVEC2_RVAL; */ |
| ir_variable *u = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_pack_uvec2_to_uint"); |
| factory.emit(assign(u, uvec2_rval)); |
| |
| if (op_mask & LOWER_PACK_USE_BFI) { |
| return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), |
| swizzle_y(u), |
| constant(16), |
| constant(16)); |
| } |
| |
| /* return (u.y << 16) | (u.x & 0xffff); */ |
| return bit_or(lshift(swizzle_y(u), constant(16u)), |
| bit_and(swizzle_x(u), constant(0xffffu))); |
| } |
| |
| /** |
| * \brief Pack four uint8's into a single uint32. |
| * |
| * Interpret the given uvec4 as a uint32 4-tuple. Pack the 4-tuple into a |
| * uint32 where the least significant bits specify the first element of the |
| * 4-tuple. Return the uint32. |
| */ |
| ir_rvalue* |
| pack_uvec4_to_uint(ir_rvalue *uvec4_rval) |
| { |
| assert(uvec4_rval->type == glsl_type::uvec4_type); |
| |
| ir_variable *u = factory.make_temp(glsl_type::uvec4_type, |
| "tmp_pack_uvec4_to_uint"); |
| |
| if (op_mask & LOWER_PACK_USE_BFI) { |
| /* uvec4 u = UVEC4_RVAL; */ |
| factory.emit(assign(u, uvec4_rval)); |
| |
| return bitfield_insert(bitfield_insert( |
| bitfield_insert( |
| bit_and(swizzle_x(u), constant(0xffu)), |
| swizzle_y(u), constant(8), constant(8)), |
| swizzle_z(u), constant(16), constant(8)), |
| swizzle_w(u), constant(24), constant(8)); |
| } |
| |
| /* uvec4 u = UVEC4_RVAL & 0xff */ |
| factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); |
| |
| /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ |
| return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), |
| lshift(swizzle_z(u), constant(16u))), |
| bit_or(lshift(swizzle_y(u), constant(8u)), |
| swizzle_x(u))); |
| } |
| |
| /** |
| * \brief Unpack a uint32 into two uint16's. |
| * |
| * Interpret the given uint32 as a uint16 pair where the uint32's least |
| * significant bits specify the pair's first element. Return the uint16 |
| * pair as a uvec2. |
| */ |
| ir_rvalue* |
| unpack_uint_to_uvec2(ir_rvalue *uint_rval) |
| { |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| /* uint u = UINT_RVAL; */ |
| ir_variable *u = factory.make_temp(glsl_type::uint_type, |
| "tmp_unpack_uint_to_uvec2_u"); |
| factory.emit(assign(u, uint_rval)); |
| |
| /* uvec2 u2; */ |
| ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_unpack_uint_to_uvec2_u2"); |
| |
| /* u2.x = u & 0xffffu; */ |
| factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); |
| |
| /* u2.y = u >> 16u; */ |
| factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); |
| |
| return deref(u2).val; |
| } |
| |
| /** |
| * \brief Unpack a uint32 into two int16's. |
| * |
| * Specifically each 16-bit value is sign-extended to the full width of an |
| * int32 on return. |
| */ |
| ir_rvalue * |
| unpack_uint_to_ivec2(ir_rvalue *uint_rval) |
| { |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| if (!(op_mask & LOWER_PACK_USE_BFE)) { |
| return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), |
| constant(16u)), |
| constant(16u)); |
| } |
| |
| ir_variable *i = factory.make_temp(glsl_type::int_type, |
| "tmp_unpack_uint_to_ivec2_i"); |
| factory.emit(assign(i, u2i(uint_rval))); |
| |
| /* ivec2 i2; */ |
| ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, |
| "tmp_unpack_uint_to_ivec2_i2"); |
| |
| factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), |
| WRITEMASK_X)); |
| factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), |
| WRITEMASK_Y)); |
| |
| return deref(i2).val; |
| } |
| |
| /** |
| * \brief Unpack a uint32 into four uint8's. |
| * |
| * Interpret the given uint32 as a uint8 4-tuple where the uint32's least |
| * significant bits specify the 4-tuple's first element. Return the uint8 |
| * 4-tuple as a uvec4. |
| */ |
| ir_rvalue* |
| unpack_uint_to_uvec4(ir_rvalue *uint_rval) |
| { |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| /* uint u = UINT_RVAL; */ |
| ir_variable *u = factory.make_temp(glsl_type::uint_type, |
| "tmp_unpack_uint_to_uvec4_u"); |
| factory.emit(assign(u, uint_rval)); |
| |
| /* uvec4 u4; */ |
| ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, |
| "tmp_unpack_uint_to_uvec4_u4"); |
| |
| /* u4.x = u & 0xffu; */ |
| factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); |
| |
| if (op_mask & LOWER_PACK_USE_BFE) { |
| /* u4.y = bitfield_extract(u, 8, 8); */ |
| factory.emit(assign(u4, bitfield_extract(u, constant(8), constant(8)), |
| WRITEMASK_Y)); |
| |
| /* u4.z = bitfield_extract(u, 16, 8); */ |
| factory.emit(assign(u4, bitfield_extract(u, constant(16), constant(8)), |
| WRITEMASK_Z)); |
| } else { |
| /* u4.y = (u >> 8u) & 0xffu; */ |
| factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), |
| constant(0xffu)), WRITEMASK_Y)); |
| |
| /* u4.z = (u >> 16u) & 0xffu; */ |
| factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), |
| constant(0xffu)), WRITEMASK_Z)); |
| } |
| |
| /* u4.w = (u >> 24u) */ |
| factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); |
| |
| return deref(u4).val; |
| } |
| |
| /** |
| * \brief Unpack a uint32 into four int8's. |
| * |
| * Specifically each 8-bit value is sign-extended to the full width of an |
| * int32 on return. |
| */ |
| ir_rvalue * |
| unpack_uint_to_ivec4(ir_rvalue *uint_rval) |
| { |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| if (!(op_mask & LOWER_PACK_USE_BFE)) { |
| return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), |
| constant(24u)), |
| constant(24u)); |
| } |
| |
| ir_variable *i = factory.make_temp(glsl_type::int_type, |
| "tmp_unpack_uint_to_ivec4_i"); |
| factory.emit(assign(i, u2i(uint_rval))); |
| |
| /* ivec4 i4; */ |
| ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, |
| "tmp_unpack_uint_to_ivec4_i4"); |
| |
| factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), |
| WRITEMASK_X)); |
| factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), |
| WRITEMASK_Y)); |
| factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), |
| WRITEMASK_Z)); |
| factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), |
| WRITEMASK_W)); |
| |
| return deref(i4).val; |
| } |
| |
| /** |
| * \brief Lower a packSnorm2x16 expression. |
| * |
| * \param vec2_rval is packSnorm2x16's input |
| * \return packSnorm2x16's output as a uint rvalue |
| */ |
| ir_rvalue* |
| lower_pack_snorm_2x16(ir_rvalue *vec2_rval) |
| { |
| /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: |
| * |
| * highp uint packSnorm2x16(vec2 v) |
| * -------------------------------- |
| * First, converts each component of the normalized floating-point value |
| * v into 16-bit integer values. Then, the results are packed into the |
| * returned 32-bit unsigned integer. |
| * |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) |
| * |
| * The first component of the vector will be written to the least |
| * significant bits of the output; the last component will be written to |
| * the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return pack_uvec2_to_uint( |
| * uvec2(ivec2( |
| * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); |
| * |
| * It is necessary to first convert the vec2 to ivec2 rather than directly |
| * converting vec2 to uvec2 because the latter conversion is undefined. |
| * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to |
| * convert a negative floating point value to an uint". |
| */ |
| assert(vec2_rval->type == glsl_type::vec2_type); |
| |
| ir_rvalue *result = pack_uvec2_to_uint( |
| i2u(f2i(round_even(mul(clamp(vec2_rval, |
| constant(-1.0f), |
| constant(1.0f)), |
| constant(32767.0f)))))); |
| |
| assert(result->type == glsl_type::uint_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower a packSnorm4x8 expression. |
| * |
| * \param vec4_rval is packSnorm4x8's input |
| * \return packSnorm4x8's output as a uint rvalue |
| */ |
| ir_rvalue* |
| lower_pack_snorm_4x8(ir_rvalue *vec4_rval) |
| { |
| /* From page 137 (143 of pdf) of the GLSL 4.30 spec: |
| * |
| * highp uint packSnorm4x8(vec4 v) |
| * ------------------------------- |
| * First, converts each component of the normalized floating-point value |
| * v into 8-bit integer values. Then, the results are packed into the |
| * returned 32-bit unsigned integer. |
| * |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) |
| * |
| * The first component of the vector will be written to the least |
| * significant bits of the output; the last component will be written to |
| * the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return pack_uvec4_to_uint( |
| * uvec4(ivec4( |
| * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); |
| * |
| * It is necessary to first convert the vec4 to ivec4 rather than directly |
| * converting vec4 to uvec4 because the latter conversion is undefined. |
| * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to |
| * convert a negative floating point value to an uint". |
| */ |
| assert(vec4_rval->type == glsl_type::vec4_type); |
| |
| ir_rvalue *result = pack_uvec4_to_uint( |
| i2u(f2i(round_even(mul(clamp(vec4_rval, |
| constant(-1.0f), |
| constant(1.0f)), |
| constant(127.0f)))))); |
| |
| assert(result->type == glsl_type::uint_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower an unpackSnorm2x16 expression. |
| * |
| * \param uint_rval is unpackSnorm2x16's input |
| * \return unpackSnorm2x16's output as a vec2 rvalue |
| */ |
| ir_rvalue* |
| lower_unpack_snorm_2x16(ir_rvalue *uint_rval) |
| { |
| /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: |
| * |
| * highp vec2 unpackSnorm2x16 (highp uint p) |
| * ----------------------------------------- |
| * First, unpacks a single 32-bit unsigned integer p into a pair of |
| * 16-bit unsigned integers. Then, each component is converted to |
| * a normalized floating-point value to generate the returned |
| * two-component vector. |
| * |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) |
| * |
| * The first component of the returned vector will be extracted from the |
| * least significant bits of the input; the last component will be |
| * extracted from the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return clamp( |
| * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, |
| * -1.0f, 1.0f); |
| * |
| * The above IR may appear unnecessarily complex, but the intermediate |
| * conversion to ivec2 and the bit shifts are necessary to correctly unpack |
| * negative floats. |
| * |
| * To see why, consider packing and then unpacking vec2(-1.0, 0.0). |
| * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we |
| * place that int16 into an int32, which results in the *positive* integer |
| * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather |
| * unimportant bit 16. We must now extend the int16's sign bit into bits |
| * 17-32, which is accomplished by left-shifting then right-shifting. |
| */ |
| |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| ir_rvalue *result = |
| clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), |
| constant(32767.0f)), |
| constant(-1.0f), |
| constant(1.0f)); |
| |
| assert(result->type == glsl_type::vec2_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower an unpackSnorm4x8 expression. |
| * |
| * \param uint_rval is unpackSnorm4x8's input |
| * \return unpackSnorm4x8's output as a vec4 rvalue |
| */ |
| ir_rvalue* |
| lower_unpack_snorm_4x8(ir_rvalue *uint_rval) |
| { |
| /* From page 137 (143 of pdf) of the GLSL 4.30 spec: |
| * |
| * highp vec4 unpackSnorm4x8 (highp uint p) |
| * ---------------------------------------- |
| * First, unpacks a single 32-bit unsigned integer p into four |
| * 8-bit unsigned integers. Then, each component is converted to |
| * a normalized floating-point value to generate the returned |
| * four-component vector. |
| * |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackSnorm4x8: clamp(f / 127.0, -1, +1) |
| * |
| * The first component of the returned vector will be extracted from the |
| * least significant bits of the input; the last component will be |
| * extracted from the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return clamp( |
| * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, |
| * -1.0f, 1.0f); |
| * |
| * The above IR may appear unnecessarily complex, but the intermediate |
| * conversion to ivec4 and the bit shifts are necessary to correctly unpack |
| * negative floats. |
| * |
| * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, |
| * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we |
| * place that int8 into an int32, which results in the *positive* integer |
| * 0x000000ff. The int8's sign bit becomes, in the int32, the rather |
| * unimportant bit 8. We must now extend the int8's sign bit into bits |
| * 9-32, which is accomplished by left-shifting then right-shifting. |
| */ |
| |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| ir_rvalue *result = |
| clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), |
| constant(127.0f)), |
| constant(-1.0f), |
| constant(1.0f)); |
| |
| assert(result->type == glsl_type::vec4_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower a packUnorm2x16 expression. |
| * |
| * \param vec2_rval is packUnorm2x16's input |
| * \return packUnorm2x16's output as a uint rvalue |
| */ |
| ir_rvalue* |
| lower_pack_unorm_2x16(ir_rvalue *vec2_rval) |
| { |
| /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: |
| * |
| * highp uint packUnorm2x16 (vec2 v) |
| * --------------------------------- |
| * First, converts each component of the normalized floating-point value |
| * v into 16-bit integer values. Then, the results are packed into the |
| * returned 32-bit unsigned integer. |
| * |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) |
| * |
| * The first component of the vector will be written to the least |
| * significant bits of the output; the last component will be written to |
| * the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return pack_uvec2_to_uint(uvec2( |
| * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); |
| * |
| * Here it is safe to directly convert the vec2 to uvec2 because the vec2 |
| * has been clamped to a non-negative range. |
| */ |
| |
| assert(vec2_rval->type == glsl_type::vec2_type); |
| |
| ir_rvalue *result = pack_uvec2_to_uint( |
| f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); |
| |
| assert(result->type == glsl_type::uint_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower a packUnorm4x8 expression. |
| * |
| * \param vec4_rval is packUnorm4x8's input |
| * \return packUnorm4x8's output as a uint rvalue |
| */ |
| ir_rvalue* |
| lower_pack_unorm_4x8(ir_rvalue *vec4_rval) |
| { |
| /* From page 137 (143 of pdf) of the GLSL 4.30 spec: |
| * |
| * highp uint packUnorm4x8 (vec4 v) |
| * -------------------------------- |
| * First, converts each component of the normalized floating-point value |
| * v into 8-bit integer values. Then, the results are packed into the |
| * returned 32-bit unsigned integer. |
| * |
| * The conversion for component c of v to fixed point is done as |
| * follows: |
| * |
| * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) |
| * |
| * The first component of the vector will be written to the least |
| * significant bits of the output; the last component will be written to |
| * the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return pack_uvec4_to_uint(uvec4( |
| * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f))); |
| * |
| * Here it is safe to directly convert the vec4 to uvec4 because the vec4 |
| * has been clamped to a non-negative range. |
| */ |
| |
| assert(vec4_rval->type == glsl_type::vec4_type); |
| |
| ir_rvalue *result = pack_uvec4_to_uint( |
| f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); |
| |
| assert(result->type == glsl_type::uint_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower an unpackUnorm2x16 expression. |
| * |
| * \param uint_rval is unpackUnorm2x16's input |
| * \return unpackUnorm2x16's output as a vec2 rvalue |
| */ |
| ir_rvalue* |
| lower_unpack_unorm_2x16(ir_rvalue *uint_rval) |
| { |
| /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: |
| * |
| * highp vec2 unpackUnorm2x16 (highp uint p) |
| * ----------------------------------------- |
| * First, unpacks a single 32-bit unsigned integer p into a pair of |
| * 16-bit unsigned integers. Then, each component is converted to |
| * a normalized floating-point value to generate the returned |
| * two-component vector. |
| * |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackUnorm2x16: f / 65535.0 |
| * |
| * The first component of the returned vector will be extracted from the |
| * least significant bits of the input; the last component will be |
| * extracted from the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; |
| */ |
| |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), |
| constant(65535.0f)); |
| |
| assert(result->type == glsl_type::vec2_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower an unpackUnorm4x8 expression. |
| * |
| * \param uint_rval is unpackUnorm4x8's input |
| * \return unpackUnorm4x8's output as a vec4 rvalue |
| */ |
| ir_rvalue* |
| lower_unpack_unorm_4x8(ir_rvalue *uint_rval) |
| { |
| /* From page 137 (143 of pdf) of the GLSL 4.30 spec: |
| * |
| * highp vec4 unpackUnorm4x8 (highp uint p) |
| * ---------------------------------------- |
| * First, unpacks a single 32-bit unsigned integer p into four |
| * 8-bit unsigned integers. Then, each component is converted to |
| * a normalized floating-point value to generate the returned |
| * two-component vector. |
| * |
| * The conversion for unpacked fixed-point value f to floating point is |
| * done as follows: |
| * |
| * unpackUnorm4x8: f / 255.0 |
| * |
| * The first component of the returned vector will be extracted from the |
| * least significant bits of the input; the last component will be |
| * extracted from the most significant bits. |
| * |
| * This function generates IR that approximates the following pseudo-GLSL: |
| * |
| * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; |
| */ |
| |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), |
| constant(255.0f)); |
| |
| assert(result->type == glsl_type::vec4_type); |
| return result; |
| } |
| |
| /** |
| * \brief Lower the component-wise calculation of packHalf2x16. |
| * |
| * \param f_rval is one component of packHalf2x16's input |
| * \param e_rval is the unshifted exponent bits of f_rval |
| * \param m_rval is the unshifted mantissa bits of f_rval |
| * |
| * \return a uint rvalue that encodes a float16 in its lower 16 bits |
| */ |
| ir_rvalue* |
| pack_half_1x16_nosign(ir_rvalue *f_rval, |
| ir_rvalue *e_rval, |
| ir_rvalue *m_rval) |
| { |
| assert(e_rval->type == glsl_type::uint_type); |
| assert(m_rval->type == glsl_type::uint_type); |
| |
| /* uint u16; */ |
| ir_variable *u16 = factory.make_temp(glsl_type::uint_type, |
| "tmp_pack_half_1x16_u16"); |
| |
| /* float f = FLOAT_RVAL; */ |
| ir_variable *f = factory.make_temp(glsl_type::float_type, |
| "tmp_pack_half_1x16_f"); |
| factory.emit(assign(f, f_rval)); |
| |
| /* uint e = E_RVAL; */ |
| ir_variable *e = factory.make_temp(glsl_type::uint_type, |
| "tmp_pack_half_1x16_e"); |
| factory.emit(assign(e, e_rval)); |
| |
| /* uint m = M_RVAL; */ |
| ir_variable *m = factory.make_temp(glsl_type::uint_type, |
| "tmp_pack_half_1x16_m"); |
| factory.emit(assign(m, m_rval)); |
| |
| /* Preliminaries |
| * ------------- |
| * |
| * For a float16, the bit layout is: |
| * |
| * sign: 15 |
| * exponent: 10:14 |
| * mantissa: 0:9 |
| * |
| * Let f16 be a float16 value. The sign, exponent, and mantissa |
| * determine its value thus: |
| * |
| * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) |
| * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) |
| * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) |
| * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) |
| * if e16 = 31 and m16 != 0, then NaN (5) |
| * |
| * where 0 <= m16 < 2^10. |
| * |
| * For a float32, the bit layout is: |
| * |
| * sign: 31 |
| * exponent: 23:30 |
| * mantissa: 0:22 |
| * |
| * Let f32 be a float32 value. The sign, exponent, and mantissa |
| * determine its value thus: |
| * |
| * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) |
| * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) |
| * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) |
| * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) |
| * if e32 = 255 and m32 != 0, then NaN (14) |
| * |
| * where 0 <= m32 < 2^23. |
| * |
| * The minimum and maximum normal float16 values are |
| * |
| * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20) |
| * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21) |
| * |
| * The step at max_norm16 is |
| * |
| * max_step16 = 2^5 (22) |
| * |
| * Observe that the float16 boundary values in equations 20-21 lie in the |
| * range of normal float32 values. |
| * |
| * |
| * Rounding Behavior |
| * ----------------- |
| * Not all float32 values can be exactly represented as a float16. We |
| * round all such intermediate float32 values to the nearest float16; if |
| * the float32 is exactly between to float16 values, we round to the one |
| * with an even mantissa. This rounding behavior has several benefits: |
| * |
| * - It has no sign bias. |
| * |
| * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's |
| * GPU ISA. |
| * |
| * - By reproducing the behavior of the GPU (at least on Intel hardware), |
| * compile-time evaluation of constant packHalf2x16 GLSL expressions will |
| * result in the same value as if the expression were executed on the |
| * GPU. |
| * |
| * Calculation |
| * ----------- |
| * Our task is to compute s16, e16, m16 given f32. Since this function |
| * ignores the sign bit, assume that s32 = s16 = 0. There are several |
| * cases consider. |
| */ |
| |
| factory.emit( |
| |
| /* Case 1) f32 is NaN |
| * |
| * The resultant f16 will also be NaN. |
| */ |
| |
| /* if (e32 == 255 && m32 != 0) { */ |
| if_tree(logic_and(equal(e, constant(0xffu << 23u)), |
| logic_not(equal(m, constant(0u)))), |
| |
| assign(u16, constant(0x7fffu)), |
| |
| /* Case 2) f32 lies in the range [0, min_norm16). |
| * |
| * The resultant float16 will be either zero, subnormal, or normal. |
| * |
| * Solving |
| * |
| * f32 = min_norm16 (30) |
| * |
| * gives |
| * |
| * e32 = 113 and m32 = 0 (31) |
| * |
| * Therefore this case occurs if and only if |
| * |
| * e32 < 113 (32) |
| */ |
| |
| /* } else if (e32 < 113) { */ |
| if_tree(less(e, constant(113u << 23u)), |
| |
| /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ |
| assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), |
| constant((float) (1 << 24)))))), |
| |
| /* Case 3) f32 lies in the range |
| * [min_norm16, max_norm16 + max_step16). |
| * |
| * The resultant float16 will be either normal or infinite. |
| * |
| * Solving |
| * |
| * f32 = max_norm16 + max_step16 (40) |
| * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41) |
| * = 2^16 (42) |
| * gives |
| * |
| * e32 = 143 and m32 = 0 (43) |
| * |
| * We already solved the boundary condition f32 = min_norm16 above |
| * in equation 31. Therefore this case occurs if and only if |
| * |
| * 113 <= e32 and e32 < 143 |
| */ |
| |
| /* } else if (e32 < 143) { */ |
| if_tree(less(e, constant(143u << 23u)), |
| |
| /* The addition below handles the case where the mantissa rounds |
| * up to 1024 and bumps the exponent. |
| * |
| * u16 = ((e - (112u << 23u)) >> 13u) |
| * + round_to_even((float(m) / (1u << 13u)); |
| */ |
| assign(u16, add(rshift(sub(e, constant(112u << 23u)), |
| constant(13u)), |
| f2u(round_even( |
| div(u2f(m), constant((float) (1 << 13))))))), |
| |
| /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. |
| * |
| * The resultant float16 will be infinite. |
| * |
| * The cases above caught all float32 values in the range |
| * [0, max_norm16 + max_step16), so this is the fall-through case. |
| */ |
| |
| /* } else { */ |
| |
| assign(u16, constant(31u << 10u)))))); |
| |
| /* } */ |
| |
| return deref(u16).val; |
| } |
| |
| /** |
| * \brief Lower a packHalf2x16 expression. |
| * |
| * \param vec2_rval is packHalf2x16's input |
| * \return packHalf2x16's output as a uint rvalue |
| */ |
| ir_rvalue* |
| lower_pack_half_2x16(ir_rvalue *vec2_rval) |
| { |
| /* The GLSL ES 3.00 spec, page 89 (95 of pdf), defines the operation: |
| * |
| * highp uint packHalf2x16 (mediump vec2 v) |
| * ---------------------------------------- |
| * Returns an unsigned integer obtained by converting the components of |
| * a two-component floating-point vector to the 16-bit floating-point |
| * representation found in the OpenGL ES Specification, and then packing |
| * these two 16-bit integers into a 32-bit unsigned integer. |
| * |
| * The first vector component specifies the 16 least-significant bits |
| * of the result; the second component specifies the 16 most-significant |
| * bits. |
| */ |
| |
| assert(vec2_rval->type == glsl_type::vec2_type); |
| |
| /* vec2 src = VEC2_RVAL; */ |
| ir_variable *src = factory.make_temp(glsl_type::vec2_type, |
| "tmp_pack_half_2x16_f"); |
| factory.emit(assign(src, vec2_rval)); |
| |
| /* Reinterpret both floats as raw 32-bit patterns. |
| * |
| * uvec2 bits32 = bitcast_f2u(src); |
| */ |
| ir_variable *bits32 = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_pack_half_2x16_f32"); |
| ir_rvalue *raw_bits = expr(ir_unop_bitcast_f2u, src); |
| factory.emit(assign(bits32, raw_bits)); |
| |
| /* Accumulator for the two float16 bit patterns. |
| * |
| * uvec2 bits16; |
| */ |
| ir_variable *bits16 = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_pack_half_2x16_f16"); |
| |
| /* Isolate the float32 exponent field, left in place (unshifted). |
| * |
| * uvec2 exponent = bits32 & 0x7f800000u; |
| */ |
| ir_variable *exponent = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_pack_half_2x16_e"); |
| ir_rvalue *exponent_bits = bit_and(bits32, constant(0x7f800000u)); |
| factory.emit(assign(exponent, exponent_bits)); |
| |
| /* Isolate the float32 mantissa field, likewise unshifted. |
| * |
| * uvec2 mantissa = bits32 & 0x007fffffu; |
| */ |
| ir_variable *mantissa = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_pack_half_2x16_m"); |
| ir_rvalue *mantissa_bits = bit_and(bits32, constant(0x007fffffu)); |
| factory.emit(assign(mantissa, mantissa_bits)); |
| |
| /* Convert each component to float16 exponent and mantissa bits. The |
| * helper emits its own instructions, so the x component is finished |
| * (helper call, then store) before the y component is started. |
| * |
| * bits16.x = pack_half_1x16_nosign(src.x, exponent.x, mantissa.x); |
| * bits16.y = pack_half_1x16_nosign(src.y, exponent.y, mantissa.y); |
| */ |
| ir_rvalue *half_x = pack_half_1x16_nosign(swizzle_x(src), |
| swizzle_x(exponent), |
| swizzle_x(mantissa)); |
| factory.emit(assign(bits16, half_x, WRITEMASK_X)); |
| |
| ir_rvalue *half_y = pack_half_1x16_nosign(swizzle_y(src), |
| swizzle_y(exponent), |
| swizzle_y(mantissa)); |
| factory.emit(assign(bits16, half_y, WRITEMASK_Y)); |
| |
| /* Move each float32 sign bit (bit 31) down to the float16 sign |
| * position (bit 15) and merge it into the result. |
| * |
| * bits16 |= (bits32 & (1u << 31u)) >> 16u; |
| */ |
| ir_rvalue *sign = rshift(bit_and(bits32, constant(1u << 31u)), |
| constant(16u)); |
| factory.emit(assign(bits16, bit_or(bits16, sign))); |
| |
| /* The y component supplies the upper 16 bits, x the lower 16 bits. |
| * |
| * return (bits16.y << 16u) | bits16.x; |
| */ |
| ir_rvalue *high_half = lshift(swizzle_y(bits16), constant(16u)); |
| ir_rvalue *packed = bit_or(high_half, swizzle_x(bits16)); |
| |
| assert(packed->type == glsl_type::uint_type); |
| return packed; |
| } |
| |
| /** |
| * \brief Split packHalf2x16's vec2 operand into two floats. |
| * |
| * \param vec2_rval is packHalf2x16's input |
| * \return a uint rvalue |
| * |
| * Some code generators, such as the i965 fragment shader, require that all |
| * vector expressions be lowered to a sequence of scalar expressions. |
| * However, packHalf2x16 cannot be scalarized by the same mechanism as |
| * a true vector operation because its input and output have a differing |
| * number of vector components. |
| * |
| * This method scalarizes packHalf2x16 by transforming it from an unary |
| * operation having vector input to a binary operation having scalar input. |
| * That is, it transforms |
| * |
| * packHalf2x16(VEC2_RVAL); |
| * |
| * into |
| * |
| * vec2 v = VEC2_RVAL; |
| * return packHalf2x16_split(v.x, v.y); |
| */ |
| ir_rvalue* |
| split_pack_half_2x16(ir_rvalue *vec2_rval) |
| { |
| assert(vec2_rval->type == glsl_type::vec2_type); |
| |
| /* vec2 halves = VEC2_RVAL; */ |
| ir_variable *halves = factory.make_temp(glsl_type::vec2_type, |
| "tmp_split_pack_half_2x16_v"); |
| factory.emit(assign(halves, vec2_rval)); |
| |
| /* return packHalf2x16_split(halves.x, halves.y); */ |
| ir_rvalue *packed = expr(ir_binop_pack_half_2x16_split, |
| swizzle_x(halves), |
| swizzle_y(halves)); |
| return packed; |
| } |
| |
| /** |
| * \brief Lower the component-wise calculation of unpackHalf2x16. |
| * |
| * Given a uint that encodes a float16 in its lower 16 bits, this function |
| * returns a uint that encodes a float32 with the same value. The sign bit |
| * of the float16 is ignored. |
| * |
| * \param e_rval is the unshifted exponent bits of a float16 |
| * \param m_rval is the unshifted mantissa bits of a float16 |
| * \return a uint rvalue that encodes a float32 |
| */ |
| ir_rvalue* |
| unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) |
| { |
| assert(e_rval->type == glsl_type::uint_type); |
| assert(m_rval->type == glsl_type::uint_type); |
| |
| /* uint u32; -- receives the float32 bit pattern; every branch of the |
| * if-tree below assigns it, and it is the value returned. |
| */ |
| ir_variable *u32 = factory.make_temp(glsl_type::uint_type, |
| "tmp_unpack_half_1x16_u32"); |
| |
| /* uint e = E_RVAL; */ |
| ir_variable *e = factory.make_temp(glsl_type::uint_type, |
| "tmp_unpack_half_1x16_e"); |
| factory.emit(assign(e, e_rval)); |
| |
| /* uint m = M_RVAL; */ |
| ir_variable *m = factory.make_temp(glsl_type::uint_type, |
| "tmp_unpack_half_1x16_m"); |
| factory.emit(assign(m, m_rval)); |
| |
| /* Preliminaries |
| * ------------- |
| * |
| * For a float16, the bit layout is: |
| * |
| * sign: 15 |
| * exponent: 10:14 |
| * mantissa: 0:9 |
| * |
| * Let f16 be a float16 value. The sign, exponent, and mantissa |
| * determine its value thus: |
| * |
| * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) |
| * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) |
| * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) |
| * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) |
| * if e16 = 31 and m16 != 0, then NaN (5) |
| * |
| * where 0 <= m16 < 2^10. |
| * |
| * For a float32, the bit layout is: |
| * |
| * sign: 31 |
| * exponent: 23:30 |
| * mantissa: 0:22 |
| * |
| * Let f32 be a float32 value. The sign, exponent, and mantissa |
| * determine its value thus: |
| * |
| * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) |
| * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) |
| * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) |
| * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) |
| * if e32 = 255 and m32 != 0, then NaN (14) |
| * |
| * where 0 <= m32 < 2^23. |
| * |
| * Calculation |
| * ----------- |
| * Our task is to compute s32, e32, m32 given f16. Since this function |
| * ignores the sign bit, assume that s32 = s16 = 0. There are several |
| * cases to consider. |
| * |
| * Note that e and m hold the float16 fields in place (unshifted), so the |
| * tests below compare e against constants shifted left by 10 bits. |
| */ |
| |
| factory.emit( |
| |
| /* Case 1) f16 is zero or subnormal. |
| * |
| * The simplest method of calculating f32 in this case is |
| * |
| * f32 = f16 (20) |
| * = 2^(-14) * (m16 / 2^10) (21) |
| * = m16 / 2^24 (22) |
| * |
| * The quotient is computed as a float and then bitcast back to uint. |
| */ |
| |
| /* if (e16 == 0) { */ |
| if_tree(equal(e, constant(0u)), |
| |
| /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */ |
| assign(u32, expr(ir_unop_bitcast_f2u, |
| div(u2f(m), constant((float)(1 << 24))))), |
| |
| /* Case 2) f16 is normal. |
| * |
| * The equation |
| * |
| * f32 = f16 (30) |
| * 2^(e32 - 127) * (1 + m32 / 2^23) = (31) |
| * 2^(e16 - 15) * (1 + m16 / 2^10) |
| * |
| * can be decomposed into two |
| * |
| * 2^(e32 - 127) = 2^(e16 - 15) (32) |
| * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33) |
| * |
| * which solve to |
| * |
| * e32 = e16 + 112 (34) |
| * m32 = m16 * 2^13 (35) |
| */ |
| |
| /* } else if (e16 < 31) { */ |
| if_tree(less(e, constant(31u << 10u)), |
| |
| /* Apply (34) to the unshifted exponent, merge in the mantissa, then |
| * shift left by 13. That moves the exponent from bit 10 to bit 23 |
| * and scales the mantissa by 2^13 as required by (35). |
| * |
| * u32 = ((e + (112 << 10)) | m) << 13; |
| */ |
| assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m), |
| constant(13u))), |
| |
| |
| /* Case 3) f16 is infinite. |
| * |
| * Here e16 = 31, so m16 = 0 selects infinity per (4). The float32 |
| * infinity has e32 = 255 and m32 = 0 per (13). |
| * |
| * } else if (m16 == 0) { |
| */ |
| if_tree(equal(m, constant(0u)), |
| |
| assign(u32, constant(255u << 23u)), |
| |
| /* Case 4) f16 is NaN. |
| * |
| * 0x7fffffff has e32 = 255 and m32 != 0, which is a NaN per (14). |
| */ |
| /* } else { */ |
| |
| assign(u32, constant(0x7fffffffu)))))); |
| |
| /* } */ |
| |
| return deref(u32).val; |
| } |
| |
| /** |
| * \brief Lower an unpackHalf2x16 expression. |
| * |
| * \param uint_rval is unpackHalf2x16's input |
| * \return unpackHalf2x16's output as a vec2 rvalue |
| */ |
| ir_rvalue* |
| lower_unpack_half_2x16(ir_rvalue *uint_rval) |
| { |
| /* The GLSL ES 3.00 spec, page 89 (95 of pdf), defines the operation: |
| * |
| * mediump vec2 unpackHalf2x16 (highp uint v) |
| * ------------------------------------------ |
| * Returns a two-component floating-point vector with components |
| * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit |
| * values, interpreting those values as 16-bit floating-point numbers |
| * according to the OpenGL ES Specification, and converting them to |
| * 32-bit floating-point values. |
| * |
| * The first component of the vector is obtained from the |
| * 16 least-significant bits of v; the second component is obtained |
| * from the 16 most-significant bits of v. |
| */ |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| /* Split the operand into its two 16-bit halves. |
| * |
| * uint u = UINT_RVAL; |
| * uvec2 halves = uvec2(u & 0xffff, u >> 16); |
| */ |
| ir_variable *halves = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_unpack_half_2x16_f16"); |
| factory.emit(assign(halves, unpack_uint_to_uvec2(uint_rval))); |
| |
| /* Accumulator for the two float32 bit patterns. |
| * |
| * uvec2 singles; |
| */ |
| ir_variable *singles = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_unpack_half_2x16_f32"); |
| |
| /* Isolate the float16 exponent field, left in place (unshifted). |
| * |
| * uvec2 exponent = halves & 0x7c00u; |
| */ |
| ir_variable *exponent = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_unpack_half_2x16_e"); |
| ir_rvalue *exponent_bits = bit_and(halves, constant(0x7c00u)); |
| factory.emit(assign(exponent, exponent_bits)); |
| |
| /* Isolate the float16 mantissa field, likewise unshifted. |
| * |
| * uvec2 mantissa = halves & 0x03ffu; |
| */ |
| ir_variable *mantissa = factory.make_temp(glsl_type::uvec2_type, |
| "tmp_unpack_half_2x16_m"); |
| ir_rvalue *mantissa_bits = bit_and(halves, constant(0x03ffu)); |
| factory.emit(assign(mantissa, mantissa_bits)); |
| |
| /* Convert each component to float32 exponent and mantissa bits. The |
| * helper emits its own instructions, so the x component is finished |
| * (helper call, then store) before the y component is started. |
| * |
| * singles.x = unpack_half_1x16_nosign(exponent.x, mantissa.x); |
| * singles.y = unpack_half_1x16_nosign(exponent.y, mantissa.y); |
| */ |
| ir_rvalue *single_x = unpack_half_1x16_nosign(swizzle_x(exponent), |
| swizzle_x(mantissa)); |
| factory.emit(assign(singles, single_x, WRITEMASK_X)); |
| |
| ir_rvalue *single_y = unpack_half_1x16_nosign(swizzle_y(exponent), |
| swizzle_y(mantissa)); |
| factory.emit(assign(singles, single_y, WRITEMASK_Y)); |
| |
| /* Move each float16 sign bit (bit 15) up to the float32 sign |
| * position (bit 31) and merge it into the result. |
| * |
| * singles |= (halves & 0x8000u) << 16u; |
| */ |
| ir_rvalue *sign = lshift(bit_and(halves, constant(0x8000u)), |
| constant(16u)); |
| factory.emit(assign(singles, bit_or(singles, sign))); |
| |
| /* return bitcast_u2f(singles); */ |
| ir_rvalue *unpacked = expr(ir_unop_bitcast_u2f, singles); |
| assert(unpacked->type == glsl_type::vec2_type); |
| return unpacked; |
| } |
| |
| /** |
| * \brief Split unpackHalf2x16 into two operations. |
| * |
| * \param uint_rval is unpackHalf2x16's input |
| * \return a vec2 rvalue |
| * |
| * Some code generators, such as the i965 fragment shader, require that all |
| * vector expressions be lowered to a sequence of scalar expressions. |
| * However, unpackHalf2x16 cannot be scalarized by the same method as |
| * a true vector operation because the number of components of its input |
| * and output differ. |
| * |
| * This method scalarizes unpackHalf2x16 by transforming it from a single |
| * operation having vec2 output to a pair of operations each having float |
| * output. That is, it transforms |
| * |
| * unpackHalf2x16(UINT_RVAL) |
| * |
| * into |
| * |
| * uint u = UINT_RVAL; |
| * vec2 v; |
| * |
| * v.x = unpackHalf2x16_split_x(u); |
| * v.y = unpackHalf2x16_split_y(u); |
| * |
| * return v; |
| */ |
| ir_rvalue* |
| split_unpack_half_2x16(ir_rvalue *uint_rval) |
| { |
| assert(uint_rval->type == glsl_type::uint_type); |
| |
| /* uint packed = UINT_RVAL; */ |
| ir_variable *packed = factory.make_temp(glsl_type::uint_type, |
| "tmp_split_unpack_half_2x16_u"); |
| factory.emit(assign(packed, uint_rval)); |
| |
| /* vec2 unpacked; */ |
| ir_variable *unpacked = factory.make_temp(glsl_type::vec2_type, |
| "tmp_split_unpack_half_2x16_v"); |
| |
| /* unpacked.x = unpack_half_2x16_split_x(packed); */ |
| ir_rvalue *component_x = expr(ir_unop_unpack_half_2x16_split_x, packed); |
| factory.emit(assign(unpacked, component_x, WRITEMASK_X)); |
| |
| /* unpacked.y = unpack_half_2x16_split_y(packed); */ |
| ir_rvalue *component_y = expr(ir_unop_unpack_half_2x16_split_y, packed); |
| factory.emit(assign(unpacked, component_y, WRITEMASK_Y)); |
| |
| /* return unpacked; */ |
| return deref(unpacked).val; |
| } |
| }; |
| |
| } // namespace anonymous |
| |
| /** |
| * \brief Lower the builtin packing functions. |
| * |
| * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. |
| */ |
| bool |
| lower_packing_builtins(exec_list *instructions, int op_mask) |
| { |
| /* Run the lowering visitor over the instruction list, then report |
| * whatever progress flag the visitor ended up with. |
| */ |
| lower_packing_builtins_visitor lowering(op_mask); |
| visit_list_elements(&lowering, instructions, true); |
| |
| const bool made_progress = lowering.get_progress(); |
| return made_progress; |
| } |