| /* |
| * Copyright © 2010 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "brw_cfg.h" |
| #include "brw_eu.h" |
| #include "brw_fs.h" |
| #include "brw_nir.h" |
| #include "brw_vec4_tes.h" |
| #include "common/gen_debug.h" |
| #include "main/uniforms.h" |
| #include "util/macros.h" |
| |
| enum brw_reg_type |
| brw_type_for_base_type(const struct glsl_type *type) |
| { |
| switch (type->base_type) { |
| case GLSL_TYPE_FLOAT16: |
| return BRW_REGISTER_TYPE_HF; |
| case GLSL_TYPE_FLOAT: |
| return BRW_REGISTER_TYPE_F; |
| case GLSL_TYPE_INT: |
| case GLSL_TYPE_BOOL: |
| case GLSL_TYPE_SUBROUTINE: |
| return BRW_REGISTER_TYPE_D; |
| case GLSL_TYPE_INT16: |
| return BRW_REGISTER_TYPE_W; |
| case GLSL_TYPE_UINT: |
| return BRW_REGISTER_TYPE_UD; |
| case GLSL_TYPE_UINT16: |
| return BRW_REGISTER_TYPE_UW; |
| case GLSL_TYPE_ARRAY: |
| return brw_type_for_base_type(type->fields.array); |
| case GLSL_TYPE_STRUCT: |
| case GLSL_TYPE_SAMPLER: |
| case GLSL_TYPE_ATOMIC_UINT: |
| /* These should be overridden with the type of the member when |
| * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely |
| * way to trip up if we don't. |
| */ |
| return BRW_REGISTER_TYPE_UD; |
| case GLSL_TYPE_IMAGE: |
| return BRW_REGISTER_TYPE_UD; |
| case GLSL_TYPE_DOUBLE: |
| return BRW_REGISTER_TYPE_DF; |
| case GLSL_TYPE_UINT64: |
| return BRW_REGISTER_TYPE_UQ; |
| case GLSL_TYPE_INT64: |
| return BRW_REGISTER_TYPE_Q; |
| case GLSL_TYPE_VOID: |
| case GLSL_TYPE_ERROR: |
| case GLSL_TYPE_INTERFACE: |
| case GLSL_TYPE_FUNCTION: |
| unreachable("not reached"); |
| } |
| |
| return BRW_REGISTER_TYPE_F; |
| } |
| |
| enum brw_conditional_mod |
| brw_conditional_for_comparison(unsigned int op) |
| { |
| switch (op) { |
| case ir_binop_less: |
| return BRW_CONDITIONAL_L; |
| case ir_binop_gequal: |
| return BRW_CONDITIONAL_GE; |
| case ir_binop_equal: |
| case ir_binop_all_equal: /* same as equal for scalars */ |
| return BRW_CONDITIONAL_Z; |
| case ir_binop_nequal: |
| case ir_binop_any_nequal: /* same as nequal for scalars */ |
| return BRW_CONDITIONAL_NZ; |
| default: |
| unreachable("not reached: bad operation for comparison"); |
| } |
| } |
| |
| uint32_t |
| brw_math_function(enum opcode op) |
| { |
| switch (op) { |
| case SHADER_OPCODE_RCP: |
| return BRW_MATH_FUNCTION_INV; |
| case SHADER_OPCODE_RSQ: |
| return BRW_MATH_FUNCTION_RSQ; |
| case SHADER_OPCODE_SQRT: |
| return BRW_MATH_FUNCTION_SQRT; |
| case SHADER_OPCODE_EXP2: |
| return BRW_MATH_FUNCTION_EXP; |
| case SHADER_OPCODE_LOG2: |
| return BRW_MATH_FUNCTION_LOG; |
| case SHADER_OPCODE_POW: |
| return BRW_MATH_FUNCTION_POW; |
| case SHADER_OPCODE_SIN: |
| return BRW_MATH_FUNCTION_SIN; |
| case SHADER_OPCODE_COS: |
| return BRW_MATH_FUNCTION_COS; |
| case SHADER_OPCODE_INT_QUOTIENT: |
| return BRW_MATH_FUNCTION_INT_DIV_QUOTIENT; |
| case SHADER_OPCODE_INT_REMAINDER: |
| return BRW_MATH_FUNCTION_INT_DIV_REMAINDER; |
| default: |
| unreachable("not reached: unknown math function"); |
| } |
| } |
| |
| bool |
| brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) |
| { |
| if (!offsets) return false; /* nonconstant offset; caller will handle it. */ |
| |
| /* offset out of bounds; caller will handle it. */ |
| for (unsigned i = 0; i < num_components; i++) |
| if (offsets[i] > 7 || offsets[i] < -8) |
| return false; |
| |
| /* Combine all three offsets into a single unsigned dword: |
| * |
| * bits 11:8 - U Offset (X component) |
| * bits 7:4 - V Offset (Y component) |
| * bits 3:0 - R Offset (Z component) |
| */ |
| *offset_bits = 0; |
| for (unsigned i = 0; i < num_components; i++) { |
| const unsigned shift = 4 * (2 - i); |
| *offset_bits |= (offsets[i] << shift) & (0xF << shift); |
| } |
| return true; |
| } |
| |
| const char * |
| brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) |
| { |
| switch (op) { |
| case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP: |
| /* The DO instruction doesn't exist on Gen6+, but we use it to mark the |
| * start of a loop in the IR. |
| */ |
| if (devinfo->gen >= 6 && op == BRW_OPCODE_DO) |
| return "do"; |
| |
| /* The following conversion opcodes doesn't exist on Gen8+, but we use |
| * then to mark that we want to do the conversion. |
| */ |
| if (devinfo->gen > 7 && op == BRW_OPCODE_F32TO16) |
| return "f32to16"; |
| |
| if (devinfo->gen > 7 && op == BRW_OPCODE_F16TO32) |
| return "f16to32"; |
| |
| assert(brw_opcode_desc(devinfo, op)->name); |
| return brw_opcode_desc(devinfo, op)->name; |
| case FS_OPCODE_FB_WRITE: |
| return "fb_write"; |
| case FS_OPCODE_FB_WRITE_LOGICAL: |
| return "fb_write_logical"; |
| case FS_OPCODE_REP_FB_WRITE: |
| return "rep_fb_write"; |
| case FS_OPCODE_FB_READ: |
| return "fb_read"; |
| case FS_OPCODE_FB_READ_LOGICAL: |
| return "fb_read_logical"; |
| |
| case SHADER_OPCODE_RCP: |
| return "rcp"; |
| case SHADER_OPCODE_RSQ: |
| return "rsq"; |
| case SHADER_OPCODE_SQRT: |
| return "sqrt"; |
| case SHADER_OPCODE_EXP2: |
| return "exp2"; |
| case SHADER_OPCODE_LOG2: |
| return "log2"; |
| case SHADER_OPCODE_POW: |
| return "pow"; |
| case SHADER_OPCODE_INT_QUOTIENT: |
| return "int_quot"; |
| case SHADER_OPCODE_INT_REMAINDER: |
| return "int_rem"; |
| case SHADER_OPCODE_SIN: |
| return "sin"; |
| case SHADER_OPCODE_COS: |
| return "cos"; |
| |
| case SHADER_OPCODE_TEX: |
| return "tex"; |
| case SHADER_OPCODE_TEX_LOGICAL: |
| return "tex_logical"; |
| case SHADER_OPCODE_TXD: |
| return "txd"; |
| case SHADER_OPCODE_TXD_LOGICAL: |
| return "txd_logical"; |
| case SHADER_OPCODE_TXF: |
| return "txf"; |
| case SHADER_OPCODE_TXF_LOGICAL: |
| return "txf_logical"; |
| case SHADER_OPCODE_TXF_LZ: |
| return "txf_lz"; |
| case SHADER_OPCODE_TXL: |
| return "txl"; |
| case SHADER_OPCODE_TXL_LOGICAL: |
| return "txl_logical"; |
| case SHADER_OPCODE_TXL_LZ: |
| return "txl_lz"; |
| case SHADER_OPCODE_TXS: |
| return "txs"; |
| case SHADER_OPCODE_TXS_LOGICAL: |
| return "txs_logical"; |
| case FS_OPCODE_TXB: |
| return "txb"; |
| case FS_OPCODE_TXB_LOGICAL: |
| return "txb_logical"; |
| case SHADER_OPCODE_TXF_CMS: |
| return "txf_cms"; |
| case SHADER_OPCODE_TXF_CMS_LOGICAL: |
| return "txf_cms_logical"; |
| case SHADER_OPCODE_TXF_CMS_W: |
| return "txf_cms_w"; |
| case SHADER_OPCODE_TXF_CMS_W_LOGICAL: |
| return "txf_cms_w_logical"; |
| case SHADER_OPCODE_TXF_UMS: |
| return "txf_ums"; |
| case SHADER_OPCODE_TXF_UMS_LOGICAL: |
| return "txf_ums_logical"; |
| case SHADER_OPCODE_TXF_MCS: |
| return "txf_mcs"; |
| case SHADER_OPCODE_TXF_MCS_LOGICAL: |
| return "txf_mcs_logical"; |
| case SHADER_OPCODE_LOD: |
| return "lod"; |
| case SHADER_OPCODE_LOD_LOGICAL: |
| return "lod_logical"; |
| case SHADER_OPCODE_TG4: |
| return "tg4"; |
| case SHADER_OPCODE_TG4_LOGICAL: |
| return "tg4_logical"; |
| case SHADER_OPCODE_TG4_OFFSET: |
| return "tg4_offset"; |
| case SHADER_OPCODE_TG4_OFFSET_LOGICAL: |
| return "tg4_offset_logical"; |
| case SHADER_OPCODE_SAMPLEINFO: |
| return "sampleinfo"; |
| case SHADER_OPCODE_SAMPLEINFO_LOGICAL: |
| return "sampleinfo_logical"; |
| |
| case SHADER_OPCODE_SHADER_TIME_ADD: |
| return "shader_time_add"; |
| |
| case SHADER_OPCODE_UNTYPED_ATOMIC: |
| return "untyped_atomic"; |
| case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: |
| return "untyped_atomic_logical"; |
| case SHADER_OPCODE_UNTYPED_SURFACE_READ: |
| return "untyped_surface_read"; |
| case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: |
| return "untyped_surface_read_logical"; |
| case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: |
| return "untyped_surface_write"; |
| case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: |
| return "untyped_surface_write_logical"; |
| case SHADER_OPCODE_TYPED_ATOMIC: |
| return "typed_atomic"; |
| case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: |
| return "typed_atomic_logical"; |
| case SHADER_OPCODE_TYPED_SURFACE_READ: |
| return "typed_surface_read"; |
| case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: |
| return "typed_surface_read_logical"; |
| case SHADER_OPCODE_TYPED_SURFACE_WRITE: |
| return "typed_surface_write"; |
| case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: |
| return "typed_surface_write_logical"; |
| case SHADER_OPCODE_MEMORY_FENCE: |
| return "memory_fence"; |
| |
| case SHADER_OPCODE_BYTE_SCATTERED_READ: |
| return "byte_scattered_read"; |
| case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: |
| return "byte_scattered_read_logical"; |
| case SHADER_OPCODE_BYTE_SCATTERED_WRITE: |
| return "byte_scattered_write"; |
| case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: |
| return "byte_scattered_write_logical"; |
| |
| case SHADER_OPCODE_LOAD_PAYLOAD: |
| return "load_payload"; |
| case FS_OPCODE_PACK: |
| return "pack"; |
| |
| case SHADER_OPCODE_GEN4_SCRATCH_READ: |
| return "gen4_scratch_read"; |
| case SHADER_OPCODE_GEN4_SCRATCH_WRITE: |
| return "gen4_scratch_write"; |
| case SHADER_OPCODE_GEN7_SCRATCH_READ: |
| return "gen7_scratch_read"; |
| case SHADER_OPCODE_URB_WRITE_SIMD8: |
| return "gen8_urb_write_simd8"; |
| case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: |
| return "gen8_urb_write_simd8_per_slot"; |
| case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: |
| return "gen8_urb_write_simd8_masked"; |
| case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: |
| return "gen8_urb_write_simd8_masked_per_slot"; |
| case SHADER_OPCODE_URB_READ_SIMD8: |
| return "urb_read_simd8"; |
| case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: |
| return "urb_read_simd8_per_slot"; |
| |
| case SHADER_OPCODE_FIND_LIVE_CHANNEL: |
| return "find_live_channel"; |
| case SHADER_OPCODE_BROADCAST: |
| return "broadcast"; |
| case SHADER_OPCODE_SHUFFLE: |
| return "shuffle"; |
| case SHADER_OPCODE_SEL_EXEC: |
| return "sel_exec"; |
| case SHADER_OPCODE_CLUSTER_BROADCAST: |
| return "cluster_broadcast"; |
| |
| case SHADER_OPCODE_GET_BUFFER_SIZE: |
| return "get_buffer_size"; |
| |
| case VEC4_OPCODE_MOV_BYTES: |
| return "mov_bytes"; |
| case VEC4_OPCODE_PACK_BYTES: |
| return "pack_bytes"; |
| case VEC4_OPCODE_UNPACK_UNIFORM: |
| return "unpack_uniform"; |
| case VEC4_OPCODE_DOUBLE_TO_F32: |
| return "double_to_f32"; |
| case VEC4_OPCODE_DOUBLE_TO_D32: |
| return "double_to_d32"; |
| case VEC4_OPCODE_DOUBLE_TO_U32: |
| return "double_to_u32"; |
| case VEC4_OPCODE_TO_DOUBLE: |
| return "single_to_double"; |
| case VEC4_OPCODE_PICK_LOW_32BIT: |
| return "pick_low_32bit"; |
| case VEC4_OPCODE_PICK_HIGH_32BIT: |
| return "pick_high_32bit"; |
| case VEC4_OPCODE_SET_LOW_32BIT: |
| return "set_low_32bit"; |
| case VEC4_OPCODE_SET_HIGH_32BIT: |
| return "set_high_32bit"; |
| |
| case FS_OPCODE_DDX_COARSE: |
| return "ddx_coarse"; |
| case FS_OPCODE_DDX_FINE: |
| return "ddx_fine"; |
| case FS_OPCODE_DDY_COARSE: |
| return "ddy_coarse"; |
| case FS_OPCODE_DDY_FINE: |
| return "ddy_fine"; |
| |
| case FS_OPCODE_CINTERP: |
| return "cinterp"; |
| case FS_OPCODE_LINTERP: |
| return "linterp"; |
| |
| case FS_OPCODE_PIXEL_X: |
| return "pixel_x"; |
| case FS_OPCODE_PIXEL_Y: |
| return "pixel_y"; |
| |
| case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: |
| return "uniform_pull_const"; |
| case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: |
| return "uniform_pull_const_gen7"; |
| case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: |
| return "varying_pull_const_gen4"; |
| case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: |
| return "varying_pull_const_gen7"; |
| case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: |
| return "varying_pull_const_logical"; |
| |
| case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: |
| return "mov_dispatch_to_flags"; |
| case FS_OPCODE_DISCARD_JUMP: |
| return "discard_jump"; |
| |
| case FS_OPCODE_SET_SAMPLE_ID: |
| return "set_sample_id"; |
| |
| case FS_OPCODE_PACK_HALF_2x16_SPLIT: |
| return "pack_half_2x16_split"; |
| case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: |
| return "unpack_half_2x16_split_x"; |
| case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: |
| return "unpack_half_2x16_split_y"; |
| |
| case FS_OPCODE_PLACEHOLDER_HALT: |
| return "placeholder_halt"; |
| |
| case FS_OPCODE_INTERPOLATE_AT_SAMPLE: |
| return "interp_sample"; |
| case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: |
| return "interp_shared_offset"; |
| case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: |
| return "interp_per_slot_offset"; |
| |
| case VS_OPCODE_URB_WRITE: |
| return "vs_urb_write"; |
| case VS_OPCODE_PULL_CONSTANT_LOAD: |
| return "pull_constant_load"; |
| case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7: |
| return "pull_constant_load_gen7"; |
| |
| case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9: |
| return "set_simd4x2_header_gen9"; |
| |
| case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: |
| return "unpack_flags_simd4x2"; |
| |
| case GS_OPCODE_URB_WRITE: |
| return "gs_urb_write"; |
| case GS_OPCODE_URB_WRITE_ALLOCATE: |
| return "gs_urb_write_allocate"; |
| case GS_OPCODE_THREAD_END: |
| return "gs_thread_end"; |
| case GS_OPCODE_SET_WRITE_OFFSET: |
| return "set_write_offset"; |
| case GS_OPCODE_SET_VERTEX_COUNT: |
| return "set_vertex_count"; |
| case GS_OPCODE_SET_DWORD_2: |
| return "set_dword_2"; |
| case GS_OPCODE_PREPARE_CHANNEL_MASKS: |
| return "prepare_channel_masks"; |
| case GS_OPCODE_SET_CHANNEL_MASKS: |
| return "set_channel_masks"; |
| case GS_OPCODE_GET_INSTANCE_ID: |
| return "get_instance_id"; |
| case GS_OPCODE_FF_SYNC: |
| return "ff_sync"; |
| case GS_OPCODE_SET_PRIMITIVE_ID: |
| return "set_primitive_id"; |
| case GS_OPCODE_SVB_WRITE: |
| return "gs_svb_write"; |
| case GS_OPCODE_SVB_SET_DST_INDEX: |
| return "gs_svb_set_dst_index"; |
| case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: |
| return "gs_ff_sync_set_primitives"; |
| case CS_OPCODE_CS_TERMINATE: |
| return "cs_terminate"; |
| case SHADER_OPCODE_BARRIER: |
| return "barrier"; |
| case SHADER_OPCODE_MULH: |
| return "mulh"; |
| case SHADER_OPCODE_MOV_INDIRECT: |
| return "mov_indirect"; |
| |
| case VEC4_OPCODE_URB_READ: |
| return "urb_read"; |
| case TCS_OPCODE_GET_INSTANCE_ID: |
| return "tcs_get_instance_id"; |
| case TCS_OPCODE_URB_WRITE: |
| return "tcs_urb_write"; |
| case TCS_OPCODE_SET_INPUT_URB_OFFSETS: |
| return "tcs_set_input_urb_offsets"; |
| case TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: |
| return "tcs_set_output_urb_offsets"; |
| case TCS_OPCODE_GET_PRIMITIVE_ID: |
| return "tcs_get_primitive_id"; |
| case TCS_OPCODE_CREATE_BARRIER_HEADER: |
| return "tcs_create_barrier_header"; |
| case TCS_OPCODE_SRC0_010_IS_ZERO: |
| return "tcs_src0<0,1,0>_is_zero"; |
| case TCS_OPCODE_RELEASE_INPUT: |
| return "tcs_release_input"; |
| case TCS_OPCODE_THREAD_END: |
| return "tcs_thread_end"; |
| case TES_OPCODE_CREATE_INPUT_READ_HEADER: |
| return "tes_create_input_read_header"; |
| case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: |
| return "tes_add_indirect_urb_offset"; |
| case TES_OPCODE_GET_PRIMITIVE_ID: |
| return "tes_get_primitive_id"; |
| |
| case SHADER_OPCODE_RND_MODE: |
| return "rnd_mode"; |
| } |
| |
| unreachable("not reached"); |
| } |
| |
| bool |
| brw_saturate_immediate(enum brw_reg_type type, struct brw_reg *reg) |
| { |
| union { |
| unsigned ud; |
| int d; |
| float f; |
| double df; |
| } imm, sat_imm = { 0 }; |
| |
| const unsigned size = type_sz(type); |
| |
| /* We want to either do a 32-bit or 64-bit data copy, the type is otherwise |
| * irrelevant, so just check the size of the type and copy from/to an |
| * appropriately sized field. |
| */ |
| if (size < 8) |
| imm.ud = reg->ud; |
| else |
| imm.df = reg->df; |
| |
| switch (type) { |
| case BRW_REGISTER_TYPE_UD: |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UW: |
| case BRW_REGISTER_TYPE_W: |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| /* Nothing to do. */ |
| return false; |
| case BRW_REGISTER_TYPE_F: |
| sat_imm.f = CLAMP(imm.f, 0.0f, 1.0f); |
| break; |
| case BRW_REGISTER_TYPE_DF: |
| sat_imm.df = CLAMP(imm.df, 0.0, 1.0); |
| break; |
| case BRW_REGISTER_TYPE_UB: |
| case BRW_REGISTER_TYPE_B: |
| unreachable("no UB/B immediates"); |
| case BRW_REGISTER_TYPE_V: |
| case BRW_REGISTER_TYPE_UV: |
| case BRW_REGISTER_TYPE_VF: |
| unreachable("unimplemented: saturate vector immediate"); |
| case BRW_REGISTER_TYPE_HF: |
| unreachable("unimplemented: saturate HF immediate"); |
| case BRW_REGISTER_TYPE_NF: |
| unreachable("no NF immediates"); |
| } |
| |
| if (size < 8) { |
| if (imm.ud != sat_imm.ud) { |
| reg->ud = sat_imm.ud; |
| return true; |
| } |
| } else { |
| if (imm.df != sat_imm.df) { |
| reg->df = sat_imm.df; |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool |
| brw_negate_immediate(enum brw_reg_type type, struct brw_reg *reg) |
| { |
| switch (type) { |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UD: |
| reg->d = -reg->d; |
| return true; |
| case BRW_REGISTER_TYPE_W: |
| case BRW_REGISTER_TYPE_UW: |
| reg->d = -(int16_t)reg->ud; |
| return true; |
| case BRW_REGISTER_TYPE_F: |
| reg->f = -reg->f; |
| return true; |
| case BRW_REGISTER_TYPE_VF: |
| reg->ud ^= 0x80808080; |
| return true; |
| case BRW_REGISTER_TYPE_DF: |
| reg->df = -reg->df; |
| return true; |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| reg->d64 = -reg->d64; |
| return true; |
| case BRW_REGISTER_TYPE_UB: |
| case BRW_REGISTER_TYPE_B: |
| unreachable("no UB/B immediates"); |
| case BRW_REGISTER_TYPE_UV: |
| case BRW_REGISTER_TYPE_V: |
| assert(!"unimplemented: negate UV/V immediate"); |
| case BRW_REGISTER_TYPE_HF: |
| assert(!"unimplemented: negate HF immediate"); |
| case BRW_REGISTER_TYPE_NF: |
| unreachable("no NF immediates"); |
| } |
| |
| return false; |
| } |
| |
| bool |
| brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) |
| { |
| switch (type) { |
| case BRW_REGISTER_TYPE_D: |
| reg->d = abs(reg->d); |
| return true; |
| case BRW_REGISTER_TYPE_W: |
| reg->d = abs((int16_t)reg->ud); |
| return true; |
| case BRW_REGISTER_TYPE_F: |
| reg->f = fabsf(reg->f); |
| return true; |
| case BRW_REGISTER_TYPE_DF: |
| reg->df = fabs(reg->df); |
| return true; |
| case BRW_REGISTER_TYPE_VF: |
| reg->ud &= ~0x80808080; |
| return true; |
| case BRW_REGISTER_TYPE_Q: |
| reg->d64 = imaxabs(reg->d64); |
| return true; |
| case BRW_REGISTER_TYPE_UB: |
| case BRW_REGISTER_TYPE_B: |
| unreachable("no UB/B immediates"); |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_UD: |
| case BRW_REGISTER_TYPE_UW: |
| case BRW_REGISTER_TYPE_UV: |
| /* Presumably the absolute value modifier on an unsigned source is a |
| * nop, but it would be nice to confirm. |
| */ |
| assert(!"unimplemented: abs unsigned immediate"); |
| case BRW_REGISTER_TYPE_V: |
| assert(!"unimplemented: abs V immediate"); |
| case BRW_REGISTER_TYPE_HF: |
| assert(!"unimplemented: abs HF immediate"); |
| case BRW_REGISTER_TYPE_NF: |
| unreachable("no NF immediates"); |
| } |
| |
| return false; |
| } |
| |
| backend_shader::backend_shader(const struct brw_compiler *compiler, |
| void *log_data, |
| void *mem_ctx, |
| const nir_shader *shader, |
| struct brw_stage_prog_data *stage_prog_data) |
| : compiler(compiler), |
| log_data(log_data), |
| devinfo(compiler->devinfo), |
| nir(shader), |
| stage_prog_data(stage_prog_data), |
| mem_ctx(mem_ctx), |
| cfg(NULL), |
| stage(shader->info.stage) |
| { |
| debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); |
| stage_name = _mesa_shader_stage_to_string(stage); |
| stage_abbrev = _mesa_shader_stage_to_abbrev(stage); |
| } |
| |
| backend_shader::~backend_shader() |
| { |
| } |
| |
| bool |
| backend_reg::equals(const backend_reg &r) const |
| { |
| return brw_regs_equal(this, &r) && offset == r.offset; |
| } |
| |
| bool |
| backend_reg::is_zero() const |
| { |
| if (file != IMM) |
| return false; |
| |
| switch (type) { |
| case BRW_REGISTER_TYPE_F: |
| return f == 0; |
| case BRW_REGISTER_TYPE_DF: |
| return df == 0; |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UD: |
| return d == 0; |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| return u64 == 0; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_reg::is_one() const |
| { |
| if (file != IMM) |
| return false; |
| |
| switch (type) { |
| case BRW_REGISTER_TYPE_F: |
| return f == 1.0f; |
| case BRW_REGISTER_TYPE_DF: |
| return df == 1.0; |
| case BRW_REGISTER_TYPE_D: |
| case BRW_REGISTER_TYPE_UD: |
| return d == 1; |
| case BRW_REGISTER_TYPE_UQ: |
| case BRW_REGISTER_TYPE_Q: |
| return u64 == 1; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_reg::is_negative_one() const |
| { |
| if (file != IMM) |
| return false; |
| |
| switch (type) { |
| case BRW_REGISTER_TYPE_F: |
| return f == -1.0; |
| case BRW_REGISTER_TYPE_DF: |
| return df == -1.0; |
| case BRW_REGISTER_TYPE_D: |
| return d == -1; |
| case BRW_REGISTER_TYPE_Q: |
| return d64 == -1; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_reg::is_null() const |
| { |
| return file == ARF && nr == BRW_ARF_NULL; |
| } |
| |
| |
| bool |
| backend_reg::is_accumulator() const |
| { |
| return file == ARF && nr == BRW_ARF_ACCUMULATOR; |
| } |
| |
| bool |
| backend_instruction::is_commutative() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_AND: |
| case BRW_OPCODE_OR: |
| case BRW_OPCODE_XOR: |
| case BRW_OPCODE_ADD: |
| case BRW_OPCODE_MUL: |
| case SHADER_OPCODE_MULH: |
| return true; |
| case BRW_OPCODE_SEL: |
| /* MIN and MAX are commutative. */ |
| if (conditional_mod == BRW_CONDITIONAL_GE || |
| conditional_mod == BRW_CONDITIONAL_L) { |
| return true; |
| } |
| /* fallthrough */ |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_instruction::is_3src(const struct gen_device_info *devinfo) const |
| { |
| return ::is_3src(devinfo, opcode); |
| } |
| |
| bool |
| backend_instruction::is_tex() const |
| { |
| return (opcode == SHADER_OPCODE_TEX || |
| opcode == FS_OPCODE_TXB || |
| opcode == SHADER_OPCODE_TXD || |
| opcode == SHADER_OPCODE_TXF || |
| opcode == SHADER_OPCODE_TXF_LZ || |
| opcode == SHADER_OPCODE_TXF_CMS || |
| opcode == SHADER_OPCODE_TXF_CMS_W || |
| opcode == SHADER_OPCODE_TXF_UMS || |
| opcode == SHADER_OPCODE_TXF_MCS || |
| opcode == SHADER_OPCODE_TXL || |
| opcode == SHADER_OPCODE_TXL_LZ || |
| opcode == SHADER_OPCODE_TXS || |
| opcode == SHADER_OPCODE_LOD || |
| opcode == SHADER_OPCODE_TG4 || |
| opcode == SHADER_OPCODE_TG4_OFFSET || |
| opcode == SHADER_OPCODE_SAMPLEINFO); |
| } |
| |
| bool |
| backend_instruction::is_math() const |
| { |
| return (opcode == SHADER_OPCODE_RCP || |
| opcode == SHADER_OPCODE_RSQ || |
| opcode == SHADER_OPCODE_SQRT || |
| opcode == SHADER_OPCODE_EXP2 || |
| opcode == SHADER_OPCODE_LOG2 || |
| opcode == SHADER_OPCODE_SIN || |
| opcode == SHADER_OPCODE_COS || |
| opcode == SHADER_OPCODE_INT_QUOTIENT || |
| opcode == SHADER_OPCODE_INT_REMAINDER || |
| opcode == SHADER_OPCODE_POW); |
| } |
| |
| bool |
| backend_instruction::is_control_flow() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_DO: |
| case BRW_OPCODE_WHILE: |
| case BRW_OPCODE_IF: |
| case BRW_OPCODE_ELSE: |
| case BRW_OPCODE_ENDIF: |
| case BRW_OPCODE_BREAK: |
| case BRW_OPCODE_CONTINUE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_instruction::can_do_source_mods() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_ADDC: |
| case BRW_OPCODE_BFE: |
| case BRW_OPCODE_BFI1: |
| case BRW_OPCODE_BFI2: |
| case BRW_OPCODE_BFREV: |
| case BRW_OPCODE_CBIT: |
| case BRW_OPCODE_FBH: |
| case BRW_OPCODE_FBL: |
| case BRW_OPCODE_SUBB: |
| case SHADER_OPCODE_BROADCAST: |
| case SHADER_OPCODE_CLUSTER_BROADCAST: |
| case SHADER_OPCODE_MOV_INDIRECT: |
| return false; |
| default: |
| return true; |
| } |
| } |
| |
| bool |
| backend_instruction::can_do_saturate() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_ADD: |
| case BRW_OPCODE_ASR: |
| case BRW_OPCODE_AVG: |
| case BRW_OPCODE_DP2: |
| case BRW_OPCODE_DP3: |
| case BRW_OPCODE_DP4: |
| case BRW_OPCODE_DPH: |
| case BRW_OPCODE_F16TO32: |
| case BRW_OPCODE_F32TO16: |
| case BRW_OPCODE_LINE: |
| case BRW_OPCODE_LRP: |
| case BRW_OPCODE_MAC: |
| case BRW_OPCODE_MAD: |
| case BRW_OPCODE_MATH: |
| case BRW_OPCODE_MOV: |
| case BRW_OPCODE_MUL: |
| case SHADER_OPCODE_MULH: |
| case BRW_OPCODE_PLN: |
| case BRW_OPCODE_RNDD: |
| case BRW_OPCODE_RNDE: |
| case BRW_OPCODE_RNDU: |
| case BRW_OPCODE_RNDZ: |
| case BRW_OPCODE_SEL: |
| case BRW_OPCODE_SHL: |
| case BRW_OPCODE_SHR: |
| case FS_OPCODE_LINTERP: |
| case SHADER_OPCODE_COS: |
| case SHADER_OPCODE_EXP2: |
| case SHADER_OPCODE_LOG2: |
| case SHADER_OPCODE_POW: |
| case SHADER_OPCODE_RCP: |
| case SHADER_OPCODE_RSQ: |
| case SHADER_OPCODE_SIN: |
| case SHADER_OPCODE_SQRT: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_instruction::can_do_cmod() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_ADD: |
| case BRW_OPCODE_ADDC: |
| case BRW_OPCODE_AND: |
| case BRW_OPCODE_ASR: |
| case BRW_OPCODE_AVG: |
| case BRW_OPCODE_CMP: |
| case BRW_OPCODE_CMPN: |
| case BRW_OPCODE_DP2: |
| case BRW_OPCODE_DP3: |
| case BRW_OPCODE_DP4: |
| case BRW_OPCODE_DPH: |
| case BRW_OPCODE_F16TO32: |
| case BRW_OPCODE_F32TO16: |
| case BRW_OPCODE_FRC: |
| case BRW_OPCODE_LINE: |
| case BRW_OPCODE_LRP: |
| case BRW_OPCODE_LZD: |
| case BRW_OPCODE_MAC: |
| case BRW_OPCODE_MACH: |
| case BRW_OPCODE_MAD: |
| case BRW_OPCODE_MOV: |
| case BRW_OPCODE_MUL: |
| case BRW_OPCODE_NOT: |
| case BRW_OPCODE_OR: |
| case BRW_OPCODE_PLN: |
| case BRW_OPCODE_RNDD: |
| case BRW_OPCODE_RNDE: |
| case BRW_OPCODE_RNDU: |
| case BRW_OPCODE_RNDZ: |
| case BRW_OPCODE_SAD2: |
| case BRW_OPCODE_SADA2: |
| case BRW_OPCODE_SHL: |
| case BRW_OPCODE_SHR: |
| case BRW_OPCODE_SUBB: |
| case BRW_OPCODE_XOR: |
| case FS_OPCODE_CINTERP: |
| case FS_OPCODE_LINTERP: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_instruction::reads_accumulator_implicitly() const |
| { |
| switch (opcode) { |
| case BRW_OPCODE_MAC: |
| case BRW_OPCODE_MACH: |
| case BRW_OPCODE_SADA2: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| bool |
| backend_instruction::writes_accumulator_implicitly(const struct gen_device_info *devinfo) const |
| { |
| return writes_accumulator || |
| (devinfo->gen < 6 && |
| ((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) || |
| (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP && |
| opcode != FS_OPCODE_CINTERP))); |
| } |
| |
| bool |
| backend_instruction::has_side_effects() const |
| { |
| switch (opcode) { |
| case SHADER_OPCODE_UNTYPED_ATOMIC: |
| case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: |
| case SHADER_OPCODE_GEN4_SCRATCH_WRITE: |
| case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: |
| case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: |
| case SHADER_OPCODE_BYTE_SCATTERED_WRITE: |
| case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: |
| case SHADER_OPCODE_TYPED_ATOMIC: |
| case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: |
| case SHADER_OPCODE_TYPED_SURFACE_WRITE: |
| case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: |
| case SHADER_OPCODE_MEMORY_FENCE: |
| case SHADER_OPCODE_URB_WRITE_SIMD8: |
| case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: |
| case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: |
| case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: |
| case FS_OPCODE_FB_WRITE: |
| case FS_OPCODE_FB_WRITE_LOGICAL: |
| case SHADER_OPCODE_BARRIER: |
| case TCS_OPCODE_URB_WRITE: |
| case TCS_OPCODE_RELEASE_INPUT: |
| case SHADER_OPCODE_RND_MODE: |
| return true; |
| default: |
| return eot; |
| } |
| } |
| |
| bool |
| backend_instruction::is_volatile() const |
| { |
| switch (opcode) { |
| case SHADER_OPCODE_UNTYPED_SURFACE_READ: |
| case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: |
| case SHADER_OPCODE_TYPED_SURFACE_READ: |
| case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: |
| case SHADER_OPCODE_BYTE_SCATTERED_READ: |
| case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: |
| case SHADER_OPCODE_URB_READ_SIMD8: |
| case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: |
| case VEC4_OPCODE_URB_READ: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| #ifndef NDEBUG |
| static bool |
| inst_is_in_block(const bblock_t *block, const backend_instruction *inst) |
| { |
| bool found = false; |
| foreach_inst_in_block (backend_instruction, i, block) { |
| if (inst == i) { |
| found = true; |
| } |
| } |
| return found; |
| } |
| #endif |
| |
| static void |
| adjust_later_block_ips(bblock_t *start_block, int ip_adjustment) |
| { |
| for (bblock_t *block_iter = start_block->next(); |
| block_iter; |
| block_iter = block_iter->next()) { |
| block_iter->start_ip += ip_adjustment; |
| block_iter->end_ip += ip_adjustment; |
| } |
| } |
| |
| void |
| backend_instruction::insert_after(bblock_t *block, backend_instruction *inst) |
| { |
| assert(this != inst); |
| |
| if (!this->is_head_sentinel()) |
| assert(inst_is_in_block(block, this) || !"Instruction not in block"); |
| |
| block->end_ip++; |
| |
| adjust_later_block_ips(block, 1); |
| |
| exec_node::insert_after(inst); |
| } |
| |
| void |
| backend_instruction::insert_before(bblock_t *block, backend_instruction *inst) |
| { |
| assert(this != inst); |
| |
| if (!this->is_tail_sentinel()) |
| assert(inst_is_in_block(block, this) || !"Instruction not in block"); |
| |
| block->end_ip++; |
| |
| adjust_later_block_ips(block, 1); |
| |
| exec_node::insert_before(inst); |
| } |
| |
| void |
| backend_instruction::insert_before(bblock_t *block, exec_list *list) |
| { |
| assert(inst_is_in_block(block, this) || !"Instruction not in block"); |
| |
| unsigned num_inst = list->length(); |
| |
| block->end_ip += num_inst; |
| |
| adjust_later_block_ips(block, num_inst); |
| |
| exec_node::insert_before(list); |
| } |
| |
| void |
| backend_instruction::remove(bblock_t *block) |
| { |
| assert(inst_is_in_block(block, this) || !"Instruction not in block"); |
| |
| adjust_later_block_ips(block, -1); |
| |
| if (block->start_ip == block->end_ip) { |
| block->cfg->remove_block(block); |
| } else { |
| block->end_ip--; |
| } |
| |
| exec_node::remove(); |
| } |
| |
| void |
| backend_shader::dump_instructions() |
| { |
| dump_instructions(NULL); |
| } |
| |
| void |
| backend_shader::dump_instructions(const char *name) |
| { |
| FILE *file = stderr; |
| if (name && geteuid() != 0) { |
| file = fopen(name, "w"); |
| if (!file) |
| file = stderr; |
| } |
| |
| if (cfg) { |
| int ip = 0; |
| foreach_block_and_inst(block, backend_instruction, inst, cfg) { |
| if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) |
| fprintf(file, "%4d: ", ip++); |
| dump_instruction(inst, file); |
| } |
| } else { |
| int ip = 0; |
| foreach_in_list(backend_instruction, inst, &instructions) { |
| if (!unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) |
| fprintf(file, "%4d: ", ip++); |
| dump_instruction(inst, file); |
| } |
| } |
| |
| if (file != stderr) { |
| fclose(file); |
| } |
| } |
| |
| void |
| backend_shader::calculate_cfg() |
| { |
| if (this->cfg) |
| return; |
| cfg = new(mem_ctx) cfg_t(&this->instructions); |
| } |
| |
| extern "C" const unsigned * |
| brw_compile_tes(const struct brw_compiler *compiler, |
| void *log_data, |
| void *mem_ctx, |
| const struct brw_tes_prog_key *key, |
| const struct brw_vue_map *input_vue_map, |
| struct brw_tes_prog_data *prog_data, |
| const nir_shader *src_shader, |
| struct gl_program *prog, |
| int shader_time_index, |
| char **error_str) |
| { |
| const struct gen_device_info *devinfo = compiler->devinfo; |
| const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; |
| const unsigned *assembly; |
| |
| nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); |
| nir->info.inputs_read = key->inputs_read; |
| nir->info.patch_inputs_read = key->patch_inputs_read; |
| |
| nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); |
| brw_nir_lower_tes_inputs(nir, input_vue_map); |
| brw_nir_lower_vue_outputs(nir, is_scalar); |
| nir = brw_postprocess_nir(nir, compiler, is_scalar); |
| |
| brw_compute_vue_map(devinfo, &prog_data->base.vue_map, |
| nir->info.outputs_written, |
| nir->info.separate_shader); |
| |
| unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4; |
| |
| assert(output_size_bytes >= 1); |
| if (output_size_bytes > GEN7_MAX_DS_URB_ENTRY_SIZE_BYTES) { |
| if (error_str) |
| *error_str = ralloc_strdup(mem_ctx, "DS outputs exceed maximum size"); |
| return NULL; |
| } |
| |
| prog_data->base.clip_distance_mask = |
| ((1 << nir->info.clip_distance_array_size) - 1); |
| prog_data->base.cull_distance_mask = |
| ((1 << nir->info.cull_distance_array_size) - 1) << |
| nir->info.clip_distance_array_size; |
| |
| /* URB entry sizes are stored as a multiple of 64 bytes. */ |
| prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64; |
| |
| /* On Cannonlake software shall not program an allocation size that |
| * specifies a size that is a multiple of 3 64B (512-bit) cachelines. |
| */ |
| if (devinfo->gen == 10 && |
| prog_data->base.urb_entry_size % 3 == 0) |
| prog_data->base.urb_entry_size++; |
| |
| prog_data->base.urb_read_length = 0; |
| |
| STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1); |
| STATIC_ASSERT(BRW_TESS_PARTITIONING_ODD_FRACTIONAL == |
| TESS_SPACING_FRACTIONAL_ODD - 1); |
| STATIC_ASSERT(BRW_TESS_PARTITIONING_EVEN_FRACTIONAL == |
| TESS_SPACING_FRACTIONAL_EVEN - 1); |
| |
| prog_data->partitioning = |
| (enum brw_tess_partitioning) (nir->info.tess.spacing - 1); |
| |
| switch (nir->info.tess.primitive_mode) { |
| case GL_QUADS: |
| prog_data->domain = BRW_TESS_DOMAIN_QUAD; |
| break; |
| case GL_TRIANGLES: |
| prog_data->domain = BRW_TESS_DOMAIN_TRI; |
| break; |
| case GL_ISOLINES: |
| prog_data->domain = BRW_TESS_DOMAIN_ISOLINE; |
| break; |
| default: |
| unreachable("invalid domain shader primitive mode"); |
| } |
| |
| if (nir->info.tess.point_mode) { |
| prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT; |
| } else if (nir->info.tess.primitive_mode == GL_ISOLINES) { |
| prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE; |
| } else { |
| /* Hardware winding order is backwards from OpenGL */ |
| prog_data->output_topology = |
| nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW |
| : BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW; |
| } |
| |
| if (unlikely(INTEL_DEBUG & DEBUG_TES)) { |
| fprintf(stderr, "TES Input "); |
| brw_print_vue_map(stderr, input_vue_map); |
| fprintf(stderr, "TES Output "); |
| brw_print_vue_map(stderr, &prog_data->base.vue_map); |
| } |
| |
| if (is_scalar) { |
| fs_visitor v(compiler, log_data, mem_ctx, (void *) key, |
| &prog_data->base.base, NULL, nir, 8, |
| shader_time_index, input_vue_map); |
| if (!v.run_tes()) { |
| if (error_str) |
| *error_str = ralloc_strdup(mem_ctx, v.fail_msg); |
| return NULL; |
| } |
| |
| prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs; |
| prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; |
| |
| fs_generator g(compiler, log_data, mem_ctx, (void *) key, |
| &prog_data->base.base, v.promoted_constants, false, |
| MESA_SHADER_TESS_EVAL); |
| if (unlikely(INTEL_DEBUG & DEBUG_TES)) { |
| g.enable_debug(ralloc_asprintf(mem_ctx, |
| "%s tessellation evaluation shader %s", |
| nir->info.label ? nir->info.label |
| : "unnamed", |
| nir->info.name)); |
| } |
| |
| g.generate_code(v.cfg, 8); |
| |
| assembly = g.get_assembly(); |
| } else { |
| brw::vec4_tes_visitor v(compiler, log_data, key, prog_data, |
| nir, mem_ctx, shader_time_index); |
| if (!v.run()) { |
| if (error_str) |
| *error_str = ralloc_strdup(mem_ctx, v.fail_msg); |
| return NULL; |
| } |
| |
| if (unlikely(INTEL_DEBUG & DEBUG_TES)) |
| v.dump_instructions(); |
| |
| assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, |
| &prog_data->base, v.cfg); |
| } |
| |
| return assembly; |
| } |