| /* |
| * Copyright © 2014 Intel Corporation |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
| * IN THE SOFTWARE. |
| */ |
| |
| #include "main/mtypes.h" |
| #include "main/macros.h" |
| #include "main/context.h" |
| #include "main/objectlabel.h" |
| #include "main/shaderapi.h" |
| #include "main/shaderobj.h" |
| #include "main/arrayobj.h" |
| #include "main/bufferobj.h" |
| #include "main/buffers.h" |
| #include "main/blend.h" |
| #include "main/enable.h" |
| #include "main/depth.h" |
| #include "main/stencil.h" |
| #include "main/varray.h" |
| #include "main/uniforms.h" |
| #include "main/fbobject.h" |
| #include "main/framebuffer.h" |
| #include "main/renderbuffer.h" |
| #include "main/texobj.h" |
| |
| #include "main/api_validate.h" |
| #include "main/state.h" |
| |
| #include "util/format_srgb.h" |
| |
| #include "vbo/vbo_context.h" |
| |
| #include "drivers/common/meta.h" |
| |
| #include "brw_defines.h" |
| #include "brw_context.h" |
| #include "brw_draw.h" |
| #include "brw_state.h" |
| #include "intel_fbo.h" |
| #include "intel_batchbuffer.h" |
| |
| #include "brw_blorp.h" |
| #include "brw_meta_util.h" |
| |
| struct brw_fast_clear_state { |
| struct gl_buffer_object *buf_obj; |
| struct gl_vertex_array_object *array_obj; |
| struct gl_shader_program *shader_prog; |
| GLuint vao; |
| GLint color_location; |
| }; |
| |
| static bool |
| brw_fast_clear_init(struct brw_context *brw) |
| { |
| struct brw_fast_clear_state *clear; |
| struct gl_context *ctx = &brw->ctx; |
| |
| if (brw->fast_clear_state) { |
| clear = brw->fast_clear_state; |
| _mesa_BindVertexArray(clear->vao); |
| return true; |
| } |
| |
| brw->fast_clear_state = clear = malloc(sizeof *clear); |
| if (clear == NULL) |
| return false; |
| |
| memset(clear, 0, sizeof *clear); |
| _mesa_GenVertexArrays(1, &clear->vao); |
| _mesa_BindVertexArray(clear->vao); |
| |
| clear->buf_obj = ctx->Driver.NewBufferObject(ctx, 0xDEADBEEF); |
| if (clear->buf_obj == NULL) |
| return false; |
| |
| clear->array_obj = _mesa_lookup_vao(ctx, clear->vao); |
| assert(clear->array_obj != NULL); |
| |
| _mesa_update_array_format(ctx, clear->array_obj, VERT_ATTRIB_GENERIC(0), |
| 2, GL_FLOAT, GL_RGBA, GL_FALSE, GL_FALSE, GL_FALSE, |
| 0, true); |
| _mesa_bind_vertex_buffer(ctx, clear->array_obj, VERT_ATTRIB_GENERIC(0), |
| clear->buf_obj, 0, sizeof(float) * 2); |
| _mesa_enable_vertex_array_attrib(ctx, clear->array_obj, |
| VERT_ATTRIB_GENERIC(0)); |
| |
| return true; |
| } |
| |
| static void |
| brw_bind_rep_write_shader(struct brw_context *brw, float *color) |
| { |
| const char *vs_source = |
| "#extension GL_AMD_vertex_shader_layer : enable\n" |
| "#extension GL_ARB_draw_instanced : enable\n" |
| "#extension GL_ARB_explicit_attrib_location : enable\n" |
| "layout(location = 0) in vec4 position;\n" |
| "uniform int layer;\n" |
| "void main()\n" |
| "{\n" |
| "#ifdef GL_AMD_vertex_shader_layer\n" |
| " gl_Layer = gl_InstanceID;\n" |
| "#endif\n" |
| " gl_Position = position;\n" |
| "}\n"; |
| const char *fs_source = |
| "uniform vec4 color;\n" |
| "void main()\n" |
| "{\n" |
| " gl_FragColor = color;\n" |
| "}\n"; |
| |
| struct brw_fast_clear_state *clear = brw->fast_clear_state; |
| struct gl_context *ctx = &brw->ctx; |
| |
| if (clear->shader_prog) { |
| _mesa_meta_use_program(ctx, clear->shader_prog); |
| _mesa_Uniform4fv(clear->color_location, 1, color); |
| return; |
| } |
| |
| _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, |
| "meta repclear", |
| &clear->shader_prog); |
| |
| clear->color_location = |
| _mesa_program_resource_location(clear->shader_prog, GL_UNIFORM, "color"); |
| |
| _mesa_meta_use_program(ctx, clear->shader_prog); |
| _mesa_Uniform4fv(clear->color_location, 1, color); |
| } |
| |
| void |
| brw_meta_fast_clear_free(struct brw_context *brw) |
| { |
| struct brw_fast_clear_state *clear = brw->fast_clear_state; |
| GET_CURRENT_CONTEXT(old_context); |
| |
| if (clear == NULL) |
| return; |
| |
| _mesa_make_current(&brw->ctx, NULL, NULL); |
| |
| _mesa_DeleteVertexArrays(1, &clear->vao); |
| _mesa_reference_buffer_object(&brw->ctx, &clear->buf_obj, NULL); |
| _mesa_reference_shader_program(&brw->ctx, &clear->shader_prog, NULL); |
| free(clear); |
| |
| if (old_context) |
| _mesa_make_current(old_context, old_context->WinSysDrawBuffer, old_context->WinSysReadBuffer); |
| else |
| _mesa_make_current(NULL, NULL, NULL); |
| } |
| |
| struct rect { |
| unsigned x0, y0, x1, y1; |
| }; |
| |
| static void |
| brw_draw_rectlist(struct brw_context *brw, struct rect *rect, int num_instances) |
| { |
| struct gl_context *ctx = &brw->ctx; |
| struct brw_fast_clear_state *clear = brw->fast_clear_state; |
| int start = 0, count = 3; |
| struct _mesa_prim prim; |
| float verts[6]; |
| |
| verts[0] = rect->x1; |
| verts[1] = rect->y1; |
| verts[2] = rect->x0; |
| verts[3] = rect->y1; |
| verts[4] = rect->x0; |
| verts[5] = rect->y0; |
| |
| /* upload new vertex data */ |
| _mesa_buffer_data(ctx, clear->buf_obj, GL_NONE, sizeof(verts), verts, |
| GL_DYNAMIC_DRAW, __func__); |
| |
| if (ctx->NewState) |
| _mesa_update_state(ctx); |
| |
| vbo_bind_arrays(ctx); |
| |
| memset(&prim, 0, sizeof prim); |
| prim.begin = 1; |
| prim.end = 1; |
| prim.mode = BRW_PRIM_OFFSET + _3DPRIM_RECTLIST; |
| prim.num_instances = num_instances; |
| prim.start = start; |
| prim.count = count; |
| |
| /* Make sure our internal prim value doesn't clash with a valid GL value. */ |
| assert(!_mesa_is_valid_prim_mode(ctx, prim.mode)); |
| |
| brw_draw_prims(ctx, &prim, 1, NULL, |
| GL_TRUE, start, start + count - 1, |
| NULL, 0, NULL); |
| } |
| |
| void |
| brw_get_fast_clear_rect(const struct brw_context *brw, |
| const struct gl_framebuffer *fb, |
| const struct intel_mipmap_tree* mt, |
| unsigned *x0, unsigned *y0, |
| unsigned *x1, unsigned *y1) |
| { |
| unsigned int x_align, y_align; |
| unsigned int x_scaledown, y_scaledown; |
| |
| /* Only single sampled surfaces need to (and actually can) be resolved. */ |
| if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE || |
| intel_miptree_is_lossless_compressed(brw, mt)) { |
| /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render |
| * Target(s)", beneath the "Fast Color Clear" bullet (p327): |
| * |
| * Clear pass must have a clear rectangle that must follow |
| * alignment rules in terms of pixels and lines as shown in the |
| * table below. Further, the clear-rectangle height and width |
| * must be multiple of the following dimensions. If the height |
| * and width of the render target being cleared do not meet these |
| * requirements, an MCS buffer can be created such that it |
| * follows the requirement and covers the RT. |
| * |
| * The alignment size in the table that follows is related to the |
| * alignment size returned by intel_get_non_msrt_mcs_alignment(), but |
| * with X alignment multiplied by 16 and Y alignment multiplied by 32. |
| */ |
| intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align); |
| x_align *= 16; |
| |
| /* SKL+ line alignment requirement for Y-tiled are half those of the prior |
| * generations. |
| */ |
| if (brw->gen >= 9) |
| y_align *= 16; |
| else |
| y_align *= 32; |
| |
| /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render |
| * Target(s)", beneath the "Fast Color Clear" bullet (p327): |
| * |
| * In order to optimize the performance MCS buffer (when bound to |
| * 1X RT) clear similarly to MCS buffer clear for MSRT case, |
| * clear rect is required to be scaled by the following factors |
| * in the horizontal and vertical directions: |
| * |
| * The X and Y scale down factors in the table that follows are each |
| * equal to half the alignment value computed above. |
| */ |
| x_scaledown = x_align / 2; |
| y_scaledown = y_align / 2; |
| |
| /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel |
| * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color |
| * Clear of Non-MultiSampled Render Target Restrictions": |
| * |
| * Clear rectangle must be aligned to two times the number of |
| * pixels in the table shown below due to 16x16 hashing across the |
| * slice. |
| */ |
| x_align *= 2; |
| y_align *= 2; |
| } else { |
| /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render |
| * Target(s)", beneath the "MSAA Compression" bullet (p326): |
| * |
| * Clear pass for this case requires that scaled down primitive |
| * is sent down with upper left co-ordinate to coincide with |
| * actual rectangle being cleared. For MSAA, clear rectangle’s |
| * height and width need to as show in the following table in |
| * terms of (width,height) of the RT. |
| * |
| * MSAA Width of Clear Rect Height of Clear Rect |
| * 2X Ceil(1/8*width) Ceil(1/2*height) |
| * 4X Ceil(1/8*width) Ceil(1/2*height) |
| * 8X Ceil(1/2*width) Ceil(1/2*height) |
| * 16X width Ceil(1/2*height) |
| * |
| * The text "with upper left co-ordinate to coincide with actual |
| * rectangle being cleared" is a little confusing--it seems to imply |
| * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to |
| * feed the pipeline using the rectangle (x,y) to |
| * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on |
| * the number of samples. Experiments indicate that this is not |
| * quite correct; actually, what the hardware appears to do is to |
| * align whatever rectangle is sent down the pipeline to the nearest |
| * multiple of 2x2 blocks, and then scale it up by a factor of N |
| * horizontally and 2 vertically. So the resulting alignment is 4 |
| * vertically and either 4 or 16 horizontally, and the scaledown |
| * factor is 2 vertically and either 2 or 8 horizontally. |
| */ |
| switch (mt->num_samples) { |
| case 2: |
| case 4: |
| x_scaledown = 8; |
| break; |
| case 8: |
| x_scaledown = 2; |
| break; |
| case 16: |
| x_scaledown = 1; |
| break; |
| default: |
| unreachable("Unexpected sample count for fast clear"); |
| } |
| y_scaledown = 2; |
| x_align = x_scaledown * 2; |
| y_align = y_scaledown * 2; |
| } |
| |
| *x0 = fb->_Xmin; |
| *x1 = fb->_Xmax; |
| if (fb->Name != 0) { |
| *y0 = fb->_Ymin; |
| *y1 = fb->_Ymax; |
| } else { |
| *y0 = fb->Height - fb->_Ymax; |
| *y1 = fb->Height - fb->_Ymin; |
| } |
| |
| *x0 = ROUND_DOWN_TO(*x0, x_align) / x_scaledown; |
| *y0 = ROUND_DOWN_TO(*y0, y_align) / y_scaledown; |
| *x1 = ALIGN(*x1, x_align) / x_scaledown; |
| *y1 = ALIGN(*y1, y_align) / y_scaledown; |
| } |
| |
| void |
| brw_meta_get_buffer_rect(const struct gl_framebuffer *fb, |
| unsigned *x0, unsigned *y0, |
| unsigned *x1, unsigned *y1) |
| { |
| *x0 = fb->_Xmin; |
| *x1 = fb->_Xmax; |
| if (fb->Name != 0) { |
| *y0 = fb->_Ymin; |
| *y1 = fb->_Ymax; |
| } else { |
| *y0 = fb->Height - fb->_Ymax; |
| *y1 = fb->Height - fb->_Ymin; |
| } |
| } |
| |
| /** |
| * Determine if fast color clear supports the given clear color. |
| * |
| * Fast color clear can only clear to color values of 1.0 or 0.0. At the |
| * moment we only support floating point, unorm, and snorm buffers. |
| */ |
| bool |
| brw_is_color_fast_clear_compatible(struct brw_context *brw, |
| const struct intel_mipmap_tree *mt, |
| const union gl_color_union *color) |
| { |
| const struct gl_context *ctx = &brw->ctx; |
| |
| /* If we're mapping the render format to a different format than the |
| * format we use for texturing then it is a bit questionable whether it |
| * should be possible to use a fast clear. Although we only actually |
| * render using a renderable format, without the override workaround it |
| * wouldn't be possible to have a non-renderable surface in a fast clear |
| * state so the hardware probably legitimately doesn't need to support |
| * this case. At least on Gen9 this really does seem to cause problems. |
| */ |
| if (brw->gen >= 9 && |
| brw_format_for_mesa_format(mt->format) != |
| brw->render_target_format[mt->format]) |
| return false; |
| |
| /* Gen9 doesn't support fast clear on single-sampled SRGB buffers. When |
| * GL_FRAMEBUFFER_SRGB is enabled any color renderbuffers will be |
| * resolved in intel_update_state. In that case it's pointless to do a |
| * fast clear because it's very likely to be immediately resolved. |
| */ |
| if (brw->gen >= 9 && |
| mt->num_samples <= 1 && |
| ctx->Color.sRGBEnabled && |
| _mesa_get_srgb_format_linear(mt->format) != mt->format) |
| return false; |
| |
| const mesa_format format = _mesa_get_render_format(ctx, mt->format); |
| if (_mesa_is_format_integer_color(format)) { |
| if (brw->gen >= 8) { |
| perf_debug("Integer fast clear not enabled for (%s)", |
| _mesa_get_format_name(format)); |
| } |
| return false; |
| } |
| |
| for (int i = 0; i < 4; i++) { |
| if (!_mesa_format_has_color_component(format, i)) { |
| continue; |
| } |
| |
| if (brw->gen < 9 && |
| color->f[i] != 0.0f && color->f[i] != 1.0f) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /** |
| * Convert the given color to a bitfield suitable for ORing into DWORD 7 of |
| * SURFACE_STATE (DWORD 12-15 on SKL+). |
| * |
| * Returned boolean tells if the given color differs from the stored. |
| */ |
| bool |
| brw_meta_set_fast_clear_color(struct brw_context *brw, |
| struct intel_mipmap_tree *mt, |
| const union gl_color_union *color) |
| { |
| union gl_color_union override_color = *color; |
| |
| /* The sampler doesn't look at the format of the surface when the fast |
| * clear color is used so we need to implement luminance, intensity and |
| * missing components manually. |
| */ |
| switch (_mesa_get_format_base_format(mt->format)) { |
| case GL_INTENSITY: |
| override_color.ui[3] = override_color.ui[0]; |
| /* flow through */ |
| case GL_LUMINANCE: |
| case GL_LUMINANCE_ALPHA: |
| override_color.ui[1] = override_color.ui[0]; |
| override_color.ui[2] = override_color.ui[0]; |
| break; |
| default: |
| for (int i = 0; i < 3; i++) { |
| if (!_mesa_format_has_color_component(mt->format, i)) |
| override_color.ui[i] = 0; |
| } |
| break; |
| } |
| |
| if (!_mesa_format_has_color_component(mt->format, 3)) { |
| if (_mesa_is_format_integer_color(mt->format)) |
| override_color.ui[3] = 1; |
| else |
| override_color.f[3] = 1.0f; |
| } |
| |
| /* Handle linear→SRGB conversion */ |
| if (brw->ctx.Color.sRGBEnabled && |
| _mesa_get_srgb_format_linear(mt->format) != mt->format) { |
| for (int i = 0; i < 3; i++) { |
| override_color.f[i] = |
| util_format_linear_to_srgb_float(override_color.f[i]); |
| } |
| } |
| |
| bool updated; |
| if (brw->gen >= 9) { |
| updated = memcmp(&mt->gen9_fast_clear_color, &override_color, |
| sizeof(mt->gen9_fast_clear_color)); |
| mt->gen9_fast_clear_color = override_color; |
| } else { |
| const uint32_t old_color_value = mt->fast_clear_color_value; |
| |
| mt->fast_clear_color_value = 0; |
| for (int i = 0; i < 4; i++) { |
| /* Testing for non-0 works for integer and float colors */ |
| if (override_color.f[i] != 0.0f) { |
| mt->fast_clear_color_value |= |
| 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT + (3 - i)); |
| } |
| } |
| |
| updated = (old_color_value != mt->fast_clear_color_value); |
| } |
| |
| return updated; |
| } |
| |
| static const uint32_t fast_clear_color[4] = { ~0, ~0, ~0, ~0 }; |
| |
| static void |
| set_fast_clear_op(struct brw_context *brw, uint32_t op) |
| { |
| /* Set op and dirty BRW_NEW_FRAGMENT_PROGRAM to make sure we re-emit |
| * 3DSTATE_PS. |
| */ |
| brw->wm.fast_clear_op = op; |
| brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; |
| } |
| |
| static void |
| use_rectlist(struct brw_context *brw, bool enable) |
| { |
| /* Set custom state to let us use _3DPRIM_RECTLIST and the replicated |
| * rendertarget write. When we enable reclist mode, we disable the |
| * viewport transform, disable clipping, enable the rep16 write |
| * optimization and disable simd8 dispatch in the PS. |
| */ |
| brw->sf.viewport_transform_enable = !enable; |
| brw->use_rep_send = enable; |
| brw->no_simd8 = enable; |
| |
| /* Dirty state to make sure we reemit the state packages affected by the |
| * custom state. We dirty BRW_NEW_FRAGMENT_PROGRAM to emit 3DSTATE_PS for |
| * disabling simd8 dispatch, _NEW_LIGHT to emit 3DSTATE_SF for disabling |
| * the viewport transform and 3DSTATE_CLIP to disable clipping for the |
| * reclist primitive. This is a little messy - it would be nicer to |
| * BRW_NEW_FAST_CLEAR flag or so, but we're out of brw state bits. Dirty |
| * _NEW_BUFFERS to make sure we emit new SURFACE_STATE with the new fast |
| * clear color value. |
| */ |
| brw->NewGLState |= _NEW_LIGHT | _NEW_BUFFERS; |
| brw->ctx.NewDriverState |= BRW_NEW_FRAGMENT_PROGRAM; |
| } |
| |
| /** |
| * Individually fast clear each color buffer attachment. On previous gens this |
| * isn't required. The motivation for this comes from one line (which seems to |
| * be specific to SKL+). The list item is in section titled _MCS Buffer for |
| * Render Target(s)_ |
| * |
| * "Since only one RT is bound with a clear pass, only one RT can be cleared |
| * at a time. To clear multiple RTs, multiple clear passes are required." |
| * |
| * The code follows the same idea as the resolve code which creates a fake FBO |
| * to avoid interfering with too much of the GL state. |
| */ |
| static void |
| fast_clear_attachments(struct brw_context *brw, |
| struct gl_framebuffer *fb, |
| uint32_t fast_clear_buffers, |
| struct rect fast_clear_rect) |
| { |
| struct gl_context *ctx = &brw->ctx; |
| const bool srgb_enabled = ctx->Color.sRGBEnabled; |
| |
| assert(brw->gen >= 9); |
| |
| /* Make sure the GL_FRAMEBUFFER_SRGB is disabled during fast clear so that |
| * the surface state will always be uploaded with a linear buffer. SRGB |
| * buffers are not supported on Gen9 because they are not marked as |
| * losslessly compressible. This shouldn't matter for the fast clear |
| * because the color is not written to the framebuffer yet so the hardware |
| * doesn't need to do any SRGB conversion. |
| */ |
| if (srgb_enabled) |
| _mesa_set_framebuffer_srgb(ctx, GL_FALSE); |
| |
| brw_bind_rep_write_shader(brw, (float *) fast_clear_color); |
| |
| /* SKL+ also has a resolve mode for compressed render targets and thus more |
| * bits to let us select the type of resolve. For fast clear resolves, it |
| * turns out we can use the same value as pre-SKL though. |
| */ |
| set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE); |
| |
| while (fast_clear_buffers) { |
| int index = ffs(fast_clear_buffers) - 1; |
| |
| fast_clear_buffers &= ~(1 << index); |
| |
| _mesa_meta_drawbuffers_from_bitfield(1 << index); |
| |
| brw_draw_rectlist(brw, &fast_clear_rect, MAX2(1, fb->MaxNumLayers)); |
| |
| /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll |
| * resolve them eventually. |
| */ |
| struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[0]; |
| struct intel_renderbuffer *irb = intel_renderbuffer(rb); |
| irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; |
| } |
| |
| set_fast_clear_op(brw, 0); |
| |
| if (srgb_enabled) |
| _mesa_set_framebuffer_srgb(ctx, GL_TRUE); |
| } |
| |
| bool |
| brw_meta_fast_clear(struct brw_context *brw, struct gl_framebuffer *fb, |
| GLbitfield buffers, bool partial_clear) |
| { |
| struct gl_context *ctx = &brw->ctx; |
| enum { FAST_CLEAR, REP_CLEAR, PLAIN_CLEAR } clear_type; |
| GLbitfield plain_clear_buffers, meta_save, rep_clear_buffers, fast_clear_buffers; |
| struct rect fast_clear_rect, clear_rect; |
| int layers; |
| |
| fast_clear_buffers = rep_clear_buffers = plain_clear_buffers = 0; |
| |
| /* First we loop through the color draw buffers and determine which ones |
| * can be fast cleared, which ones can use the replicated write and which |
| * ones have to fall back to regular color clear. |
| */ |
| for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { |
| struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; |
| struct intel_renderbuffer *irb = intel_renderbuffer(rb); |
| int index = fb->_ColorDrawBufferIndexes[buf]; |
| |
| /* Only clear the buffers present in the provided mask */ |
| if (((1 << index) & buffers) == 0) |
| continue; |
| |
| /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported, |
| * the framebuffer can be complete with some attachments missing. In |
| * this case the _ColorDrawBuffers pointer will be NULL. |
| */ |
| if (rb == NULL) |
| continue; |
| |
| clear_type = FAST_CLEAR; |
| |
| /* We don't have fast clear until gen7. */ |
| if (brw->gen < 7) |
| clear_type = REP_CLEAR; |
| |
| if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_NO_MCS) |
| clear_type = REP_CLEAR; |
| |
| /* We can't do scissored fast clears because of the restrictions on the |
| * fast clear rectangle size. |
| */ |
| if (partial_clear) |
| clear_type = REP_CLEAR; |
| |
| /* Fast clear is only supported for colors where all components are |
| * either 0 or 1. |
| */ |
| if (!brw_is_color_fast_clear_compatible(brw, irb->mt, |
| &ctx->Color.ClearColor)) |
| clear_type = REP_CLEAR; |
| |
| /* From the SNB PRM (Vol4_Part1): |
| * |
| * "Replicated data (Message Type = 111) is only supported when |
| * accessing tiled memory. Using this Message Type to access |
| * linear (untiled) memory is UNDEFINED." |
| */ |
| if (irb->mt->tiling == I915_TILING_NONE) { |
| perf_debug("Falling back to plain clear because %dx%d buffer is untiled\n", |
| irb->mt->logical_width0, irb->mt->logical_height0); |
| clear_type = PLAIN_CLEAR; |
| } |
| |
| /* Constant color writes ignore everything in blend and color calculator |
| * state. This is not documented. |
| */ |
| GLubyte *color_mask = ctx->Color.ColorMask[buf]; |
| for (int i = 0; i < 4; i++) { |
| if (_mesa_format_has_color_component(irb->mt->format, i) && |
| !(i == 3 && irb->Base.Base._BaseFormat == GL_RGB) && |
| !color_mask[i]) { |
| perf_debug("Falling back to plain clear on %dx%d buffer because of color mask\n", |
| irb->mt->logical_width0, irb->mt->logical_height0); |
| clear_type = PLAIN_CLEAR; |
| } |
| } |
| |
| /* Allocate the MCS for non MSRT surfaces now if we're doing a fast |
| * clear and we don't have the MCS yet. On failure, fall back to |
| * replicated clear. |
| */ |
| if (clear_type == FAST_CLEAR && irb->mt->mcs_mt == NULL) |
| if (!intel_miptree_alloc_non_msrt_mcs(brw, irb->mt)) |
| clear_type = REP_CLEAR; |
| |
| switch (clear_type) { |
| case FAST_CLEAR: |
| brw_meta_set_fast_clear_color(brw, irb->mt, &ctx->Color.ClearColor); |
| irb->need_downsample = true; |
| |
| /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the |
| * clear is redundant and can be skipped. Only skip after we've |
| * updated the fast clear color above though. |
| */ |
| if (irb->mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_CLEAR) |
| continue; |
| |
| /* Set fast_clear_state to RESOLVED so we don't try resolve them when |
| * we draw, in case the mt is also bound as a texture. |
| */ |
| irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; |
| irb->need_downsample = true; |
| fast_clear_buffers |= 1 << index; |
| brw_get_fast_clear_rect(brw, fb, irb->mt, |
| &fast_clear_rect.x0, &fast_clear_rect.y0, |
| &fast_clear_rect.x1, &fast_clear_rect.y1); |
| break; |
| |
| case REP_CLEAR: |
| rep_clear_buffers |= 1 << index; |
| brw_meta_get_buffer_rect(fb, |
| &clear_rect.x0, &clear_rect.y0, |
| &clear_rect.x1, &clear_rect.y1); |
| break; |
| |
| case PLAIN_CLEAR: |
| plain_clear_buffers |= 1 << index; |
| brw_meta_get_buffer_rect(fb, |
| &clear_rect.x0, &clear_rect.y0, |
| &clear_rect.x1, &clear_rect.y1); |
| continue; |
| } |
| } |
| |
| assert((fast_clear_buffers & rep_clear_buffers) == 0); |
| |
| if (!(fast_clear_buffers | rep_clear_buffers)) { |
| if (plain_clear_buffers) |
| /* If we only have plain clears, skip the meta save/restore. */ |
| goto out; |
| else |
| /* Nothing left to do. This happens when we hit the redundant fast |
| * clear case above and nothing else. |
| */ |
| return true; |
| } |
| |
| meta_save = |
| MESA_META_ALPHA_TEST | |
| MESA_META_BLEND | |
| MESA_META_DEPTH_TEST | |
| MESA_META_RASTERIZATION | |
| MESA_META_SHADER | |
| MESA_META_STENCIL_TEST | |
| MESA_META_VERTEX | |
| MESA_META_VIEWPORT | |
| MESA_META_CLIP | |
| MESA_META_CLAMP_FRAGMENT_COLOR | |
| MESA_META_MULTISAMPLE | |
| MESA_META_OCCLUSION_QUERY | |
| MESA_META_DRAW_BUFFERS; |
| |
| _mesa_meta_begin(ctx, meta_save); |
| |
| if (!brw_fast_clear_init(brw)) { |
| /* This is going to be hard to recover from, most likely out of memory. |
| * Bail and let meta try and (probably) fail for us. |
| */ |
| plain_clear_buffers = buffers; |
| goto bail_to_meta; |
| } |
| |
| /* Clears never have the color clamped. */ |
| if (ctx->Extensions.ARB_color_buffer_float) |
| _mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE); |
| |
| _mesa_set_enable(ctx, GL_DEPTH_TEST, GL_FALSE); |
| _mesa_DepthMask(GL_FALSE); |
| _mesa_set_enable(ctx, GL_STENCIL_TEST, GL_FALSE); |
| |
| use_rectlist(brw, true); |
| |
| layers = MAX2(1, fb->MaxNumLayers); |
| |
| if (brw->gen >= 9 && fast_clear_buffers) { |
| fast_clear_attachments(brw, fb, fast_clear_buffers, fast_clear_rect); |
| } else if (fast_clear_buffers) { |
| _mesa_meta_drawbuffers_from_bitfield(fast_clear_buffers); |
| brw_bind_rep_write_shader(brw, (float *) fast_clear_color); |
| set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE); |
| brw_draw_rectlist(brw, &fast_clear_rect, layers); |
| set_fast_clear_op(brw, 0); |
| |
| /* Now set the mcs we cleared to INTEL_FAST_CLEAR_STATE_CLEAR so we'll |
| * resolve them eventually. |
| */ |
| for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) { |
| struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf]; |
| struct intel_renderbuffer *irb = intel_renderbuffer(rb); |
| int index = fb->_ColorDrawBufferIndexes[buf]; |
| |
| if ((1 << index) & fast_clear_buffers) |
| irb->mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_CLEAR; |
| } |
| } |
| |
| if (rep_clear_buffers) { |
| _mesa_meta_drawbuffers_from_bitfield(rep_clear_buffers); |
| brw_bind_rep_write_shader(brw, ctx->Color.ClearColor.f); |
| brw_draw_rectlist(brw, &clear_rect, layers); |
| } |
| |
| bail_to_meta: |
| /* Dirty _NEW_BUFFERS so we reemit SURFACE_STATE which sets the fast clear |
| * color before resolve and sets irb->mt->fast_clear_state to UNRESOLVED if |
| * we render to it. |
| */ |
| brw->NewGLState |= _NEW_BUFFERS; |
| |
| |
| /* Set the custom state back to normal and dirty the same bits as above */ |
| use_rectlist(brw, false); |
| |
| _mesa_meta_end(ctx); |
| |
| /* From BSpec: Render Target Fast Clear: |
| * |
| * After Render target fast clear, pipe-control with color cache |
| * write-flush must be issued before sending any DRAW commands on that |
| * render target. |
| */ |
| brw_emit_mi_flush(brw); |
| |
| /* If we had to fall back to plain clear for any buffers, clear those now |
| * by calling into meta. |
| */ |
| out: |
| if (plain_clear_buffers) |
| _mesa_meta_glsl_Clear(&brw->ctx, plain_clear_buffers); |
| |
| return true; |
| } |
| |
| void |
| brw_get_resolve_rect(const struct brw_context *brw, |
| const struct intel_mipmap_tree *mt, |
| unsigned *x0, unsigned *y0, |
| unsigned *x1, unsigned *y1) |
| { |
| unsigned x_align, y_align; |
| unsigned x_scaledown, y_scaledown; |
| |
| /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve": |
| * |
| * A rectangle primitive must be scaled down by the following factors |
| * with respect to render target being resolved. |
| * |
| * The scaledown factors in the table that follows are related to the |
| * alignment size returned by intel_get_non_msrt_mcs_alignment() by a |
| * multiplier. For IVB and HSW, we divide by two, for BDW we multiply |
| * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling |
| * by a factor of 2. |
| */ |
| |
| intel_get_non_msrt_mcs_alignment(mt, &x_align, &y_align); |
| if (brw->gen >= 9) { |
| x_scaledown = x_align * 8; |
| y_scaledown = y_align * 8; |
| } else if (brw->gen >= 8) { |
| x_scaledown = x_align * 8; |
| y_scaledown = y_align * 16; |
| } else { |
| x_scaledown = x_align / 2; |
| y_scaledown = y_align / 2; |
| } |
| *x0 = *y0 = 0; |
| *x1 = ALIGN(mt->logical_width0, x_scaledown) / x_scaledown; |
| *y1 = ALIGN(mt->logical_height0, y_scaledown) / y_scaledown; |
| } |
| |
| void |
| brw_meta_resolve_color(struct brw_context *brw, |
| struct intel_mipmap_tree *mt) |
| { |
| struct gl_context *ctx = &brw->ctx; |
| struct gl_framebuffer *drawFb; |
| struct gl_renderbuffer *rb; |
| struct rect rect; |
| |
| brw_emit_mi_flush(brw); |
| |
| drawFb = ctx->Driver.NewFramebuffer(ctx, 0xDEADBEEF); |
| if (drawFb == NULL) { |
| _mesa_error(ctx, GL_OUT_OF_MEMORY, "in %s", __func__); |
| return; |
| } |
| |
| _mesa_meta_begin(ctx, MESA_META_ALL); |
| |
| rb = brw_get_rb_for_slice(brw, mt, 0, 0, false); |
| |
| _mesa_bind_framebuffers(ctx, drawFb, ctx->ReadBuffer); |
| _mesa_framebuffer_renderbuffer(ctx, ctx->DrawBuffer, GL_COLOR_ATTACHMENT0, |
| rb); |
| _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0); |
| |
| brw_fast_clear_init(brw); |
| |
| use_rectlist(brw, true); |
| |
| brw_bind_rep_write_shader(brw, (float *) fast_clear_color); |
| |
| /* SKL+ also has a resolve mode for compressed render targets and thus more |
| * bits to let us select the type of resolve. For fast clear resolves, it |
| * turns out we can use the same value as pre-SKL though. |
| */ |
| if (intel_miptree_is_lossless_compressed(brw, mt)) |
| set_fast_clear_op(brw, GEN9_PS_RENDER_TARGET_RESOLVE_FULL); |
| else |
| set_fast_clear_op(brw, GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE); |
| |
| mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; |
| brw_get_resolve_rect(brw, mt, &rect.x0, &rect.y0, &rect.x1, &rect.y1); |
| |
| brw_draw_rectlist(brw, &rect, 1); |
| |
| set_fast_clear_op(brw, 0); |
| use_rectlist(brw, false); |
| |
| _mesa_reference_renderbuffer(&rb, NULL); |
| _mesa_reference_framebuffer(&drawFb, NULL); |
| |
| _mesa_meta_end(ctx); |
| |
| /* We're typically called from intel_update_state() and we're supposed to |
| * return with the state all updated to what it was before |
| * brw_meta_resolve_color() was called. The meta rendering will have |
| * messed up the state and we need to call _mesa_update_state() again to |
| * get back to where we were supposed to be when resolve was called. |
| */ |
| if (ctx->NewState) |
| _mesa_update_state(ctx); |
| } |