/*
 * XGL
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Chia-I Wu <olv@lunarg.com>
 */

#include "genhw/genhw.h"
#include "img.h"
#include "buf.h"
#include "cmd_priv.h"

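/*
 * Coarse classes of GPU caches that may hold an image's data while it is in
 * a given layout.  These driver-internal flags are only used below to decide
 * which PIPE_CONTROL flush/invalidate bits a layout transition requires.
 */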
enum {
    MEM_CACHE        = 1 << 0,
    DATA_READ_CACHE  = 1 << 1,
    DATA_WRITE_CACHE = 1 << 2,
    RENDER_CACHE     = 1 << 3,
    SAMPLER_CACHE    = 1 << 4,
};

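/*
 * Map an XGL_IMAGE_LAYOUT to the set of caches (defined above) that may
 * contain the image's data while it is in that layout.
 */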
static uint32_t img_get_layout_caches(const struct intel_img *img,
                                      XGL_IMAGE_LAYOUT layout)
{
    uint32_t caches;

    switch (layout) {
    case XGL_IMAGE_LAYOUT_GENERAL:
        // General layout when image can be used for any kind of access
        caches = MEM_CACHE | DATA_READ_CACHE | DATA_WRITE_CACHE | RENDER_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
        // Optimal layout when image is only used for color attachment read/write
        caches = DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
        // Optimal layout when image is only used for depth/stencil attachment read/write
        caches = DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
        // Optimal layout when image is used for read-only depth/stencil attachment and shader access
        caches = RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
        // Optimal layout when image is used for read-only shader access
        caches = DATA_READ_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_CLEAR_OPTIMAL:
        // Optimal layout when image is used only for clear operations
        caches = RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL:
        // Optimal layout when image is used only as the source of transfer operations
        caches = MEM_CACHE | DATA_READ_CACHE | RENDER_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL:
        // Optimal layout when image is used only as the destination of transfer operations
        caches = MEM_CACHE | DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    default:
        caches = 0;
        break;
    }

    return caches;
}

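/*
 * Given the caches an image may occupy in its old and new layouts, return
 * the PIPE_CONTROL flags needed to flush dirty data and invalidate stale
 * data for the transition.  is_ds selects the depth cache flush instead of
 * the render cache flush for depth/stencil images.
 */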
static uint32_t cmd_get_flush_flags(const struct intel_cmd *cmd,
                                    uint32_t old_caches,
                                    uint32_t new_caches,
                                    bool is_ds)
{
    uint32_t flags = 0;

    /* not dirty */
    if (!(old_caches & (MEM_CACHE | RENDER_CACHE | DATA_WRITE_CACHE)))
        return 0;

    if ((old_caches & RENDER_CACHE) && (new_caches & ~RENDER_CACHE)) {
        if (is_ds)
            flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
        else
            flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
    }

    if ((old_caches & DATA_WRITE_CACHE) &&
        (new_caches & ~(DATA_READ_CACHE | DATA_WRITE_CACHE))) {
        if (cmd_gen(cmd) >= INTEL_GEN(7))
            flags |= GEN7_PIPE_CONTROL_DC_FLUSH_ENABLE;
    }

    if (new_caches & SAMPLER_CACHE)
        flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

    if ((new_caches & DATA_READ_CACHE) && old_caches != DATA_WRITE_CACHE)
        flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;

    if (!flags)
        return 0;

    flags |= GEN6_PIPE_CONTROL_CS_STALL;

    return flags;
}

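/*
 * Accumulate PIPE_CONTROL flags for an array of memory barriers and emit a
 * single flush.  Each entry is an XGL_MEMORY_BARRIER, XGL_BUFFER_MEMORY_BARRIER,
 * or XGL_IMAGE_MEMORY_BARRIER, distinguished by its structure type; image
 * barriers additionally contribute the flushes implied by their layout
 * transition.
 */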
static void cmd_memory_barriers(struct intel_cmd *cmd,
                                uint32_t flush_flags,
                                uint32_t memory_barrier_count,
                                const void **memory_barriers)
{
    uint32_t i;
    XGL_FLAGS input_mask = 0;
    XGL_FLAGS output_mask = 0;

    for (i = 0; i < memory_barrier_count; i++) {
        const union {
            XGL_STRUCTURE_TYPE type;

            XGL_MEMORY_BARRIER mem;
            XGL_BUFFER_MEMORY_BARRIER buf;
            XGL_IMAGE_MEMORY_BARRIER img;
        } *u = memory_barriers[i];

        switch (u->type) {
        case XGL_STRUCTURE_TYPE_MEMORY_BARRIER:
            output_mask |= u->mem.outputMask;
            input_mask |= u->mem.inputMask;
            break;
        case XGL_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER:
            output_mask |= u->buf.outputMask;
            input_mask |= u->buf.inputMask;
            break;
        case XGL_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER:
            output_mask |= u->img.outputMask;
            input_mask |= u->img.inputMask;
            {
                struct intel_img *img = intel_img(u->img.image);
                flush_flags |= cmd_get_flush_flags(cmd,
                        img_get_layout_caches(img, u->img.oldLayout),
                        img_get_layout_caches(img, u->img.newLayout),
                        icd_format_is_ds(img->layout.format));
            }
            break;
        default:
            break;
        }
    }

    if (output_mask & XGL_MEMORY_OUTPUT_SHADER_WRITE_BIT) {
        flush_flags |= GEN7_PIPE_CONTROL_DC_FLUSH_ENABLE;
    }
    if (output_mask & XGL_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
    }
    if (output_mask & XGL_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
    }

    /* CPU write is cache coherent, so XGL_MEMORY_OUTPUT_CPU_WRITE_BIT needs no flush. */
    /* Meta handles flushes, so XGL_MEMORY_OUTPUT_COPY_BIT needs no flush. */

    if (input_mask & (XGL_MEMORY_INPUT_SHADER_READ_BIT | XGL_MEMORY_INPUT_UNIFORM_READ_BIT)) {
        flush_flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
    }

    if (input_mask & XGL_MEMORY_INPUT_UNIFORM_READ_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;
    }

    if (input_mask & XGL_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE;
    }

    /* These bits have no corresponding cache invalidate operation:
     *   XGL_MEMORY_INPUT_CPU_READ_BIT
     *   XGL_MEMORY_INPUT_INDIRECT_COMMAND_BIT
     *   XGL_MEMORY_INPUT_INDEX_FETCH_BIT
     *   XGL_MEMORY_INPUT_COLOR_ATTACHMENT_BIT
     *   XGL_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT
     *   XGL_MEMORY_INPUT_COPY_BIT
     */

    cmd_batch_flush(cmd, flush_flags);
}

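/*
 * XGL entrypoint: wait on events recorded in the command buffer.  On this
 * hardware the wait reduces to a CS stall plus the cache maintenance done by
 * cmd_memory_barriers, as explained in the comments below.
 */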
ICD_EXPORT void XGLAPI xglCmdWaitEvents(
    XGL_CMD_BUFFER cmdBuffer,
    const XGL_EVENT_WAIT_INFO* pWaitInfo)
{
    struct intel_cmd *cmd = intel_cmd(cmdBuffer);

    /* This hardware will always wait at XGL_WAIT_EVENT_TOP_OF_PIPE.
     * Passing a pWaitInfo->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
     * does not change that.
     */

    /* Because the command buffer is serialized, reaching
     * a pipelined wait is always after completion of prior events.
     * pWaitInfo->pEvents need not be examined.
     * xglCmdWaitEvents is equivalent to the memory barrier part of xglCmdPipelineBarrier.
     * cmd_memory_barriers will wait for GEN6_PIPE_CONTROL_CS_STALL and perform
     * appropriate cache control.
     */
    cmd_memory_barriers(cmd,
            GEN6_PIPE_CONTROL_CS_STALL,
            pWaitInfo->memBarrierCount, pWaitInfo->pMemBarriers);
}

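/*
 * XGL entrypoint: record a pipeline barrier.  The wait-event portion of the
 * barrier is reduced to an optional CS stall; the memory barrier portion is
 * handled by cmd_memory_barriers.
 */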
ICD_EXPORT void XGLAPI xglCmdPipelineBarrier(
    XGL_CMD_BUFFER cmdBuffer,
    const XGL_PIPELINE_BARRIER* pBarrier)
{
    struct intel_cmd *cmd = intel_cmd(cmdBuffer);
    uint32_t pipe_control_flags = 0;
    uint32_t i;

    /* This hardware will always wait at XGL_WAIT_EVENT_TOP_OF_PIPE.
     * Passing a pBarrier->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
     * does not change that.
     */

    /* Cache control is done with PIPE_CONTROL flags.
     * With no GEN6_PIPE_CONTROL_CS_STALL flag set, it behaves as XGL_SET_EVENT_TOP_OF_PIPE.
     * All other pEvents values will behave as XGL_SET_EVENT_GPU_COMMANDS_COMPLETE.
     */
    for (i = 0; i < pBarrier->eventCount; i++) {
        switch (pBarrier->pEvents[i]) {
        case XGL_SET_EVENT_TOP_OF_PIPE:
            break;
        case XGL_SET_EVENT_VERTEX_PROCESSING_COMPLETE:
        case XGL_SET_EVENT_FRAGMENT_PROCESSING_COMPLETE:
        case XGL_SET_EVENT_GRAPHICS_PIPELINE_COMPLETE:
        case XGL_SET_EVENT_COMPUTE_PIPELINE_COMPLETE:
        case XGL_SET_EVENT_TRANSFER_COMPLETE:
        case XGL_SET_EVENT_GPU_COMMANDS_COMPLETE:
            pipe_control_flags |= GEN6_PIPE_CONTROL_CS_STALL;
            break;
        default:
            cmd->result = XGL_ERROR_UNKNOWN;
            return;
        }
    }

    /* cmd_memory_barriers can wait for GEN6_PIPE_CONTROL_CS_STALL and perform
     * appropriate cache control.
     */
    cmd_memory_barriers(cmd,
            pipe_control_flags,
            pBarrier->memBarrierCount, pBarrier->pMemBarriers);
}
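
/*
 * Illustrative only (not part of the driver): a sketch of how an application
 * might drive xglCmdPipelineBarrier with an image memory barrier.  Handle
 * creation is omitted, and only the barrier fields referenced in this file
 * (outputMask, inputMask, oldLayout, newLayout, image) are shown; the
 * barrier's structure-type member, which cmd_memory_barriers dispatches on,
 * must also be set and is not named here.
 *
 *     XGL_IMAGE_MEMORY_BARRIER img_barrier = { 0 };
 *     img_barrier.outputMask = XGL_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT;
 *     img_barrier.inputMask  = XGL_MEMORY_INPUT_SHADER_READ_BIT;
 *     img_barrier.oldLayout  = XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
 *     img_barrier.newLayout  = XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 *     img_barrier.image      = image;
 *
 *     const void *barriers[] = { &img_barrier };
 *     XGL_SET_EVENT event = XGL_SET_EVENT_GRAPHICS_PIPELINE_COMPLETE;
 *
 *     XGL_PIPELINE_BARRIER barrier = { 0 };
 *     barrier.eventCount = 1;
 *     barrier.pEvents = &event;
 *     barrier.memBarrierCount = 1;
 *     barrier.pMemBarriers = barriers;
 *
 *     xglCmdPipelineBarrier(cmd_buffer, &barrier);
 */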