/*
 * XGL
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Chia-I Wu <olv@lunarg.com>
 */

#include "genhw/genhw.h"
#include "img.h"
#include "buf.h"
#include "cmd_priv.h"

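/*
 * Each layout maps to the set of operations the image may undergo while in
 * that layout: READ_OP and WRITE_OP say whether the layout permits reads and
 * writes, and HIZ_OP marks layouts under which the hierarchical-Z (HiZ)
 * buffer may be used.  cmd_resolve_depth() keys depth/HiZ resolves off
 * changes in the HIZ_OP bit across a layout transition.
 */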
enum {
    READ_OP          = 1 << 0,
    WRITE_OP         = 1 << 1,
    HIZ_OP           = 1 << 2,
};

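/*
 * Each layout also maps to the set of cache domains that may hold the image's
 * data while it is in that layout: memory itself, the data read/write caches,
 * the render cache, and the sampler cache.  cmd_get_flush_flags() compares
 * the old and new sets to decide which PIPE_CONTROL flush and invalidate bits
 * a layout transition requires.
 */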
enum {
    MEM_CACHE        = 1 << 0,
    DATA_READ_CACHE  = 1 << 1,
    DATA_WRITE_CACHE = 1 << 2,
    RENDER_CACHE     = 1 << 3,
    SAMPLER_CACHE    = 1 << 4,
};

static uint32_t img_get_layout_ops(const struct intel_img *img,
                                   XGL_IMAGE_LAYOUT layout)
{
    uint32_t ops;

    switch (layout) {
    case XGL_IMAGE_LAYOUT_GENERAL:
        ops = READ_OP | WRITE_OP;
        break;
    case XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
        ops = READ_OP | WRITE_OP;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
        ops = READ_OP | WRITE_OP | HIZ_OP;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
        ops = READ_OP | HIZ_OP;
        break;
    case XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
        ops = READ_OP;
        break;
    case XGL_IMAGE_LAYOUT_CLEAR_OPTIMAL:
        ops = WRITE_OP | HIZ_OP;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL:
        ops = READ_OP;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL:
        ops = WRITE_OP;
        break;
    case XGL_IMAGE_LAYOUT_UNDEFINED:
    default:
        ops = 0;
        break;
    }

    return ops;
}

static uint32_t img_get_layout_caches(const struct intel_img *img,
                                      XGL_IMAGE_LAYOUT layout)
{
    uint32_t caches;

    switch (layout) {
    case XGL_IMAGE_LAYOUT_GENERAL:
        // General layout when image can be used for any kind of access
        caches = MEM_CACHE | DATA_READ_CACHE | DATA_WRITE_CACHE | RENDER_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
        // Optimal layout when image is only used for color attachment read/write
        caches = DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
        // Optimal layout when image is only used for depth/stencil attachment read/write
        caches = DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
        // Optimal layout when image is used for read-only depth/stencil attachment and shader access
        caches = RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
        // Optimal layout when image is used for read-only shader access
        caches = DATA_READ_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_CLEAR_OPTIMAL:
        // Optimal layout when image is used only for clear operations
        caches = RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL:
        // Optimal layout when image is used only as the source of transfer operations
        caches = MEM_CACHE | DATA_READ_CACHE | RENDER_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL:
        // Optimal layout when image is used only as the destination of transfer operations
        caches = MEM_CACHE | DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    default:
        caches = 0;
        break;
    }

    return caches;
}

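/*
 * Perform the resolve implied by a depth image layout transition.  When the
 * old layout was writable, leaving a HiZ-capable layout requires a depth
 * resolve so the depth buffer holds the final values, and entering a
 * HiZ-capable layout requires a HiZ resolve so the HiZ buffer becomes
 * consistent with the depth buffer again.
 */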
static void cmd_resolve_depth(struct intel_cmd *cmd,
                              struct intel_img *img,
                              XGL_IMAGE_LAYOUT old_layout,
                              XGL_IMAGE_LAYOUT new_layout,
                              const XGL_IMAGE_SUBRESOURCE_RANGE *range)
{
    const uint32_t old_ops = img_get_layout_ops(img, old_layout);
    const uint32_t new_ops = img_get_layout_ops(img, new_layout);

    if (old_ops & WRITE_OP) {
        if ((old_ops & HIZ_OP) && !(new_ops & HIZ_OP))
            cmd_meta_ds_op(cmd, INTEL_CMD_META_DS_RESOLVE, img, range);
        else if (!(old_ops & HIZ_OP) && (new_ops & HIZ_OP))
            cmd_meta_ds_op(cmd, INTEL_CMD_META_DS_HIZ_RESOLVE, img, range);
    }
}

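/*
 * Translate a cache-domain transition into PIPE_CONTROL flags: flush the
 * render/depth or data cache when the old layout may have left dirty data in
 * it, invalidate the caches the new layout will be read through, and add a
 * CS stall whenever any flush or invalidate is emitted.
 */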
static uint32_t cmd_get_flush_flags(const struct intel_cmd *cmd,
                                    uint32_t old_caches,
                                    uint32_t new_caches,
                                    bool is_ds)
{
    uint32_t flags = 0;

    /* not dirty */
    if (!(old_caches & (MEM_CACHE | RENDER_CACHE | DATA_WRITE_CACHE)))
        return 0;

    if ((old_caches & RENDER_CACHE) && (new_caches & ~RENDER_CACHE)) {
        if (is_ds)
            flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
        else
            flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
    }

    if ((old_caches & DATA_WRITE_CACHE) &&
        (new_caches & ~(DATA_READ_CACHE | DATA_WRITE_CACHE))) {
        if (cmd_gen(cmd) >= INTEL_GEN(7))
            flags |= GEN7_PIPE_CONTROL_DC_FLUSH;
    }

    if (new_caches & SAMPLER_CACHE)
        flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

    if ((new_caches & DATA_READ_CACHE) && old_caches != DATA_WRITE_CACHE)
        flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;

    if (!flags)
        return 0;

    flags |= GEN6_PIPE_CONTROL_CS_STALL;

    return flags;
}

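/*
 * Accumulate the input/output masks of the given memory, buffer, and image
 * barriers, perform any depth/HiZ resolves and layout-based cache transitions
 * for image barriers, and emit a single PIPE_CONTROL (via cmd_batch_flush)
 * covering the resulting flushes and invalidates plus the flush_flags passed
 * in by the caller.
 */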
static void cmd_memory_barriers(struct intel_cmd *cmd,
                                uint32_t flush_flags,
                                uint32_t memory_barrier_count,
                                const void** memory_barriers)
{
    uint32_t i;
    XGL_FLAGS input_mask = 0;
    XGL_FLAGS output_mask = 0;

    for (i = 0; i < memory_barrier_count; i++) {

        const union {
            XGL_STRUCTURE_TYPE type;

            XGL_MEMORY_BARRIER mem;
            XGL_BUFFER_MEMORY_BARRIER buf;
            XGL_IMAGE_MEMORY_BARRIER img;
        } *u = memory_barriers[i];

        switch (u->type)
        {
        case XGL_STRUCTURE_TYPE_MEMORY_BARRIER:
            output_mask |= u->mem.outputMask;
            input_mask |= u->mem.inputMask;
            break;
        case XGL_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER:
            output_mask |= u->buf.outputMask;
            input_mask |= u->buf.inputMask;
            break;
        case XGL_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER:
            output_mask |= u->img.outputMask;
            input_mask |= u->img.inputMask;
            {
                struct intel_img *img = intel_img(u->img.image);

                cmd_resolve_depth(cmd, img, u->img.oldLayout,
                        u->img.newLayout, &u->img.subresourceRange);

                flush_flags |= cmd_get_flush_flags(cmd,
                        img_get_layout_caches(img, u->img.oldLayout),
                        img_get_layout_caches(img, u->img.newLayout),
                        icd_format_is_ds(img->layout.format));
            }
            break;
        default:
            break;
        }
    }

    if (output_mask & XGL_MEMORY_OUTPUT_SHADER_WRITE_BIT) {
        flush_flags |= GEN7_PIPE_CONTROL_DC_FLUSH;
    }
    if (output_mask & XGL_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
    }
    if (output_mask & XGL_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
    }

    /* CPU write is cache coherent, so XGL_MEMORY_OUTPUT_CPU_WRITE_BIT needs no flush. */
    /* Meta handles flushes, so XGL_MEMORY_OUTPUT_COPY_BIT needs no flush. */

    if (input_mask & (XGL_MEMORY_INPUT_SHADER_READ_BIT | XGL_MEMORY_INPUT_UNIFORM_READ_BIT)) {
        flush_flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
    }

    if (input_mask & XGL_MEMORY_INPUT_UNIFORM_READ_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;
    }

    if (input_mask & XGL_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE;
    }

    /* These bits have no corresponding cache invalidate operation.
     * XGL_MEMORY_INPUT_CPU_READ_BIT
     * XGL_MEMORY_INPUT_INDIRECT_COMMAND_BIT
     * XGL_MEMORY_INPUT_INDEX_FETCH_BIT
     * XGL_MEMORY_INPUT_COLOR_ATTACHMENT_BIT
     * XGL_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT
     * XGL_MEMORY_INPUT_COPY_BIT
     */

    cmd_batch_flush(cmd, flush_flags);
}

ICD_EXPORT void XGLAPI xglCmdWaitEvents(
    XGL_CMD_BUFFER                              cmdBuffer,
    const XGL_EVENT_WAIT_INFO*                  pWaitInfo)
{
    struct intel_cmd *cmd = intel_cmd(cmdBuffer);

    /* This hardware will always wait at XGL_WAIT_EVENT_TOP_OF_PIPE.
     * Passing a pWaitInfo->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
     * does not change that.
     */

    /* Because the command buffer is serialized, a pipelined wait is only
     * reached after all prior events have completed, so pWaitInfo->pEvents
     * need not be examined.
     * xglCmdWaitEvents is equivalent to the memory barrier part of
     * xglCmdPipelineBarrier.  cmd_memory_barriers will wait for
     * GEN6_PIPE_CONTROL_CS_STALL and perform the appropriate cache control.
     */
    cmd_memory_barriers(cmd,
            GEN6_PIPE_CONTROL_CS_STALL,
            pWaitInfo->memBarrierCount, pWaitInfo->ppMemBarriers);
}

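/*
 * Illustrative (untested) sketch of a caller transitioning an image from the
 * transfer-destination layout to shader-read-only with a pipeline barrier.
 * Only fields referenced in this file are shown; the structure-type member,
 * subresourceRange contents, and any other members are left as assumptions
 * about the XGL headers:
 *
 *     XGL_IMAGE_MEMORY_BARRIER img_barrier = { ... };
 *     img_barrier.outputMask = XGL_MEMORY_OUTPUT_COPY_BIT;
 *     img_barrier.inputMask  = XGL_MEMORY_INPUT_SHADER_READ_BIT;
 *     img_barrier.oldLayout  = XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL;
 *     img_barrier.newLayout  = XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
 *     img_barrier.image      = image;
 *
 *     const void *barriers[] = { &img_barrier };
 *     XGL_PIPELINE_BARRIER barrier = { ... };
 *     barrier.eventCount      = 0;
 *     barrier.memBarrierCount = 1;
 *     barrier.ppMemBarriers   = barriers;
 *     xglCmdPipelineBarrier(cmdBuffer, &barrier);
 */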
ICD_EXPORT void XGLAPI xglCmdPipelineBarrier(
    XGL_CMD_BUFFER                              cmdBuffer,
    const XGL_PIPELINE_BARRIER*                 pBarrier)
{
    struct intel_cmd *cmd = intel_cmd(cmdBuffer);
    uint32_t pipe_control_flags = 0;
    uint32_t i;

    /* This hardware will always wait at XGL_WAIT_EVENT_TOP_OF_PIPE.
     * Passing a pBarrier->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
     * does not change that.
     */

    /* Cache control is done with PIPE_CONTROL flags.
     * Without GEN6_PIPE_CONTROL_CS_STALL, the barrier behaves as
     * XGL_PIPE_EVENT_TOP_OF_PIPE; all other pEvents values behave as
     * XGL_PIPE_EVENT_GPU_COMMANDS_COMPLETE.
     */
    for (i = 0; i < pBarrier->eventCount; i++) {
        switch (pBarrier->pEvents[i])
        {
        case XGL_PIPE_EVENT_TOP_OF_PIPE:
            break;
        case XGL_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE:
        case XGL_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE:
        case XGL_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE:
        case XGL_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE:
        case XGL_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE:
        case XGL_PIPE_EVENT_TRANSFER_COMPLETE:
        case XGL_PIPE_EVENT_GPU_COMMANDS_COMPLETE:
            pipe_control_flags |= GEN6_PIPE_CONTROL_CS_STALL;
            break;
        default:
            cmd_fail(cmd, XGL_ERROR_UNKNOWN);
            return;
            break;
        }
    }

    /* cmd_memory_barriers can wait for GEN6_PIPE_CONTROL_CS_STALL and perform
     * the appropriate cache control.
     */
    cmd_memory_barriers(cmd,
            pipe_control_flags,
            pBarrier->memBarrierCount, pBarrier->ppMemBarriers);
}