/*
 * XGL
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "genhw/genhw.h"
#include "img.h"
#include "buf.h"
#include "cmd_priv.h"

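/* Driver-internal cache domains that describe which GPU caches may hold
 * an image's data in a given XGL_IMAGE_LAYOUT.  These are not hardware
 * bits; cmd_get_flush_flags() translates transitions between sets of
 * them into PIPE_CONTROL flags.
 */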
enum {
    MEM_CACHE = 1 << 0,
    DATA_READ_CACHE = 1 << 1,
    DATA_WRITE_CACHE = 1 << 2,
    RENDER_CACHE = 1 << 3,
    SAMPLER_CACHE = 1 << 4,
};

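/* Return the set of cache domains that may hold an image's data while it
 * is in the given layout.
 */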
static uint32_t img_get_layout_caches(const struct intel_img *img,
                                      XGL_IMAGE_LAYOUT layout)
{
    uint32_t caches;

    switch (layout) {
    case XGL_IMAGE_LAYOUT_GENERAL:
        // General layout, when the image can be used for any kind of access
        caches = MEM_CACHE | DATA_READ_CACHE | DATA_WRITE_CACHE | RENDER_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL:
        // Optimal layout when the image is used only for color attachment read/write
        caches = DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
        // Optimal layout when the image is used only for depth/stencil attachment read/write
        caches = DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
        // Optimal layout when the image is used for read-only depth/stencil attachment and shader access
        caches = RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL:
        // Optimal layout when the image is used for read-only shader access
        caches = DATA_READ_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_CLEAR_OPTIMAL:
        // Optimal layout when the image is used only for clear operations
        caches = RENDER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_SOURCE_OPTIMAL:
        // Optimal layout when the image is used only as the source of transfer operations
        caches = MEM_CACHE | DATA_READ_CACHE | RENDER_CACHE | SAMPLER_CACHE;
        break;
    case XGL_IMAGE_LAYOUT_TRANSFER_DESTINATION_OPTIMAL:
        // Optimal layout when the image is used only as the destination of transfer operations
        caches = MEM_CACHE | DATA_WRITE_CACHE | RENDER_CACHE;
        break;
    default:
        caches = 0;
        break;
    }

    return caches;
}

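/* Compute the PIPE_CONTROL flags required by a transition from old_caches
 * to new_caches: flush caches that may hold dirty data the new domains
 * could observe, and invalidate read caches that may hold stale data.
 *
 * For example, moving a color image from
 * XGL_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL (DATA_WRITE_CACHE |
 * RENDER_CACHE) to XGL_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
 * (DATA_READ_CACHE | SAMPLER_CACHE) on Gen7 yields
 *
 *     GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH |
 *     GEN7_PIPE_CONTROL_DC_FLUSH_ENABLE |
 *     GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
 *     GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE |
 *     GEN6_PIPE_CONTROL_CS_STALL
 */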
static uint32_t cmd_get_flush_flags(const struct intel_cmd *cmd,
                                    uint32_t old_caches,
                                    uint32_t new_caches,
                                    bool is_ds)
{
    uint32_t flags = 0;

    /* none of the old cache domains can hold dirty data */
    if (!(old_caches & (MEM_CACHE | RENDER_CACHE | DATA_WRITE_CACHE)))
        return 0;

    if ((old_caches & RENDER_CACHE) && (new_caches & ~RENDER_CACHE)) {
        if (is_ds)
            flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
        else
            flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
    }

    if ((old_caches & DATA_WRITE_CACHE) &&
        (new_caches & ~(DATA_READ_CACHE | DATA_WRITE_CACHE))) {
        if (cmd_gen(cmd) >= INTEL_GEN(7))
            flags |= GEN7_PIPE_CONTROL_DC_FLUSH_ENABLE;
    }

    if (new_caches & SAMPLER_CACHE)
        flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;

    if ((new_caches & DATA_READ_CACHE) && old_caches != DATA_WRITE_CACHE)
        flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;

    if (!flags)
        return 0;

    flags |= GEN6_PIPE_CONTROL_CS_STALL;

    return flags;
}

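/* Accumulate PIPE_CONTROL flags for an array of memory barriers and emit
 * them as a single batch flush.  Image barriers contribute the flags
 * implied by their layout transition; the ORed output/input masks of all
 * barriers then add the explicitly requested flushes and invalidations.
 */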
static void cmd_memory_barriers(struct intel_cmd *cmd,
                                uint32_t flush_flags,
                                uint32_t memory_barrier_count,
                                const void* memory_barriers)
{
    uint32_t i;
    XGL_MEMORY_BARRIER *memory_barrier;
    XGL_BUFFER_MEMORY_BARRIER *buffer_memory_barrier;
    XGL_IMAGE_MEMORY_BARRIER *image_memory_barrier;
    XGL_FLAGS input_mask = 0;
    XGL_FLAGS output_mask = 0;

    for (i = 0; i < memory_barrier_count; i++) {
        memory_barrier = &((XGL_MEMORY_BARRIER *) memory_barriers)[i];
        switch (memory_barrier->sType) {
        case XGL_STRUCTURE_TYPE_MEMORY_BARRIER:
            output_mask |= memory_barrier->outputMask;
            input_mask |= memory_barrier->inputMask;
            break;
        case XGL_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER:
            buffer_memory_barrier = (XGL_BUFFER_MEMORY_BARRIER *) memory_barrier;
            output_mask |= buffer_memory_barrier->outputMask;
            input_mask |= buffer_memory_barrier->inputMask;
            break;
        case XGL_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER:
            image_memory_barrier = (XGL_IMAGE_MEMORY_BARRIER *) memory_barrier;
            output_mask |= image_memory_barrier->outputMask;
            input_mask |= image_memory_barrier->inputMask;
            {
                struct intel_img *img = intel_img(image_memory_barrier->image);
                flush_flags |= cmd_get_flush_flags(cmd,
                        img_get_layout_caches(img, image_memory_barrier->oldLayout),
                        img_get_layout_caches(img, image_memory_barrier->newLayout),
                        icd_format_is_ds(img->layout.format));
            }
            break;
        default:
            break;
        }
    }

    if (output_mask & XGL_MEMORY_OUTPUT_SHADER_WRITE_BIT) {
        flush_flags |= GEN7_PIPE_CONTROL_DC_FLUSH_ENABLE;
    }
    if (output_mask & XGL_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_RENDER_CACHE_FLUSH;
    }
    if (output_mask & XGL_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH;
    }

    /* CPU writes are cache coherent, so XGL_MEMORY_OUTPUT_CPU_WRITE_BIT needs no flush. */
    /* Meta handles flushes, so XGL_MEMORY_OUTPUT_COPY_BIT needs no flush. */

    if (input_mask & (XGL_MEMORY_INPUT_SHADER_READ_BIT | XGL_MEMORY_INPUT_UNIFORM_READ_BIT)) {
        flush_flags |= GEN6_PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
    }

    if (input_mask & XGL_MEMORY_INPUT_UNIFORM_READ_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_CONSTANT_CACHE_INVALIDATE;
    }

    if (input_mask & XGL_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT) {
        flush_flags |= GEN6_PIPE_CONTROL_VF_CACHE_INVALIDATE;
    }

    /* These bits have no corresponding cache invalidate operation:
     *   XGL_MEMORY_INPUT_CPU_READ_BIT
     *   XGL_MEMORY_INPUT_INDIRECT_COMMAND_BIT
     *   XGL_MEMORY_INPUT_INDEX_FETCH_BIT
     *   XGL_MEMORY_INPUT_COLOR_ATTACHMENT_BIT
     *   XGL_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT
     *   XGL_MEMORY_INPUT_COPY_BIT
     */

    cmd_batch_flush(cmd, flush_flags);
}

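/* xglCmdWaitEvents reduces to the memory-barrier half of
 * xglCmdPipelineBarrier with an unconditional CS stall; see the notes
 * below on why the events themselves need not be examined.
 */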
ICD_EXPORT void XGLAPI xglCmdWaitEvents(
    XGL_CMD_BUFFER                              cmdBuffer,
    const XGL_EVENT_WAIT_INFO*                  pWaitInfo)
{
    struct intel_cmd *cmd = intel_cmd(cmdBuffer);

    /* This hardware always waits at XGL_WAIT_EVENT_TOP_OF_PIPE.
     * Passing a pWaitInfo->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
     * does not change that.
     */

    /* Because the command buffer is serialized, a pipelined wait is reached
     * only after all prior events have completed, so pWaitInfo->pEvents need
     * not be examined.  xglCmdWaitEvents is equivalent to the memory-barrier
     * part of xglCmdPipelineBarrier: cmd_memory_barriers will wait with
     * GEN6_PIPE_CONTROL_CS_STALL and perform the appropriate cache control.
     */
    cmd_memory_barriers(cmd,
            GEN6_PIPE_CONTROL_CS_STALL,
            pWaitInfo->memBarrierCount, pWaitInfo->pMemBarriers);
}

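/* Translate the barrier's events into an optional CS stall and its memory
 * barriers into cache-control flags, all emitted through a single call to
 * cmd_memory_barriers().
 */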
ICD_EXPORT void XGLAPI xglCmdPipelineBarrier(
    XGL_CMD_BUFFER                              cmdBuffer,
    const XGL_PIPELINE_BARRIER*                 pBarrier)
{
    struct intel_cmd *cmd = intel_cmd(cmdBuffer);
    uint32_t pipe_control_flags = 0;
    uint32_t i;

    /* This hardware always waits at XGL_WAIT_EVENT_TOP_OF_PIPE.
     * Passing a pBarrier->waitEvent of XGL_WAIT_EVENT_BEFORE_FRAGMENT_PROCESSING
     * does not change that.
     */

    /* Cache control is done with PIPE_CONTROL flags.  Without
     * GEN6_PIPE_CONTROL_CS_STALL, the barrier behaves as
     * XGL_SET_EVENT_TOP_OF_PIPE; all other pEvents values behave as
     * XGL_SET_EVENT_GPU_COMMANDS_COMPLETE.
     */
    for (i = 0; i < pBarrier->eventCount; i++) {
        switch (pBarrier->pEvents[i]) {
        case XGL_SET_EVENT_TOP_OF_PIPE:
            break;
        case XGL_SET_EVENT_VERTEX_PROCESSING_COMPLETE:
        case XGL_SET_EVENT_FRAGMENT_PROCESSING_COMPLETE:
        case XGL_SET_EVENT_GRAPHICS_PIPELINE_COMPLETE:
        case XGL_SET_EVENT_COMPUTE_PIPELINE_COMPLETE:
        case XGL_SET_EVENT_TRANSFER_COMPLETE:
        case XGL_SET_EVENT_GPU_COMMANDS_COMPLETE:
            pipe_control_flags |= GEN6_PIPE_CONTROL_CS_STALL;
            break;
        default:
            cmd->result = XGL_ERROR_UNKNOWN;
            return;
        }
    }

    /* cmd_memory_barriers can wait with GEN6_PIPE_CONTROL_CS_STALL and
     * perform the appropriate cache control.
     */
    cmd_memory_barriers(cmd,
            pipe_control_flags,
            pBarrier->memBarrierCount, pBarrier->pMemBarriers);
}