/*
 * XGL
 *
 * Copyright (C) 2014 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genhw/genhw.h"
#include "kmd/winsys.h"
#include "dev.h"
#include "format.h"
#include "gpu.h"
#include "img.h"
#include "mem.h"
#include "view.h"

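/* Emit a Gen7/Gen7.5 SURFACE_STATE for a null surface (8 dwords). */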
static void emit_null_view_gen7(const struct intel_gpu *gpu, uint32_t dw[8])
{
    INTEL_GPU_ASSERT(gpu, 7, 7.5);

    /*
     * From the Ivy Bridge PRM, volume 4 part 1, page 62:
     *
     * "A null surface is used in instances where an actual surface is not
     *  bound. When a write message is generated to a null surface, no
     *  actual surface is written to. When a read message (including any
     *  sampling engine message) is generated to a null surface, the result
     *  is all zeros. Note that a null surface type is allowed to be used
     *  with all messages, even if it is not specifically indicated as
     *  supported. All of the remaining fields in surface state are ignored
     *  for null surfaces, with the following exceptions:
     *
     *  * Width, Height, Depth, LOD, and Render Target View Extent fields
     *    must match the depth buffer's corresponding state for all render
     *    target surfaces, including null.
     *  * All sampling engine and data port messages support null surfaces
     *    with the above behavior, even if not mentioned as specifically
     *    supported, except for the following:
     *    * Data Port Media Block Read/Write messages.
     *  * The Surface Type of a surface used as a render target (accessed
     *    via the Data Port's Render Target Write message) must be the same
     *    as the Surface Type of all other render targets and of the depth
     *    buffer (defined in 3DSTATE_DEPTH_BUFFER), unless either the depth
     *    buffer or render targets are SURFTYPE_NULL."
     *
     * From the Ivy Bridge PRM, volume 4 part 1, page 65:
     *
     * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
     *  true"
     */

    dw[0] = GEN6_SURFTYPE_NULL << GEN7_SURFACE_DW0_TYPE__SHIFT |
            GEN6_FORMAT_B8G8R8A8_UNORM << GEN7_SURFACE_DW0_FORMAT__SHIFT |
            GEN6_TILING_X << 13;

    dw[1] = 0;
    dw[2] = 0;
    dw[3] = 0;
    dw[4] = 0;
    dw[5] = 0;
    dw[6] = 0;
    dw[7] = 0;
}

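/*
 * Emit a Gen7/Gen7.5 SURFACE_STATE for a memory (buffer) view.  The view is
 * typed when elem_format is defined, structured when elem_format is undefined
 * and struct_size is greater than one, and raw otherwise.
 */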
static void emit_mem_view_gen7(const struct intel_gpu *gpu,
                               unsigned offset, unsigned size,
                               unsigned struct_size,
                               XGL_FORMAT elem_format,
                               bool is_rt, bool render_cache_rw,
                               uint32_t dw[8])
{
    const bool typed = !icd_format_is_undef(elem_format);
    const bool structured = (!typed && struct_size > 1);
    const int elem_size = (typed) ?
        icd_format_get_size(elem_format) : 1;
    int width, height, depth, pitch;
    int surface_type, surface_format, num_entries;

    INTEL_GPU_ASSERT(gpu, 7, 7.5);

    surface_type = (structured) ? GEN7_SURFTYPE_STRBUF : GEN6_SURFTYPE_BUFFER;

    surface_format = (typed) ?
        intel_format_translate_color(gpu, elem_format) : GEN6_FORMAT_RAW;

    num_entries = size / struct_size;
    /* see if there is enough space to fit another element */
    if (size % struct_size >= elem_size && !structured)
        num_entries++;

    /*
     * From the Ivy Bridge PRM, volume 4 part 1, page 67:
     *
     * "For SURFTYPE_BUFFER render targets, this field (Surface Base
     *  Address) specifies the base address of first element of the
     *  surface. The surface is interpreted as a simple array of that
     *  single element type. The address must be naturally-aligned to the
     *  element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
     *  must be 16-byte aligned)
     *
     *  For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
     *  the base address of the first element of the surface, computed in
     *  software by adding the surface base address to the byte offset of
     *  the element in the buffer."
     */
    if (is_rt)
        assert(offset % elem_size == 0);

    /*
     * From the Ivy Bridge PRM, volume 4 part 1, page 68:
     *
     * "For typed buffer and structured buffer surfaces, the number of
     *  entries in the buffer ranges from 1 to 2^27. For raw buffer
     *  surfaces, the number of entries in the buffer is the number of
     *  bytes which can range from 1 to 2^30."
     */
    assert(num_entries >= 1 &&
           num_entries <= 1 << ((typed || structured) ? 27 : 30));

    /*
     * From the Ivy Bridge PRM, volume 4 part 1, page 69:
     *
     * "For SURFTYPE_BUFFER: The low two bits of this field (Width) must be
     *  11 if the Surface Format is RAW (the size of the buffer must be a
     *  multiple of 4 bytes)."
     *
     * From the Ivy Bridge PRM, volume 4 part 1, page 70:
     *
     * "For surfaces of type SURFTYPE_BUFFER and SURFTYPE_STRBUF, this
     *  field (Surface Pitch) indicates the size of the structure."
     *
     * "For linear surfaces with Surface Type of SURFTYPE_STRBUF, the pitch
     *  must be a multiple of 4 bytes."
     */
    if (structured)
        assert(struct_size % 4 == 0);
    else if (!typed)
        assert(num_entries % 4 == 0);

    pitch = struct_size;

    pitch--;
    num_entries--;
    /* bits [6:0] */
    width = (num_entries & 0x0000007f);
    /* bits [20:7] */
    height = (num_entries & 0x001fff80) >> 7;
    /* bits [30:21] */
    depth = (num_entries & 0x7fe00000) >> 21;
    /* limit to [26:21] */
    if (typed || structured)
        depth &= 0x3f;

    dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
            surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT;
    if (render_cache_rw)
        dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;

    dw[1] = offset;

    dw[2] = height << GEN7_SURFACE_DW2_HEIGHT__SHIFT |
            width << GEN7_SURFACE_DW2_WIDTH__SHIFT;

    dw[3] = depth << GEN7_SURFACE_DW3_DEPTH__SHIFT |
            pitch;

    dw[4] = 0;
    dw[5] = 0;

    dw[6] = 0;
    dw[7] = 0;

    if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5)) {
        dw[7] |= GEN75_SCS_RED << GEN75_SURFACE_DW7_SCS_R__SHIFT |
                 GEN75_SCS_GREEN << GEN75_SURFACE_DW7_SCS_G__SHIFT |
                 GEN75_SCS_BLUE << GEN75_SURFACE_DW7_SCS_B__SHIFT |
                 GEN75_SCS_ALPHA << GEN75_SURFACE_DW7_SCS_A__SHIFT;
    }
}

static int img_type_to_view_type(XGL_IMAGE_VIEW_TYPE type)
{
    switch (type) {
    case XGL_IMAGE_1D: return XGL_IMAGE_VIEW_1D;
    case XGL_IMAGE_2D: return XGL_IMAGE_VIEW_2D;
    case XGL_IMAGE_3D: return XGL_IMAGE_VIEW_3D;
    default: assert(!"unknown img type"); return XGL_IMAGE_VIEW_1D;
    }
}

static int view_type_to_surface_type(XGL_IMAGE_VIEW_TYPE type)
{
    switch (type) {
    case XGL_IMAGE_VIEW_1D: return GEN6_SURFTYPE_1D;
    case XGL_IMAGE_VIEW_2D: return GEN6_SURFTYPE_2D;
    case XGL_IMAGE_VIEW_3D: return GEN6_SURFTYPE_3D;
    case XGL_IMAGE_VIEW_CUBE: return GEN6_SURFTYPE_CUBE;
    default: assert(!"unknown view type"); return GEN6_SURFTYPE_NULL;
    }
}

static int winsys_tiling_to_surface_tiling(enum intel_tiling_mode tiling)
{
    switch (tiling) {
    case INTEL_TILING_NONE: return GEN6_TILING_NONE;
    case INTEL_TILING_X: return GEN6_TILING_X;
    case INTEL_TILING_Y: return GEN6_TILING_Y;
    default: assert(!"unknown tiling"); return GEN6_TILING_NONE;
    }
}

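/*
 * Emit a Gen7/Gen7.5 SURFACE_STATE (8 dwords) for a view of the given image.
 * When is_rt is true, the view is set up for use as a render target rather
 * than as a sampled or data-port resource.
 */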
static void emit_img_view_gen7(const struct intel_gpu *gpu,
                               const struct intel_img *img,
                               XGL_IMAGE_VIEW_TYPE type,
                               XGL_FORMAT format,
                               unsigned first_level,
                               unsigned num_levels,
                               unsigned first_layer,
                               unsigned num_layers,
                               bool is_rt,
                               uint32_t dw[8])
{
    int surface_type, surface_format;
    int width, height, depth, pitch, lod;
    unsigned layer_offset, x_offset, y_offset;

    INTEL_GPU_ASSERT(gpu, 7, 7.5);

    surface_type = view_type_to_surface_type(type);
    assert(surface_type != GEN6_SURFTYPE_BUFFER);

    surface_format = intel_format_translate_color(gpu, format);
    assert(surface_format >= 0);

    width = img->extent.width;
    height = img->extent.height;
    depth = (type == XGL_IMAGE_VIEW_3D) ?
        img->extent.depth : num_layers;
    pitch = img->layout.bo_stride;

    if (surface_type == GEN6_SURFTYPE_CUBE) {
        /*
         * From the Ivy Bridge PRM, volume 4 part 1, page 70:
         *
         * "For SURFTYPE_CUBE: For Sampling Engine Surfaces, the range of
         *  this field is [0,340], indicating the number of cube array
         *  elements (equal to the number of underlying 2D array elements
         *  divided by 6). For other surfaces, this field must be zero."
         *
         * When is_rt is true, we treat the texture as a 2D one to avoid the
         * restriction.
         */
        if (is_rt) {
            surface_type = GEN6_SURFTYPE_2D;
        }
        else {
            assert(num_layers % 6 == 0);
            depth = num_layers / 6;
        }
    }

    /* sanity check the size */
    assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
    assert(first_layer < 2048 && num_layers <= 2048);
    switch (surface_type) {
    case GEN6_SURFTYPE_1D:
        assert(width <= 16384 && height == 1 && depth <= 2048);
        break;
    case GEN6_SURFTYPE_2D:
        assert(width <= 16384 && height <= 16384 && depth <= 2048);
        break;
    case GEN6_SURFTYPE_3D:
        assert(width <= 2048 && height <= 2048 && depth <= 2048);
        if (!is_rt)
            assert(first_layer == 0);
        break;
    case GEN6_SURFTYPE_CUBE:
        assert(width <= 16384 && height <= 16384 && depth <= 86);
        assert(width == height);
        if (is_rt)
            assert(first_layer == 0);
        break;
    default:
        assert(!"unexpected surface type");
        break;
    }

    if (is_rt) {
        assert(num_levels == 1);
        lod = first_level;
    }
    else {
        lod = num_levels - 1;
    }

    layer_offset = 0;
    x_offset = 0;
    y_offset = 0;

    /*
     * From the Ivy Bridge PRM, volume 4 part 1, page 68:
     *
     * "The Base Address for linear render target surfaces and surfaces
     *  accessed with the typed surface read/write data port messages must
     *  be element-size aligned, for non-YUV surface formats, or a multiple
     *  of 2 element-sizes for YUV surface formats. Other linear surfaces
     *  have no alignment requirements (byte alignment is sufficient)."
     *
     * From the Ivy Bridge PRM, volume 4 part 1, page 70:
     *
     * "For linear render target surfaces and surfaces accessed with the
     *  typed data port messages, the pitch must be a multiple of the
     *  element size for non-YUV surface formats. Pitch must be a multiple
     *  of 2 * element size for YUV surface formats. For linear surfaces
     *  with Surface Type of SURFTYPE_STRBUF, the pitch must be a multiple
     *  of 4 bytes. For other linear surfaces, the pitch can be any multiple
     *  of bytes."
     *
     * From the Ivy Bridge PRM, volume 4 part 1, page 74:
     *
     * "For linear surfaces, this field (X Offset) must be zero."
     */
    if (img->layout.tiling == INTEL_TILING_NONE) {
        if (is_rt) {
            const int elem_size = icd_format_get_size(format);
            assert(layer_offset % elem_size == 0);
            assert(pitch % elem_size == 0);
        }

        assert(!x_offset);
    }

    dw[0] = surface_type << GEN7_SURFACE_DW0_TYPE__SHIFT |
            surface_format << GEN7_SURFACE_DW0_FORMAT__SHIFT |
            winsys_tiling_to_surface_tiling(img->layout.tiling) << 13;

    /*
     * From the Ivy Bridge PRM, volume 4 part 1, page 63:
     *
     * "If this field (Surface Array) is enabled, the Surface Type must be
     *  SURFTYPE_1D, SURFTYPE_2D, or SURFTYPE_CUBE. If this field is
     *  disabled and Surface Type is SURFTYPE_1D, SURFTYPE_2D, or
     *  SURFTYPE_CUBE, the Depth field must be set to zero."
     *
     * For non-3D sampler surfaces, resinfo (the sampler message) always
     * returns zero for the number of layers when this field is not set.
     */
    if (surface_type != GEN6_SURFTYPE_3D) {
        if (num_layers > 1)
            dw[0] |= GEN7_SURFACE_DW0_IS_ARRAY;
        else
            assert(depth == 1);
    }

    assert(img->layout.align_i == 4 || img->layout.align_i == 8);
    assert(img->layout.align_j == 2 || img->layout.align_j == 4);

    if (img->layout.align_j == 4)
        dw[0] |= GEN7_SURFACE_DW0_VALIGN_4;

    if (img->layout.align_i == 8)
        dw[0] |= GEN7_SURFACE_DW0_HALIGN_8;

    if (img->layout.full_layers)
        dw[0] |= GEN7_SURFACE_DW0_ARYSPC_FULL;
    else
        dw[0] |= GEN7_SURFACE_DW0_ARYSPC_LOD0;

    if (is_rt)
        dw[0] |= GEN7_SURFACE_DW0_RENDER_CACHE_RW;

    if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt)
        dw[0] |= GEN7_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;

    dw[1] = layer_offset;

    dw[2] = (height - 1) << GEN7_SURFACE_DW2_HEIGHT__SHIFT |
            (width - 1) << GEN7_SURFACE_DW2_WIDTH__SHIFT;

    dw[3] = (depth - 1) << GEN7_SURFACE_DW3_DEPTH__SHIFT |
            (pitch - 1);

    dw[4] = first_layer << 18 |
            (num_layers - 1) << 7;

    /*
     * MSFMT_MSS means the samples are not interleaved and MSFMT_DEPTH_STENCIL
     * means the samples are interleaved.  The layouts are the same when the
     * number of samples is 1.
     */
    if (img->layout.interleaved_samples && img->samples > 1) {
        assert(!is_rt);
        dw[4] |= GEN7_SURFACE_DW4_MSFMT_DEPTH_STENCIL;
    }
    else {
        dw[4] |= GEN7_SURFACE_DW4_MSFMT_MSS;
    }

    if (img->samples > 4)
        dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_8;
    else if (img->samples > 2)
        dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_4;
    else
        dw[4] |= GEN7_SURFACE_DW4_MULTISAMPLECOUNT_1;

    dw[5] = x_offset << GEN7_SURFACE_DW5_X_OFFSET__SHIFT |
            y_offset << GEN7_SURFACE_DW5_Y_OFFSET__SHIFT |
            (first_level) << GEN7_SURFACE_DW5_MIN_LOD__SHIFT |
            lod;

    dw[6] = 0;
    dw[7] = 0;

    if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5)) {
        dw[7] |= GEN75_SCS_RED << GEN75_SURFACE_DW7_SCS_R__SHIFT |
                 GEN75_SCS_GREEN << GEN75_SURFACE_DW7_SCS_G__SHIFT |
                 GEN75_SCS_BLUE << GEN75_SURFACE_DW7_SCS_B__SHIFT |
                 GEN75_SCS_ALPHA << GEN75_SURFACE_DW7_SCS_A__SHIFT;
    }
}

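/* Emit a Gen6 SURFACE_STATE for a null surface (6 dwords). */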
static void emit_null_view_gen6(const struct intel_gpu *gpu, uint32_t dw[6])
{
    INTEL_GPU_ASSERT(gpu, 6, 6);

    /*
     * From the Sandy Bridge PRM, volume 4 part 1, page 71:
     *
     * "A null surface will be used in instances where an actual surface is
     *  not bound. When a write message is generated to a null surface, no
     *  actual surface is written to. When a read message (including any
     *  sampling engine message) is generated to a null surface, the result
     *  is all zeros. Note that a null surface type is allowed to be used
     *  with all messages, even if it is not specifically indicated as
     *  supported. All of the remaining fields in surface state are ignored
     *  for null surfaces, with the following exceptions:
     *
     *  * [DevSNB+]: Width, Height, Depth, and LOD fields must match the
     *    depth buffer's corresponding state for all render target
     *    surfaces, including null.
     *  * Surface Format must be R8G8B8A8_UNORM."
     *
     * From the Sandy Bridge PRM, volume 4 part 1, page 82:
     *
     * "If Surface Type is SURFTYPE_NULL, this field (Tiled Surface) must be
     *  true"
     */

    dw[0] = GEN6_SURFTYPE_NULL << GEN6_SURFACE_DW0_TYPE__SHIFT |
            GEN6_FORMAT_B8G8R8A8_UNORM << GEN6_SURFACE_DW0_FORMAT__SHIFT;

    dw[1] = 0;
    dw[2] = 0;
    dw[3] = GEN6_TILING_X;
    dw[4] = 0;
    dw[5] = 0;
}

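/*
 * Emit a Gen6 SURFACE_STATE for a memory (buffer) view.  Unlike the Gen7
 * path, there is no raw or structured buffer handling here: elem_format is
 * always translated to a typed surface format.
 */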
static void emit_mem_view_gen6(const struct intel_gpu *gpu,
                               unsigned offset, unsigned size,
                               unsigned struct_size,
                               XGL_FORMAT elem_format,
                               bool is_rt, bool render_cache_rw,
                               uint32_t dw[6])
{
    const int elem_size = icd_format_get_size(elem_format);
    int width, height, depth, pitch;
    int surface_format, num_entries;

    INTEL_GPU_ASSERT(gpu, 6, 6);

    /*
     * For SURFTYPE_BUFFER, a SURFACE_STATE specifies an element of a
     * structure in a buffer.
     */

    surface_format = intel_format_translate_color(gpu, elem_format);

    num_entries = size / struct_size;
    /* see if there is enough space to fit another element */
    if (size % struct_size >= elem_size)
        num_entries++;

    /*
     * From the Sandy Bridge PRM, volume 4 part 1, page 76:
     *
     * "For SURFTYPE_BUFFER render targets, this field (Surface Base
     *  Address) specifies the base address of first element of the
     *  surface. The surface is interpreted as a simple array of that
     *  single element type. The address must be naturally-aligned to the
     *  element size (e.g., a buffer containing R32G32B32A32_FLOAT elements
     *  must be 16-byte aligned).
     *
     *  For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
     *  the base address of the first element of the surface, computed in
     *  software by adding the surface base address to the byte offset of
     *  the element in the buffer."
     */
    if (is_rt)
        assert(offset % elem_size == 0);

    /*
     * From the Sandy Bridge PRM, volume 4 part 1, page 77:
     *
     * "For buffer surfaces, the number of entries in the buffer ranges
     *  from 1 to 2^27."
     */
    assert(num_entries >= 1 && num_entries <= 1 << 27);

    /*
     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
     *
     * "For surfaces of type SURFTYPE_BUFFER, this field (Surface Pitch)
     *  indicates the size of the structure."
     */
    pitch = struct_size;

    pitch--;
    num_entries--;
    /* bits [6:0] */
    width = (num_entries & 0x0000007f);
    /* bits [19:7] */
    height = (num_entries & 0x000fff80) >> 7;
    /* bits [26:20] */
    depth = (num_entries & 0x07f00000) >> 20;

    dw[0] = GEN6_SURFTYPE_BUFFER << GEN6_SURFACE_DW0_TYPE__SHIFT |
            surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT;
    if (render_cache_rw)
        dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;

    dw[1] = offset;

    dw[2] = height << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
            width << GEN6_SURFACE_DW2_WIDTH__SHIFT;

    dw[3] = depth << GEN6_SURFACE_DW3_DEPTH__SHIFT |
            pitch << GEN6_SURFACE_DW3_PITCH__SHIFT;

    dw[4] = 0;
    dw[5] = 0;
}

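/*
 * Emit a Gen6 SURFACE_STATE (6 dwords) for a view of the given image.  When
 * is_rt is true, the view is set up for use as a render target.
 */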
static void emit_img_view_gen6(const struct intel_gpu *gpu,
                               const struct intel_img *img,
                               XGL_IMAGE_VIEW_TYPE type,
                               XGL_FORMAT format,
                               unsigned first_level,
                               unsigned num_levels,
                               unsigned first_layer,
                               unsigned num_layers,
                               bool is_rt,
                               uint32_t dw[6])
{
    int surface_type, surface_format;
    int width, height, depth, pitch, lod;
    unsigned layer_offset, x_offset, y_offset;

    INTEL_GPU_ASSERT(gpu, 6, 6);

    surface_type = view_type_to_surface_type(type);
    assert(surface_type != GEN6_SURFTYPE_BUFFER);

    surface_format = intel_format_translate_color(gpu, format);
    assert(surface_format >= 0);

    width = img->extent.width;
    height = img->extent.height;
    depth = (type == XGL_IMAGE_VIEW_3D) ?
        img->extent.depth : num_layers;
    pitch = img->layout.bo_stride;

    if (surface_type == GEN6_SURFTYPE_CUBE) {
        /*
         * From the Sandy Bridge PRM, volume 4 part 1, page 81:
         *
         * "For SURFTYPE_CUBE: [DevSNB+]: for Sampling Engine Surfaces, the
         *  range of this field (Depth) is [0,84], indicating the number of
         *  cube array elements (equal to the number of underlying 2D array
         *  elements divided by 6). For other surfaces, this field must be
         *  zero."
         *
         * When is_rt is true, we treat the texture as a 2D one to avoid the
         * restriction.
         */
        if (is_rt) {
            surface_type = GEN6_SURFTYPE_2D;
        }
        else {
            assert(num_layers % 6 == 0);
            depth = num_layers / 6;
        }
    }

    /* sanity check the size */
    assert(width >= 1 && height >= 1 && depth >= 1 && pitch >= 1);
    switch (surface_type) {
    case GEN6_SURFTYPE_1D:
        assert(width <= 8192 && height == 1 && depth <= 512);
        assert(first_layer < 512 && num_layers <= 512);
        break;
    case GEN6_SURFTYPE_2D:
        assert(width <= 8192 && height <= 8192 && depth <= 512);
        assert(first_layer < 512 && num_layers <= 512);
        break;
    case GEN6_SURFTYPE_3D:
        assert(width <= 2048 && height <= 2048 && depth <= 2048);
        assert(first_layer < 2048 && num_layers <= 512);
        if (!is_rt)
            assert(first_layer == 0);
        break;
    case GEN6_SURFTYPE_CUBE:
        assert(width <= 8192 && height <= 8192 && depth <= 85);
        assert(width == height);
        assert(first_layer < 512 && num_layers <= 512);
        if (is_rt)
            assert(first_layer == 0);
        break;
    default:
        assert(!"unexpected surface type");
        break;
    }

    /* non-full array spacing is supported only on GEN7+ */
    assert(img->layout.full_layers);
    /* non-interleaved samples are supported only on GEN7+ */
    if (img->samples > 1)
        assert(img->layout.interleaved_samples);

    if (is_rt) {
        assert(num_levels == 1);
        lod = first_level;
    }
    else {
        lod = num_levels - 1;
    }

    layer_offset = 0;
    x_offset = 0;
    y_offset = 0;

    /*
     * From the Sandy Bridge PRM, volume 4 part 1, page 76:
     *
     * "Linear render target surface base addresses must be element-size
     *  aligned, for non-YUV surface formats, or a multiple of 2
     *  element-sizes for YUV surface formats. Other linear surfaces have
     *  no alignment requirements (byte alignment is sufficient.)"
     *
     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
     *
     * "For linear render target surfaces, the pitch must be a multiple
     *  of the element size for non-YUV surface formats. Pitch must be a
     *  multiple of 2 * element size for YUV surface formats."
     *
     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
     *
     * "For linear surfaces, this field (X Offset) must be zero"
     */
    if (img->layout.tiling == INTEL_TILING_NONE) {
        if (is_rt) {
            const int elem_size = icd_format_get_size(format);
            assert(layer_offset % elem_size == 0);
            assert(pitch % elem_size == 0);
        }

        assert(!x_offset);
    }

    dw[0] = surface_type << GEN6_SURFACE_DW0_TYPE__SHIFT |
            surface_format << GEN6_SURFACE_DW0_FORMAT__SHIFT |
            GEN6_SURFACE_DW0_MIPLAYOUT_BELOW;

    if (surface_type == GEN6_SURFTYPE_CUBE && !is_rt) {
        dw[0] |= 1 << 9 |
                 GEN6_SURFACE_DW0_CUBE_FACE_ENABLES__MASK;
    }

    if (is_rt)
        dw[0] |= GEN6_SURFACE_DW0_RENDER_CACHE_RW;

    dw[1] = layer_offset;

    dw[2] = (height - 1) << GEN6_SURFACE_DW2_HEIGHT__SHIFT |
            (width - 1) << GEN6_SURFACE_DW2_WIDTH__SHIFT |
            lod << GEN6_SURFACE_DW2_MIP_COUNT_LOD__SHIFT;

    dw[3] = (depth - 1) << GEN6_SURFACE_DW3_DEPTH__SHIFT |
            (pitch - 1) << GEN6_SURFACE_DW3_PITCH__SHIFT |
            winsys_tiling_to_surface_tiling(img->layout.tiling);

    dw[4] = first_level << GEN6_SURFACE_DW4_MIN_LOD__SHIFT |
            first_layer << 17 |
            (num_layers - 1) << 8 |
            ((img->samples > 1) ? GEN6_SURFACE_DW4_MULTISAMPLECOUNT_4 :
                                  GEN6_SURFACE_DW4_MULTISAMPLECOUNT_1);

    dw[5] = x_offset << GEN6_SURFACE_DW5_X_OFFSET__SHIFT |
            y_offset << GEN6_SURFACE_DW5_Y_OFFSET__SHIFT;

    assert(img->layout.align_j == 2 || img->layout.align_j == 4);
    if (img->layout.align_j == 4)
        dw[5] |= GEN6_SURFACE_DW5_VALIGN_4;
}

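/* Parameters used to emit the depth, stencil, and hiz buffer states. */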
struct ds_surface_info {
    int surface_type;
    int format;

    struct {
        unsigned stride;
    } zs, stencil, hiz;

    unsigned width, height, depth;
    unsigned lod, first_layer, num_layers;
};

static void
ds_init_info_null(const struct intel_gpu *gpu,
                  struct ds_surface_info *info)
{
    INTEL_GPU_ASSERT(gpu, 6, 7.5);

    memset(info, 0, sizeof(*info));

    info->surface_type = GEN6_SURFTYPE_NULL;
    info->format = GEN6_ZFORMAT_D32_FLOAT;
    info->width = 1;
    info->height = 1;
    info->depth = 1;
    info->num_layers = 1;
}

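/*
 * Fill in a ds_surface_info describing a depth/stencil view of the given
 * image.  Falls back to a null surface when the format is not a supported
 * depth/stencil format.
 */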
static void
ds_init_info(const struct intel_gpu *gpu,
             const struct intel_img *img,
             XGL_FORMAT format, unsigned level,
             unsigned first_layer, unsigned num_layers,
             struct ds_surface_info *info)
{
    bool separate_stencil;

    INTEL_GPU_ASSERT(gpu, 6, 7.5);

    memset(info, 0, sizeof(*info));

    info->surface_type =
        view_type_to_surface_type(img_type_to_view_type(img->type));

    if (info->surface_type == GEN6_SURFTYPE_CUBE) {
        /*
         * From the Sandy Bridge PRM, volume 2 part 1, pages 325-326:
         *
         * "For Other Surfaces (Cube Surfaces):
         *  This field (Minimum Array Element) is ignored."
         *
         * "For Other Surfaces (Cube Surfaces):
         *  This field (Render Target View Extent) is ignored."
         *
         * As such, we cannot set first_layer and num_layers on cube surfaces.
         * To work around that, treat it as a 2D surface.
         */
        info->surface_type = GEN6_SURFTYPE_2D;
    }

    if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) {
        separate_stencil = true;
    }
    else {
        /*
         * From the Sandy Bridge PRM, volume 2 part 1, page 317:
         *
         * "This field (Separate Stencil Buffer Enable) must be set to the
         *  same value (enabled or disabled) as Hierarchical Depth Buffer
         *  Enable."
         */
        separate_stencil = img->aux_offset;
    }

    /*
     * From the Sandy Bridge PRM, volume 2 part 1, page 317:
     *
     * "If this field (Hierarchical Depth Buffer Enable) is enabled, the
     *  Surface Format of the depth buffer cannot be
     *  D32_FLOAT_S8X24_UINT or D24_UNORM_S8_UINT. Use of stencil
     *  requires the separate stencil buffer."
     *
     * From the Ironlake PRM, volume 2 part 1, page 330:
     *
     * "If this field (Separate Stencil Buffer Enable) is disabled, the
     *  Surface Format of the depth buffer cannot be D24_UNORM_X8_UINT."
     *
     * There is no similar restriction for GEN6.  But when D24_UNORM_X8_UINT
     * is indeed used, the depth values output by the fragment shaders will
     * be different when read back.
     *
     * As for GEN7+, separate_stencil is always true.
     */
    switch (format.channelFormat) {
    case XGL_CH_FMT_R16:
        info->format = GEN6_ZFORMAT_D16_UNORM;
        break;
    case XGL_CH_FMT_R32:
        info->format = GEN6_ZFORMAT_D32_FLOAT;
        break;
    case XGL_CH_FMT_R32G8:
        info->format = (separate_stencil) ?
            GEN6_ZFORMAT_D32_FLOAT :
            GEN6_ZFORMAT_D32_FLOAT_S8X24_UINT;
        break;
    case XGL_CH_FMT_R8:
        if (separate_stencil) {
            info->format = GEN6_ZFORMAT_D32_FLOAT;
            break;
        }
        /* fall through */
    default:
        assert(!"unsupported depth/stencil format");
        ds_init_info_null(gpu, info);
        return;
    }

    if (format.channelFormat != XGL_CH_FMT_R8)
        info->zs.stride = img->layout.bo_stride;

    if (img->s8_layout) {
        /*
         * From the Sandy Bridge PRM, volume 2 part 1, page 329:
         *
         * "The pitch must be set to 2x the value computed based on width,
         *  as the stencil buffer is stored with two rows interleaved."
         *
         * According to the classic driver, we need to do the same for GEN7+
         * even though the Ivy Bridge PRM does not say anything about it.
         */
        info->stencil.stride = img->s8_layout->bo_stride * 2;
    } else if (format.channelFormat == XGL_CH_FMT_R8) {
        info->stencil.stride = img->layout.bo_stride * 2;
    }

    if (img->aux_offset)
        info->hiz.stride = img->layout.aux_stride;

    info->width = img->extent.width;
    info->height = img->extent.height;
    info->depth = (img->type == XGL_IMAGE_3D) ?
        img->extent.depth : num_layers;

    info->lod = level;
    info->first_layer = first_layer;
    info->num_layers = num_layers;
}

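/*
 * Fill dw[10] with the values used for the depth buffer (dw[0..5]), the
 * separate stencil buffer (dw[6..7]), and the hiz buffer (dw[8..9]) of the
 * given image, or for a null depth surface when img is NULL.
 */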
static void emit_ds_view(const struct intel_gpu *gpu,
                         const struct intel_img *img,
                         XGL_FORMAT format, unsigned level,
                         unsigned first_layer, unsigned num_layers,
                         uint32_t dw[10])
{
    const int max_2d_size = (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 16384 : 8192;
    const int max_array_size = (intel_gpu_gen(gpu) >= INTEL_GEN(7)) ? 2048 : 512;
    struct ds_surface_info info;
    uint32_t dw1, dw2, dw3, dw4, dw5, dw6;

    INTEL_GPU_ASSERT(gpu, 6, 7.5);

    if (img) {
        ds_init_info(gpu, img, format, level, first_layer, num_layers, &info);
    }
    else {
        ds_init_info_null(gpu, &info);
    }

    switch (info.surface_type) {
    case GEN6_SURFTYPE_NULL:
        break;
    case GEN6_SURFTYPE_1D:
        assert(info.width <= max_2d_size && info.height == 1 &&
               info.depth <= max_array_size);
        assert(info.first_layer < max_array_size - 1 &&
               info.num_layers <= max_array_size);
        break;
    case GEN6_SURFTYPE_2D:
        assert(info.width <= max_2d_size && info.height <= max_2d_size &&
               info.depth <= max_array_size);
        assert(info.first_layer < max_array_size - 1 &&
               info.num_layers <= max_array_size);
        break;
    case GEN6_SURFTYPE_3D:
        assert(info.width <= 2048 && info.height <= 2048 && info.depth <= 2048);
        assert(info.first_layer < 2048 && info.num_layers <= max_array_size);
        break;
    case GEN6_SURFTYPE_CUBE:
        assert(info.width <= max_2d_size && info.height <= max_2d_size &&
               info.depth == 1);
        assert(info.first_layer == 0 && info.num_layers == 1);
        assert(info.width == info.height);
        break;
    default:
        assert(!"unexpected depth surface type");
        break;
    }

    dw1 = info.surface_type << 29 |
          info.format << 18;

    if (info.zs.stride) {
        /* required for GEN6+ */
        assert(info.zs.stride > 0 && info.zs.stride < 128 * 1024 &&
               info.zs.stride % 128 == 0);
        assert(info.width <= info.zs.stride);

        dw1 |= (info.zs.stride - 1);
    }

    dw2 = 0;

    if (intel_gpu_gen(gpu) >= INTEL_GEN(7)) {
        if (info.zs.stride)
            dw1 |= 1 << 28;

        if (info.stencil.stride)
            dw1 |= 1 << 27;

        if (info.hiz.stride)
            dw1 |= 1 << 22;

        dw3 = (info.height - 1) << 18 |
              (info.width - 1) << 4 |
              info.lod;

        dw4 = (info.depth - 1) << 21 |
              info.first_layer << 10;

        dw5 = 0;

        dw6 = (info.num_layers - 1) << 21;
    }
    else {
        /* always Y-tiled */
        dw1 |= 1 << 27 |
               1 << 26;

        if (info.hiz.stride) {
            dw1 |= 1 << 22 |
                   1 << 21;
        }

        dw3 = (info.height - 1) << 19 |
              (info.width - 1) << 6 |
              info.lod << 2 |
              GEN6_DEPTH_DW3_MIPLAYOUT_BELOW;

        dw4 = (info.depth - 1) << 21 |
              info.first_layer << 10 |
              (info.num_layers - 1) << 1;

        dw5 = 0;

        dw6 = 0;
    }

    dw[0] = dw1;
    dw[1] = dw2;
    dw[2] = dw3;
    dw[3] = dw4;
    dw[4] = dw5;
    dw[5] = dw6;

    /* separate stencil */
    if (info.stencil.stride) {
        assert(info.stencil.stride > 0 && info.stencil.stride < 128 * 1024 &&
               info.stencil.stride % 128 == 0);

        dw[6] = info.stencil.stride - 1;
        dw[7] = img->s8_offset;

        if (intel_gpu_gen(gpu) >= INTEL_GEN(7.5))
            dw[6] |= GEN75_STENCIL_DW1_STENCIL_BUFFER_ENABLE;
    }
    else {
        dw[6] = 0;
        dw[7] = 0;
    }

    /* hiz */
    if (info.hiz.stride) {
        dw[8] = info.hiz.stride - 1;
        dw[9] = img->aux_offset;
    }
    else {
        dw[8] = 0;
        dw[9] = 0;
    }
}

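/* Initialize a null view, picking the Gen6 or Gen7 SURFACE_STATE layout. */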
void intel_null_view_init(struct intel_null_view *view,
                          struct intel_dev *dev)
{
    if (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7))
        emit_null_view_gen7(dev->gpu, view->cmd);
    else
        emit_null_view_gen6(dev->gpu, view->cmd);
}

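/*
 * Initialize a memory view.  The surface is made render-cache read/write
 * only when the memory state allows shader writes.
 */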
void intel_mem_view_init(struct intel_mem_view *view,
                         struct intel_dev *dev,
                         const XGL_MEMORY_VIEW_ATTACH_INFO *info)
{
    bool will_write;

    switch (info->state) {
    case XGL_MEMORY_STATE_GRAPHICS_SHADER_WRITE_ONLY:
    case XGL_MEMORY_STATE_GRAPHICS_SHADER_READ_WRITE:
    case XGL_MEMORY_STATE_COMPUTE_SHADER_WRITE_ONLY:
    case XGL_MEMORY_STATE_COMPUTE_SHADER_READ_WRITE:
        will_write = true;
        break;
    default:
        will_write = false;
        break;
    }

    view->mem = intel_mem(info->mem);

    if (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) {
        emit_mem_view_gen7(dev->gpu, info->offset, info->range, info->stride,
                           info->format, will_write, will_write, view->cmd);
    } else {
        emit_mem_view_gen6(dev->gpu, info->offset, info->range, info->stride,
                           info->format, will_write, will_write, view->cmd);
    }
}

static void img_view_destroy(struct intel_obj *obj)
{
    struct intel_img_view *view = intel_img_view_from_obj(obj);

    intel_img_view_destroy(view);
}

XGL_RESULT intel_img_view_create(struct intel_dev *dev,
                                 const XGL_IMAGE_VIEW_CREATE_INFO *info,
                                 struct intel_img_view **view_ret)
{
    struct intel_img *img = intel_img(info->image);
    struct intel_img_view *view;

    view = (struct intel_img_view *) intel_base_create(dev, sizeof(*view),
            dev->base.dbg, XGL_DBG_OBJECT_IMAGE_VIEW, info, 0);
    if (!view)
        return XGL_ERROR_OUT_OF_MEMORY;

    view->obj.destroy = img_view_destroy;

    view->img = img;
    view->swizzles = info->channels;
    view->min_lod = info->minLod;

    if (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) {
        emit_img_view_gen7(dev->gpu, img, info->viewType, info->format,
                           info->subresourceRange.baseMipLevel,
                           info->subresourceRange.mipLevels,
                           info->subresourceRange.baseArraySlice,
                           info->subresourceRange.arraySize, false, view->cmd);
    } else {
        emit_img_view_gen6(dev->gpu, img, info->viewType, info->format,
                           info->subresourceRange.baseMipLevel,
                           info->subresourceRange.mipLevels,
                           info->subresourceRange.baseArraySlice,
                           info->subresourceRange.arraySize, false, view->cmd);
    }

    *view_ret = view;

    return XGL_SUCCESS;
}

void intel_img_view_destroy(struct intel_img_view *view)
{
    intel_base_destroy(&view->obj.base);
}

static void rt_view_destroy(struct intel_obj *obj)
{
    struct intel_rt_view *view = intel_rt_view_from_obj(obj);

    intel_rt_view_destroy(view);
}

XGL_RESULT intel_rt_view_create(struct intel_dev *dev,
                                const XGL_COLOR_ATTACHMENT_VIEW_CREATE_INFO *info,
                                struct intel_rt_view **view_ret)
{
    struct intel_img *img = intel_img(info->image);
    struct intel_rt_view *view;

    view = (struct intel_rt_view *) intel_base_create(dev, sizeof(*view),
            dev->base.dbg, XGL_DBG_OBJECT_COLOR_TARGET_VIEW, info, 0);
    if (!view)
        return XGL_ERROR_OUT_OF_MEMORY;

    view->obj.destroy = rt_view_destroy;

    view->img = img;

    if (intel_gpu_gen(dev->gpu) >= INTEL_GEN(7)) {
        emit_img_view_gen7(dev->gpu, img, img_type_to_view_type(img->type),
                           info->format, info->mipLevel, 1,
                           info->baseArraySlice, info->arraySize,
                           true, view->cmd);
    } else {
        emit_img_view_gen6(dev->gpu, img, img_type_to_view_type(img->type),
                           info->format, info->mipLevel, 1,
                           info->baseArraySlice, info->arraySize,
                           true, view->cmd);
    }

    *view_ret = view;

    return XGL_SUCCESS;
}

void intel_rt_view_destroy(struct intel_rt_view *view)
{
    intel_base_destroy(&view->obj.base);
}

static void ds_view_destroy(struct intel_obj *obj)
{
    struct intel_ds_view *view = intel_ds_view_from_obj(obj);

    intel_ds_view_destroy(view);
}

XGL_RESULT intel_ds_view_create(struct intel_dev *dev,
                                const XGL_DEPTH_STENCIL_VIEW_CREATE_INFO *info,
                                struct intel_ds_view **view_ret)
{
    struct intel_img *img = intel_img(info->image);
    struct intel_ds_view *view;

    view = (struct intel_ds_view *) intel_base_create(dev, sizeof(*view),
            dev->base.dbg, XGL_DBG_OBJECT_DEPTH_STENCIL_VIEW, info, 0);
    if (!view)
        return XGL_ERROR_OUT_OF_MEMORY;

    view->obj.destroy = ds_view_destroy;

    view->img = img;

    emit_ds_view(dev->gpu, img, img->layout.format, info->mipLevel,
                 info->baseArraySlice, info->arraySize, view->cmd);

    *view_ret = view;

    return XGL_SUCCESS;
}

void intel_ds_view_destroy(struct intel_ds_view *view)
{
    intel_base_destroy(&view->obj.base);
}

XGL_RESULT XGLAPI intelCreateImageView(
    XGL_DEVICE device,
    const XGL_IMAGE_VIEW_CREATE_INFO* pCreateInfo,
    XGL_IMAGE_VIEW* pView)
{
    struct intel_dev *dev = intel_dev(device);

    return intel_img_view_create(dev, pCreateInfo,
            (struct intel_img_view **) pView);
}

XGL_RESULT XGLAPI intelCreateColorAttachmentView(
    XGL_DEVICE device,
    const XGL_COLOR_ATTACHMENT_VIEW_CREATE_INFO* pCreateInfo,
    XGL_COLOR_ATTACHMENT_VIEW* pView)
{
    struct intel_dev *dev = intel_dev(device);

    return intel_rt_view_create(dev, pCreateInfo,
            (struct intel_rt_view **) pView);
}

XGL_RESULT XGLAPI intelCreateDepthStencilView(
    XGL_DEVICE device,
    const XGL_DEPTH_STENCIL_VIEW_CREATE_INFO* pCreateInfo,
    XGL_DEPTH_STENCIL_VIEW* pView)
{
    struct intel_dev *dev = intel_dev(device);

    return intel_ds_view_create(dev, pCreateInfo,
            (struct intel_ds_view **) pView);
}