intel: use VS meta shaders for memory filling/copying
Memory filling/copying via rendering has a severe limit: the fill or copy size
is limited to 64K to 256K per SURFACE_STATE on Gen7+. For a large fill or copy,
we may need hundreds of SURFACE_STATEs. Instead of pursuing that, switch to
VS meta shaders.
diff --git a/icd/intel/cmd_meta.c b/icd/intel/cmd_meta.c
index b4ca65d..6627c84 100644
--- a/icd/intel/cmd_meta.c
+++ b/icd/intel/cmd_meta.c
@@ -291,35 +291,12 @@
return shader_id;
}
-/**
- * Return the suitable format for copying between memories. The
- * format is sampleable, renderable, and the offsets and copy size are
- * multiples of the format size.
- */
-static XGL_CHANNEL_FORMAT cmd_meta_mem_channel_format(const struct intel_cmd *cmd,
- XGL_GPU_SIZE src_offset,
- XGL_GPU_SIZE dst_offset,
- XGL_GPU_SIZE size,
- XGL_SIZE *format_size)
+static bool cmd_meta_mem_dword_aligned(const struct intel_cmd *cmd,
+ XGL_GPU_SIZE src_offset,
+ XGL_GPU_SIZE dst_offset,
+ XGL_GPU_SIZE size)
{
- const XGL_GPU_SIZE align = (src_offset | dst_offset | size) & 0xf;
-
- if (align & 0x1) {
- *format_size = 1;
- return XGL_CH_FMT_R8;
- } else if (align & 0x2) {
- *format_size = 2;
- return XGL_CH_FMT_R16;
- } else if (align & 0x4) {
- *format_size = 4;
- return XGL_CH_FMT_R32;
- } else if (align & 0x8) {
- *format_size = 8;
- return XGL_CH_FMT_R32G32;
- } else {
- *format_size = 16;
- return XGL_CH_FMT_R32G32B32A32;
- }
+ return !((src_offset | dst_offset | size) & 0x3);
}
static XGL_FORMAT cmd_meta_img_raw_format(const struct intel_cmd *cmd,
@@ -369,9 +346,8 @@
XGL_UINT i;
memset(&meta, 0, sizeof(meta));
- meta.mode = INTEL_CMD_META_FS_RECT;
+ meta.mode = INTEL_CMD_META_VS_POINTS;
- meta.shader_id = INTEL_DEV_META_FS_COPY_MEM;
meta.height = 1;
meta.samples = 1;
@@ -381,10 +357,40 @@
for (i = 0; i < regionCount; i++) {
const XGL_MEMORY_COPY *region = &pRegions[i];
XGL_CHANNEL_FORMAT ch;
- XGL_SIZE format_size;
- ch = cmd_meta_mem_channel_format(cmd, region->srcOffset,
- region->destOffset, region->copySize, &format_size);
+ meta.src.x = region->srcOffset;
+ meta.dst.x = region->destOffset;
+ meta.width = region->copySize;
+
+ if (cmd_meta_mem_dword_aligned(cmd, region->srcOffset,
+ region->destOffset, region->copySize)) {
+ meta.shader_id = INTEL_DEV_META_VS_COPY_MEM;
+ meta.src.x /= 4;
+ meta.dst.x /= 4;
+ meta.width /= 4;
+
+ /*
+ * INTEL_DEV_META_VS_COPY_MEM is untyped but expects the stride to
+ * be 16
+ */
+ ch = XGL_CH_FMT_R32G32B32A32;
+ } else {
+ if (cmd_gen(cmd) == INTEL_GEN(6)) {
+ intel_dev_log(cmd->dev, XGL_DBG_MSG_ERROR,
+ XGL_VALIDATION_LEVEL_0, XGL_NULL_HANDLE, 0, 0,
+ "unaligned xglCmdCopyMemory unsupported");
+ cmd->result = XGL_ERROR_UNKNOWN;
+ continue;
+ }
+
+ meta.shader_id = INTEL_DEV_META_VS_COPY_MEM_UNALIGNED;
+
+ /*
+ * INTEL_DEV_META_VS_COPY_MEM_UNALIGNED is untyped but expects the
+ * stride to be 4
+ */
+ ch = XGL_CH_FMT_R8G8B8A8;
+ }
if (format.channelFormat != ch) {
format.channelFormat = ch;
@@ -393,10 +399,6 @@
cmd_meta_set_dst_for_mem(cmd, dst, format, &meta);
}
- meta.src.x = region->srcOffset / format_size;
- meta.dst.x = region->destOffset / format_size;
- meta.width = region->copySize / format_size;
-
cmd_draw_meta(cmd, &meta);
}
}
@@ -612,34 +614,41 @@
struct intel_mem *dst = intel_mem(destMem);
struct intel_cmd_meta meta;
XGL_FORMAT format;
- XGL_SIZE format_size;
uint32_t *ptr;
uint32_t offset;
+ /* must be 4-byte aligned */
+ if ((destOffset | dataSize) & 3) {
+ cmd->result = XGL_ERROR_UNKNOWN;
+ return;
+ }
+
/* write to dynamic state writer first */
offset = cmd_state_pointer(cmd, INTEL_CMD_ITEM_BLOB, 32,
(dataSize + 3) / 4, &ptr);
memcpy(ptr, pData, dataSize);
- format.channelFormat = cmd_meta_mem_channel_format(cmd,
- offset, destOffset, dataSize, &format_size);
- format.numericFormat = XGL_NUM_FMT_UINT;
-
memset(&meta, 0, sizeof(meta));
- meta.mode = INTEL_CMD_META_FS_RECT;
+ meta.mode = INTEL_CMD_META_VS_POINTS;
- meta.shader_id = INTEL_DEV_META_FS_COPY_MEM;
+ meta.shader_id = INTEL_DEV_META_VS_COPY_MEM;
+
+ meta.src.x = offset / 4;
+ meta.dst.x = destOffset / 4;
+ meta.width = dataSize / 4;
+ meta.height = 1;
+ meta.samples = 1;
+
+ /*
+ * INTEL_DEV_META_VS_COPY_MEM is untyped but expects the stride to be 16
+ */
+ format.channelFormat = XGL_CH_FMT_R32G32B32A32;
+ format.numericFormat = XGL_NUM_FMT_UINT;
cmd_meta_set_src_for_writer(cmd, INTEL_CMD_WRITER_STATE,
offset + dataSize, format, &meta);
cmd_meta_set_dst_for_mem(cmd, dst, format, &meta);
- meta.src.x = offset / format_size;
- meta.dst.x = destOffset / format_size;
- meta.width = dataSize / format_size;
- meta.height = 1;
- meta.samples = 1;
-
cmd_draw_meta(cmd, &meta);
}
@@ -654,7 +663,6 @@
struct intel_mem *dst = intel_mem(destMem);
struct intel_cmd_meta meta;
XGL_FORMAT format;
- XGL_SIZE format_size;
/* must be 4-byte aligned */
if ((destOffset | fillSize) & 3) {
@@ -663,25 +671,25 @@
}
memset(&meta, 0, sizeof(meta));
- meta.mode = INTEL_CMD_META_FS_RECT;
+ meta.mode = INTEL_CMD_META_VS_POINTS;
- meta.shader_id = INTEL_DEV_META_FS_CLEAR_COLOR;
+ meta.shader_id = INTEL_DEV_META_VS_FILL_MEM;
meta.clear_val[0] = data;
- meta.clear_val[1] = data;
- meta.clear_val[2] = data;
- meta.clear_val[3] = data;
- format.channelFormat = cmd_meta_mem_channel_format(cmd,
- 0, destOffset, fillSize, &format_size);;
- format.numericFormat = XGL_NUM_FMT_UINT;
- cmd_meta_set_dst_for_mem(cmd, dst, format, &meta);
-
- meta.dst.x = destOffset / format_size;
- meta.width = fillSize / format_size;
+ meta.dst.x = destOffset / 4;
+ meta.width = fillSize / 4;
meta.height = 1;
meta.samples = 1;
+ /*
+ * INTEL_DEV_META_VS_FILL_MEM is untyped but expects the stride to be 16
+ */
+ format.channelFormat = XGL_CH_FMT_R32G32B32A32;
+ format.numericFormat = XGL_NUM_FMT_UINT;
+
+ cmd_meta_set_dst_for_mem(cmd, dst, format, &meta);
+
cmd_draw_meta(cmd, &meta);
}