intel: make PIPELINE_SELECT a queue state
Mantle has the concept of a universal queue, which can switch between
GRAPHICS and COMPUTE within a command buffer. An XGL command buffer, on
the other hand, is created with either XGL_QUEUE_TYPE_GRAPHICS or
XGL_QUEUE_TYPE_COMPUTE, and switching to the other queue type in the
middle of a command buffer is not allowed. Letting the queue emit
PIPELINE_SELECT at submission time allows us to avoid unnecessary
PIPELINE_SELECTs in the command buffers.
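
As a minimal sketch of the idea (standalone C with illustrative names,
not the driver's actual types): the queue caches the last
PIPELINE_SELECT it submitted and emits a new one only on a mismatch.

    #include <stdio.h>

    enum pipeline_select { SELECT_NONE, SELECT_3D, SELECT_MEDIA };

    struct queue {
        enum pipeline_select last_select; /* last selection submitted */
    };

    /* stand-in for submitting a pre-built PIPELINE_SELECT batch */
    static void submit_select_batch(enum pipeline_select select)
    {
        printf("PIPELINE_SELECT -> %s\n",
               select == SELECT_3D ? "3D" : "MEDIA");
    }

    /* emit PIPELINE_SELECT only when the queue state changes */
    static void queue_submit(struct queue *q, enum pipeline_select want)
    {
        if (q->last_select != want) {
            submit_select_batch(want);
            q->last_select = want;
        }
        /* ... submit the command buffer itself ... */
    }

    int main(void)
    {
        struct queue q = { SELECT_NONE };

        queue_submit(&q, SELECT_3D);    /* emits PIPELINE_SELECT(3D) */
        queue_submit(&q, SELECT_3D);    /* no switch needed */
        queue_submit(&q, SELECT_MEDIA); /* emits PIPELINE_SELECT(MEDIA) */
        return 0;
    }

Back-to-back command buffers of the same type then submit without any
PIPELINE_SELECT batch in between.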
diff --git a/icd/intel/queue.c b/icd/intel/queue.c
index 06133b6..dff89c8 100644
--- a/icd/intel/queue.c
+++ b/icd/intel/queue.c
@@ -29,6 +29,13 @@
 #include "fence.h"
 #include "queue.h"
 
+/* must match intel_cmd::pipeline_select */
+enum queue_state {
+    QUEUE_STATE_GRAPHICS_SELECTED = GEN6_PIPELINE_SELECT_DW0_SELECT_3D,
+    QUEUE_STATE_COMPUTE_SELECTED = GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA,
+    QUEUE_STATE_INITIALIZED = -1,
+};
+
 static XGL_RESULT queue_submit_bo(struct intel_queue *queue,
                                   struct intel_bo *bo,
                                   XGL_GPU_SIZE used)
@@ -47,48 +54,118 @@
     return (err) ? XGL_ERROR_UNKNOWN : XGL_SUCCESS;
 }
 
-static XGL_RESULT queue_init_hw_and_bo(struct intel_queue *queue)
+static XGL_RESULT queue_set_state(struct intel_queue *queue,
+                                  enum queue_state state)
 {
-    struct intel_winsys *winsys = queue->dev->winsys;
+    static const uint32_t queue_state_init[] = {
+        /* STATE_SIP */
+        GEN_RENDER_CMD(COMMON, GEN6, STATE_SIP),
+        0,
+        /* PIPELINE_SELECT */
+        GEN_RENDER_CMD(SINGLE_DW, GEN6, PIPELINE_SELECT) |
+            GEN6_PIPELINE_SELECT_DW0_SELECT_3D,
+        /* 3DSTATE_VF_STATISTICS */
+        GEN_RENDER_CMD(SINGLE_DW, GEN6, 3DSTATE_VF_STATISTICS),
+        /* end */
+        GEN_MI_CMD(MI_BATCH_BUFFER_END),
+        GEN_MI_CMD(MI_NOOP),
+    };
+    static const uint32_t queue_state_select_graphics[] = {
+        /* PIPELINE_SELECT */
+        GEN_RENDER_CMD(SINGLE_DW, GEN6, PIPELINE_SELECT) |
+            GEN6_PIPELINE_SELECT_DW0_SELECT_3D,
+        /* end */
+        GEN_MI_CMD(MI_BATCH_BUFFER_END),
+    };
+    static const uint32_t queue_state_select_compute[] = {
+        /* PIPELINE_SELECT */
+        GEN_RENDER_CMD(SINGLE_DW, GEN6, PIPELINE_SELECT) |
+            GEN6_PIPELINE_SELECT_DW0_SELECT_MEDIA,
+        /* end */
+        GEN_MI_CMD(MI_BATCH_BUFFER_END),
+    };
     struct intel_bo *bo;
-    uint32_t *cmd;
-    XGL_UINT used;
+    XGL_GPU_SIZE size;
     XGL_RESULT ret;
 
-    bo = intel_winsys_alloc_buffer(winsys,
-            "queue buffer", 4096, INTEL_DOMAIN_CPU);
-    if (!bo)
-        return XGL_ERROR_OUT_OF_GPU_MEMORY;
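+    /* nothing to do if this pipeline is already selected */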
+    if (queue->last_pipeline_select == state)
+        return XGL_SUCCESS;
 
-    cmd = (uint32_t *) intel_bo_map(bo, true);
-    if (!cmd) {
-        intel_bo_unreference(bo);
-        return XGL_ERROR_MEMORY_MAP_FAILED;
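+    /* look up the cached bo and batch size for the requested state */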
+    switch (state) {
+    case QUEUE_STATE_GRAPHICS_SELECTED:
+        bo = queue->select_graphics_bo;
+        size = sizeof(queue_state_select_graphics);
+        break;
+    case QUEUE_STATE_COMPUTE_SELECTED:
+        bo = queue->select_compute_bo;
+        size = sizeof(queue_state_select_compute);
+        break;
+    case QUEUE_STATE_INITIALIZED:
+        /* will be reused for the atomic counters */
+        assert(!queue->atomic_bo);
+        bo = NULL;
+        size = sizeof(queue_state_init);
+        break;
+    default:
+        return XGL_ERROR_INVALID_VALUE;
     }
 
-    used = 0;
+    if (!bo) {
+        const void *cmd;
+        void *ptr;
 
-    /* disable SIP and VF statistics */
-    cmd[used++] = GEN_RENDER_CMD(COMMON, GEN6, STATE_SIP);
-    cmd[used++] = 0;
-    cmd[used++] = GEN_RENDER_CMD(SINGLE_DW, GEN6, 3DSTATE_VF_STATISTICS);
+        bo = intel_winsys_alloc_buffer(queue->dev->winsys,
+                "queue bo", 4096, INTEL_DOMAIN_CPU);
+        if (!bo)
+            return XGL_ERROR_OUT_OF_GPU_MEMORY;
 
-    cmd[used++] = GEN_MI_CMD(MI_BATCH_BUFFER_END);
-    if (used & 1)
-        cmd[used++] = GEN_MI_CMD(MI_NOOP);
+        /* non-render rings only need the bo, for the atomic counters */
+        if (queue->ring != INTEL_RING_RENDER) {
+            assert(state == QUEUE_STATE_INITIALIZED);
+            queue->atomic_bo = bo;
+            queue->last_pipeline_select = QUEUE_STATE_INITIALIZED;
+            return XGL_SUCCESS;
+        }
 
-    intel_bo_unmap(bo);
+        ptr = intel_bo_map(bo, true);
+        if (!ptr) {
+            intel_bo_unreference(bo);
+            return XGL_ERROR_MEMORY_MAP_FAILED;
+        }
 
-    ret = queue_submit_bo(queue, bo, sizeof(cmd[0]) * used);
-    if (ret != XGL_SUCCESS) {
-        intel_bo_unreference(bo);
-        return ret;
+        switch (state) {
+        case QUEUE_STATE_GRAPHICS_SELECTED:
+            queue->select_graphics_bo = bo;
+            cmd = queue_state_select_graphics;
+            break;
+        case QUEUE_STATE_COMPUTE_SELECTED:
+            queue->select_compute_bo = bo;
+            cmd = queue_state_select_compute;
+            break;
+        case QUEUE_STATE_INITIALIZED:
+            /* reused for the atomic counters */
+            queue->atomic_bo = bo;
+            cmd = queue_state_init;
+            break;
+        default:
+            break;
+        }
+
+        memcpy(ptr, cmd, size);
+        intel_bo_unmap(bo);
     }
 
-    /* reuse the bo for atomic counters */
-    queue->bo = bo;
+    assert(queue->ring == INTEL_RING_RENDER);
 
-    return XGL_SUCCESS;
+    ret = queue_submit_bo(queue, bo, size);
+    if (ret == XGL_SUCCESS) {
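+        /* the init batch also selects the 3D pipeline */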
+        if (state == QUEUE_STATE_INITIALIZED)
+            state = QUEUE_STATE_GRAPHICS_SELECTED;
+        queue->last_pipeline_select = state;
+    }
+
+    return ret;
 }
 
 XGL_RESULT intel_queue_create(struct intel_dev *dev,
@@ -97,7 +174,6 @@
 {
     struct intel_queue *queue;
     enum intel_ring_type ring;
-    XGL_RESULT ret;
 
     switch (engine) {
     case INTEL_GPU_ENGINE_3D:
@@ -116,10 +192,9 @@
     queue->dev = dev;
     queue->ring = ring;
 
-    ret = queue_init_hw_and_bo(queue);
-    if (ret != XGL_SUCCESS) {
+    if (queue_set_state(queue, QUEUE_STATE_INITIALIZED) != XGL_SUCCESS) {
         intel_queue_destroy(queue);
-        return ret;
+        return XGL_ERROR_INITIALIZATION_FAILED;
     }
 
     *queue_ret = queue;
@@ -129,8 +204,12 @@
 void intel_queue_destroy(struct intel_queue *queue)
 {
-    if (queue->bo)
-        intel_bo_unreference(queue->bo);
+    if (queue->atomic_bo)
+        intel_bo_unreference(queue->atomic_bo);
+    if (queue->select_graphics_bo)
+        intel_bo_unreference(queue->select_graphics_bo);
+    if (queue->select_compute_bo)
+        intel_bo_unreference(queue->select_compute_bo);
 
     intel_base_destroy(&queue->base);
 }
@@ -181,6 +260,10 @@
         XGL_GPU_SIZE used;
         XGL_RESULT ret;
 
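+        /* switch the HW pipeline to match this command buffer, if needed */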
+        ret = queue_set_state(queue, cmd->pipeline_select);
+        if (ret != XGL_SUCCESS)
+            break;
+
         bo = intel_cmd_get_batch(cmd, &used);
         ret = queue_submit_bo(queue, bo, used);
         queue->last_submitted_cmd = cmd;