intel: update winsys
Mainly to replace the INTEL_DOMAIN_x read/write domain flags with the new INTEL_RELOC_x relocation flags.
diff --git a/icd/intel/cmd.c b/icd/intel/cmd.c
index ebf84a4..5dd962d 100644
--- a/icd/intel/cmd.c
+++ b/icd/intel/cmd.c
@@ -39,7 +39,7 @@
void *ptr;
bo = intel_winsys_alloc_buffer(winsys,
- "batch buffer", bo_size, INTEL_DOMAIN_CPU);
+ "batch buffer", bo_size, true);
if (!bo)
return XGL_ERROR_OUT_OF_GPU_MEMORY;
@@ -262,7 +262,7 @@
err = intel_bo_add_reloc(reloc->writer->bo,
sizeof(uint32_t) * reloc->pos, reloc->bo, reloc->val,
- reloc->read_domains, reloc->write_domain, &presumed_offset);
+ reloc->flags, &presumed_offset);
if (err) {
cmd->result = XGL_ERROR_UNKNOWN;
break;
diff --git a/icd/intel/cmd_pipeline.c b/icd/intel/cmd_pipeline.c
index 0b37c55..9409252 100644
--- a/icd/intel/cmd_pipeline.c
+++ b/icd/intel/cmd_pipeline.c
@@ -100,8 +100,6 @@
const uint8_t cmd_len = 5;
const uint32_t dw0 = GEN_RENDER_CMD(3D, GEN6, PIPE_CONTROL) |
(cmd_len - 2);
- const uint32_t read_domains = INTEL_DOMAIN_INSTRUCTION;
- const uint32_t write_domain = INTEL_DOMAIN_INSTRUCTION;
CMD_ASSERT(cmd, 6, 7.5);
@@ -174,10 +172,12 @@
cmd_batch_reserve_reloc(cmd, cmd_len, (bool) bo);
cmd_batch_write(cmd, dw0);
cmd_batch_write(cmd, dw1);
- if (bo)
- cmd_batch_reloc(cmd, bo_offset, bo, read_domains, write_domain);
- else
+ if (bo) {
+ cmd_batch_reloc(cmd, bo_offset, bo, INTEL_RELOC_GGTT |
+ INTEL_RELOC_WRITE);
+ } else {
cmd_batch_write(cmd, 0);
+ }
cmd_batch_write(cmd, 0);
cmd_batch_write(cmd, 0);
}
@@ -275,8 +275,8 @@
cmd_batch_reserve_reloc(cmd, cmd_len, 2);
cmd_batch_write(cmd, dw0);
- cmd_batch_reloc(cmd, offset, mem->bo, INTEL_DOMAIN_VERTEX, 0);
- cmd_batch_reloc(cmd, end_offset, mem->bo, INTEL_DOMAIN_VERTEX, 0);
+ cmd_batch_reloc(cmd, offset, mem->bo, 0);
+ cmd_batch_reloc(cmd, end_offset, mem->bo, 0);
}
static inline void
@@ -338,8 +338,7 @@
cmd_batch_write(cmd, view->cmd[0]);
if (view->img) {
cmd_batch_reloc(cmd, view->cmd[1], view->img->obj.mem->bo,
- INTEL_DOMAIN_RENDER,
- INTEL_DOMAIN_RENDER);
+ INTEL_RELOC_WRITE);
} else {
cmd_batch_write(cmd, 0);
}
@@ -367,8 +366,7 @@
cmd_batch_write(cmd, view->cmd[6]);
if (view->img) {
cmd_batch_reloc(cmd, view->cmd[7], view->img->obj.mem->bo,
- INTEL_DOMAIN_RENDER,
- INTEL_DOMAIN_RENDER);
+ INTEL_RELOC_WRITE);
} else {
cmd_batch_write(cmd, 0);
}
@@ -392,8 +390,7 @@
cmd_batch_write(cmd, view->cmd[8]);
if (view->img) {
cmd_batch_reloc(cmd, view->cmd[9], view->img->obj.mem->bo,
- INTEL_DOMAIN_RENDER,
- INTEL_DOMAIN_RENDER);
+ INTEL_RELOC_WRITE);
} else {
cmd_batch_write(cmd, 0);
}
@@ -761,7 +758,7 @@
memcpy(dw, view->cmd, sizeof(uint32_t) * view->cmd_len);
cmd_state_reloc(cmd, 1, view->cmd[1], view->img->obj.mem->bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+ INTEL_RELOC_WRITE);
cmd_state_advance(cmd, view->cmd_len);
}
break;
@@ -775,7 +772,7 @@
memcpy(dw, view->cmd, sizeof(uint32_t) * view->cmd_len);
cmd_state_reloc(cmd, 1, view->cmd[1], view->mem->bo,
- INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
+ INTEL_RELOC_WRITE);
cmd_state_advance(cmd, view->cmd_len);
}
break;
diff --git a/icd/intel/cmd_priv.h b/icd/intel/cmd_priv.h
index 1ea8378..0158199 100644
--- a/icd/intel/cmd_priv.h
+++ b/icd/intel/cmd_priv.h
@@ -40,18 +40,7 @@
uint32_t val;
struct intel_bo *bo;
- /*
- * With application state tracking promised by XGL, we should be able to
- * set
- *
- * I915_EXEC_NO_RELOC
- * I915_EXEC_HANDLE_LUT
- * I915_EXEC_IS_PINNED
- *
- * once we figure them out.
- */
- uint16_t read_domains;
- uint16_t write_domain;
+ uint32_t flags;
};
static inline int cmd_gen(const struct intel_cmd *cmd)
@@ -80,8 +69,7 @@
struct intel_cmd_writer *writer,
XGL_UINT pos, uint32_t val,
struct intel_bo *bo,
- uint16_t read_domains,
- uint16_t write_domain)
+ uint32_t flags)
{
struct intel_cmd_reloc *reloc = &cmd->relocs[cmd->reloc_used];
@@ -91,8 +79,7 @@
reloc->pos = pos;
reloc->val = val;
reloc->bo = bo;
- reloc->read_domains = read_domains;
- reloc->write_domain = write_domain;
+ reloc->flags = flags;
cmd->reloc_used++;
}
@@ -151,13 +138,11 @@
*/
static inline void cmd_batch_reloc(struct intel_cmd *cmd,
uint32_t val, struct intel_bo *bo,
- uint16_t read_domains,
- uint16_t write_domain)
+ uint32_t flags)
{
struct intel_cmd_writer *writer = &cmd->batch;
- cmd_writer_add_reloc(cmd, writer, writer->used, val,
- bo, read_domains, write_domain);
+ cmd_writer_add_reloc(cmd, writer, writer->used, val, bo, flags);
writer->used++;
}
@@ -210,16 +195,13 @@
const struct intel_cmd_writer *kernel = &cmd->kernel;
cmd_reserve_reloc(cmd, 5);
- cmd_writer_add_reloc(cmd, writer, 2, 1,
- state->bo, INTEL_DOMAIN_SAMPLER, 0);
- cmd_writer_add_reloc(cmd, writer, 3, 1,
- state->bo, INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, 0);
- cmd_writer_add_reloc(cmd, writer, 5, 1,
- kernel->bo, INTEL_DOMAIN_INSTRUCTION, 0);
- cmd_writer_add_reloc(cmd, writer, 7, 1 + (state->size << 2),
- state->bo, INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION, 0);
- cmd_writer_add_reloc(cmd, writer, 9, 1 + (kernel->size << 2),
- kernel->bo, INTEL_DOMAIN_INSTRUCTION, 0);
+ cmd_writer_add_reloc(cmd, writer, 2, 1, state->bo, 0);
+ cmd_writer_add_reloc(cmd, writer, 3, 1, state->bo, 0);
+ cmd_writer_add_reloc(cmd, writer, 5, 1, kernel->bo, 0);
+ cmd_writer_add_reloc(cmd, writer, 7, 1 +
+ (state->size << 2), state->bo, 0);
+ cmd_writer_add_reloc(cmd, writer, 9, 1 +
+ (kernel->size << 2), kernel->bo, 0);
if (cmd->batch.used & 1) {
cmd_batch_reserve(cmd, 1);
@@ -278,13 +260,11 @@
static inline void cmd_state_reloc(struct intel_cmd *cmd,
XGL_INT offset, uint32_t val,
struct intel_bo *bo,
- uint16_t read_domains,
- uint16_t write_domain)
+ uint32_t flags)
{
struct intel_cmd_writer *writer = &cmd->state;
- cmd_writer_add_reloc(cmd, writer, writer->used + offset, val,
- bo, read_domains, write_domain);
+ cmd_writer_add_reloc(cmd, writer, writer->used + offset, val, bo, flags);
}
/**
diff --git a/icd/intel/dev.c b/icd/intel/dev.c
index 3613f54..f8d7720 100644
--- a/icd/intel/dev.c
+++ b/icd/intel/dev.c
@@ -99,7 +99,7 @@
}
dev->cmd_scratch_bo = intel_winsys_alloc_buffer(dev->winsys,
- "command buffer scratch", 4096, INTEL_DOMAIN_INSTRUCTION);
+ "command buffer scratch", 4096, false);
if (!dev->cmd_scratch_bo) {
intel_dev_destroy(dev);
return XGL_ERROR_OUT_OF_GPU_MEMORY;
diff --git a/icd/intel/kmd/winsys.h b/icd/intel/kmd/winsys.h
index df76154..e080a08 100644
--- a/icd/intel/kmd/winsys.h
+++ b/icd/intel/kmd/winsys.h
@@ -28,6 +28,7 @@
#ifndef INTEL_WINSYS_H
#define INTEL_WINSYS_H
+#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
@@ -45,14 +46,10 @@
};
/* this is compatible with i915_drm.h's definitions */
-enum intel_domain_flag {
- INTEL_DOMAIN_CPU = 0x00000001,
- INTEL_DOMAIN_RENDER = 0x00000002,
- INTEL_DOMAIN_SAMPLER = 0x00000004,
- INTEL_DOMAIN_COMMAND = 0x00000008,
- INTEL_DOMAIN_INSTRUCTION = 0x00000010,
- INTEL_DOMAIN_VERTEX = 0x00000020,
- INTEL_DOMAIN_GTT = 0x00000040,
+enum intel_reloc_flag {
+ INTEL_RELOC_FENCE = 1 << 0,
+ INTEL_RELOC_GGTT = 1 << 1,
+ INTEL_RELOC_WRITE = 1 << 2,
};
/* this is compatible with i915_drm.h's definitions */
@@ -74,7 +71,10 @@
struct intel_winsys_info {
int devid;
- int max_batch_size;
+ /* the sizes of the aperture in bytes */
+ size_t aperture_total;
+ size_t aperture_mappable;
+
bool has_llc;
bool has_address_swizzling;
bool has_logical_context;
@@ -120,7 +120,7 @@
* \param tiling Tiling mode.
* \param pitch Pitch of the bo.
* \param height Height of the bo.
- * \param initial_domain Initial (write) domain.
+ * \param cpu_init Will be initialized by CPU.
*/
struct intel_bo *
intel_winsys_alloc_bo(struct intel_winsys *winsys,
@@ -128,7 +128,7 @@
enum intel_tiling_mode tiling,
unsigned long pitch,
unsigned long height,
- uint32_t initial_domain);
+ bool cpu_init);
/**
* Allocate a linear buffer object.
@@ -137,10 +137,10 @@
intel_winsys_alloc_buffer(struct intel_winsys *winsys,
const char *name,
unsigned long size,
- uint32_t initial_domain)
+ bool cpu_init)
{
return intel_winsys_alloc_bo(winsys, name,
- INTEL_TILING_NONE, size, 1, initial_domain);
+ INTEL_TILING_NONE, size, 1, cpu_init);
}
/**
@@ -224,8 +224,7 @@
* sequential writes, but reads would be very slow. Callers always have a
* linear view of the bo.
*
- * map_unsynchronized() is similar to map_gtt(), except that it does not
- * block.
+ * map_gtt_async() is similar to map_gtt(), except that it does not block.
*/
void *
intel_bo_map(struct intel_bo *bo, bool write_enable);
@@ -234,7 +233,7 @@
intel_bo_map_gtt(struct intel_bo *bo);
void *
-intel_bo_map_unsynchronized(struct intel_bo *bo);
+intel_bo_map_gtt_async(struct intel_bo *bo);
/**
* Unmap \p bo.
@@ -268,8 +267,7 @@
int
intel_bo_add_reloc(struct intel_bo *bo, uint32_t offset,
struct intel_bo *target_bo, uint32_t target_offset,
- uint32_t read_domains, uint32_t write_domain,
- uint64_t *presumed_offset);
+ uint32_t flags, uint64_t *presumed_offset);
/**
* Return the current number of relocations.
diff --git a/icd/intel/kmd/winsys_drm.c b/icd/intel/kmd/winsys_drm.c
index 571e3b8..ba022b9 100644
--- a/icd/intel/kmd/winsys_drm.c
+++ b/icd/intel/kmd/winsys_drm.c
@@ -40,8 +40,6 @@
#include "icd.h"
#include "winsys.h"
-#define BATCH_SZ (8192 * sizeof(uint32_t))
-
struct intel_winsys {
int fd;
drm_intel_bufmgr *bufmgr;
@@ -132,8 +130,6 @@
info->devid = drm_intel_bufmgr_gem_get_devid(winsys->bufmgr);
- info->max_batch_size = BATCH_SZ;
-
get_param(winsys, I915_PARAM_HAS_LLC, &val);
info->has_llc = val;
info->has_address_swizzling = test_address_swizzling(winsys);
@@ -159,6 +155,8 @@
struct intel_winsys *
intel_winsys_create_for_fd(int fd)
{
+ /* so that we can have enough (up to 4094) relocs per bo */
+ const int batch_size = sizeof(uint32_t) * 8192;
struct intel_winsys *winsys;
winsys = icd_alloc(sizeof(*winsys), 0, XGL_SYSTEM_ALLOC_INTERNAL);
@@ -169,7 +167,7 @@
winsys->fd = fd;
- winsys->bufmgr = drm_intel_bufmgr_gem_init(winsys->fd, BATCH_SZ);
+ winsys->bufmgr = drm_intel_bufmgr_gem_init(winsys->fd, batch_size);
if (!winsys->bufmgr) {
icd_free(winsys);
return NULL;
@@ -183,12 +181,7 @@
/*
* No need to implicitly set up a fence register for each non-linear reloc
- * entry. When a fence register is needed for a reloc entry,
- * drm_intel_bo_emit_reloc_fence() will be called explicitly.
- *
- * intel_bo_add_reloc() currently lacks "bool fenced" for this to work.
- * But we never need a fence register on GEN4+ so we do not need to worry
- * about it yet.
+ * entry. INTEL_RELOC_FENCE will be set on reloc entries that need them.
*/
drm_intel_bufmgr_gem_enable_fenced_relocs(winsys->bufmgr);
@@ -224,10 +217,8 @@
enum intel_tiling_mode tiling,
unsigned long pitch,
unsigned long height,
- uint32_t initial_domain)
+ bool cpu_init)
{
- const bool for_render =
- (initial_domain & (INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION));
const unsigned int alignment = 4096; /* always page-aligned */
unsigned long size;
drm_intel_bo *bo;
@@ -250,12 +241,12 @@
size = pitch * height;
- if (for_render) {
- bo = drm_intel_bo_alloc_for_render(winsys->bufmgr,
- name, size, alignment);
+ if (cpu_init) {
+ bo = drm_intel_bo_alloc(winsys->bufmgr, name, size, alignment);
}
else {
- bo = drm_intel_bo_alloc(winsys->bufmgr, name, size, alignment);
+ bo = drm_intel_bo_alloc_for_render(winsys->bufmgr,
+ name, size, alignment);
}
if (bo && tiling != INTEL_TILING_NONE) {
@@ -466,7 +457,7 @@
}
void *
-intel_bo_map_unsynchronized(struct intel_bo *bo)
+intel_bo_map_gtt_async(struct intel_bo *bo)
{
int err;
@@ -504,14 +495,37 @@
int
intel_bo_add_reloc(struct intel_bo *bo, uint32_t offset,
struct intel_bo *target_bo, uint32_t target_offset,
- uint32_t read_domains, uint32_t write_domain,
- uint64_t *presumed_offset)
+ uint32_t flags, uint64_t *presumed_offset)
{
+ uint32_t read_domains, write_domain;
int err;
- err = drm_intel_bo_emit_reloc(gem_bo(bo), offset,
- gem_bo(target_bo), target_offset,
- read_domains, write_domain);
+ if (flags & INTEL_RELOC_WRITE) {
+ /*
+ * Because of the translation to domains, INTEL_RELOC_GGTT should only
+ * be set on GEN6 when the bo is written by MI_* or PIPE_CONTROL. The
+ * kernel will translate it back to INTEL_RELOC_GGTT.
+ */
+ write_domain = (flags & INTEL_RELOC_GGTT) ?
+ I915_GEM_DOMAIN_INSTRUCTION : I915_GEM_DOMAIN_RENDER;
+ read_domains = write_domain;
+ } else {
+ write_domain = 0;
+ read_domains = I915_GEM_DOMAIN_RENDER |
+ I915_GEM_DOMAIN_SAMPLER |
+ I915_GEM_DOMAIN_INSTRUCTION |
+ I915_GEM_DOMAIN_VERTEX;
+ }
+
+ if (flags & INTEL_RELOC_FENCE) {
+ err = drm_intel_bo_emit_reloc_fence(gem_bo(bo), offset,
+ gem_bo(target_bo), target_offset,
+ read_domains, write_domain);
+ } else {
+ err = drm_intel_bo_emit_reloc(gem_bo(bo), offset,
+ gem_bo(target_bo), target_offset,
+ read_domains, write_domain);
+ }
*presumed_offset = gem_bo(target_bo)->offset64 + target_offset;
diff --git a/icd/intel/mem.h b/icd/intel/mem.h
index 7522794..10b5580 100644
--- a/icd/intel/mem.h
+++ b/icd/intel/mem.h
@@ -45,7 +45,7 @@
static inline void *intel_mem_map(struct intel_mem *mem, XGL_FLAGS flags)
{
- return intel_bo_map_unsynchronized(mem->bo);
+ return intel_bo_map_gtt_async(mem->bo);
}
static inline void *intel_mem_map_sync(struct intel_mem *mem, bool rw)
diff --git a/icd/intel/queue.c b/icd/intel/queue.c
index e80aa37..937c1f0 100644
--- a/icd/intel/queue.c
+++ b/icd/intel/queue.c
@@ -56,7 +56,7 @@
void *ptr;
bo = intel_winsys_alloc_buffer(queue->dev->winsys,
- "queue bo", size, INTEL_DOMAIN_CPU);
+ "queue bo", size, true);
if (!bo)
return NULL;