intel: update winsys

Mainly to replace INTEL_DOMAIN_x with INTEL_RELOC_x in the relocation
interface.  Also report aperture sizes instead of a max batch size, take a
cpu_init bool instead of an initial domain when allocating bos, and rename
intel_bo_map_unsynchronized() to intel_bo_map_gtt_async().
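
For reference, a minimal caller-side sketch of the new allocation and
relocation interface (not part of this patch; the helper name, buffer name,
and sizes are hypothetical):

    #include <stdint.h>
    #include "winsys.h"

    /* allocate a CPU-filled staging buffer and emit a write relocation to it
     * from a batch bo (release of bo omitted for brevity) */
    static int
    emit_staging_reloc(struct intel_winsys *winsys, struct intel_bo *batch_bo,
                       uint32_t reloc_offset, unsigned long size)
    {
       uint64_t presumed_offset;
       struct intel_bo *bo;

       /* cpu_init = true: the buffer is written by the CPU before GPU use */
       bo = intel_winsys_alloc_buffer(winsys, "staging", size, true);
       if (!bo)
          return -1;

       /* the old read_domains/write_domain pair becomes a single flags word */
       return intel_bo_add_reloc(batch_bo, reloc_offset, bo, 0,
                                 INTEL_RELOC_WRITE, &presumed_offset);
    }
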
diff --git a/icd/intel/kmd/winsys.h b/icd/intel/kmd/winsys.h
index df76154..e080a08 100644
--- a/icd/intel/kmd/winsys.h
+++ b/icd/intel/kmd/winsys.h
@@ -28,6 +28,7 @@
 #ifndef INTEL_WINSYS_H
 #define INTEL_WINSYS_H
 
+#include <stddef.h>
 #include <stdint.h>
 #include <stdbool.h>
 
@@ -45,14 +46,10 @@
 };
 
 /* this is compatible with i915_drm.h's definitions */
-enum intel_domain_flag {
-   INTEL_DOMAIN_CPU           = 0x00000001,
-   INTEL_DOMAIN_RENDER        = 0x00000002,
-   INTEL_DOMAIN_SAMPLER       = 0x00000004,
-   INTEL_DOMAIN_COMMAND	      = 0x00000008,
-   INTEL_DOMAIN_INSTRUCTION   = 0x00000010,
-   INTEL_DOMAIN_VERTEX        = 0x00000020,
-   INTEL_DOMAIN_GTT           = 0x00000040,
+enum intel_reloc_flag {
+   INTEL_RELOC_FENCE          = 1 << 0,
+   INTEL_RELOC_GGTT           = 1 << 1,
+   INTEL_RELOC_WRITE          = 1 << 2,
 };
 
 /* this is compatible with i915_drm.h's definitions */
@@ -74,7 +71,10 @@
 struct intel_winsys_info {
    int devid;
 
-   int max_batch_size;
+   /* total and mappable aperture sizes, in bytes */
+   size_t aperture_total;
+   size_t aperture_mappable;
+
    bool has_llc;
    bool has_address_swizzling;
    bool has_logical_context;
@@ -120,7 +120,7 @@
  * \param tiling           Tiling mode.
  * \param pitch            Pitch of the bo.
  * \param height           Height of the bo.
- * \param initial_domain   Initial (write) domain.
+ * \param cpu_init         Whether the bo will be initialized by the CPU.
  */
 struct intel_bo *
 intel_winsys_alloc_bo(struct intel_winsys *winsys,
@@ -128,7 +128,7 @@
                       enum intel_tiling_mode tiling,
                       unsigned long pitch,
                       unsigned long height,
-                      uint32_t initial_domain);
+                      bool cpu_init);
 
 /**
  * Allocate a linear buffer object.
@@ -137,10 +137,10 @@
 intel_winsys_alloc_buffer(struct intel_winsys *winsys,
                           const char *name,
                           unsigned long size,
-                          uint32_t initial_domain)
+                          bool cpu_init)
 {
    return intel_winsys_alloc_bo(winsys, name,
-         INTEL_TILING_NONE, size, 1, initial_domain);
+         INTEL_TILING_NONE, size, 1, cpu_init);
 }
 
 /**
@@ -224,8 +224,7 @@
  * sequential writes, but reads would be very slow.  Callers always have a
  * linear view of the bo.
  *
- * map_unsynchronized() is similar to map_gtt(), except that it does not
- * block.
+ * map_gtt_async() is similar to map_gtt(), except that it does not block.
  */
 void *
 intel_bo_map(struct intel_bo *bo, bool write_enable);
@@ -234,7 +233,7 @@
 intel_bo_map_gtt(struct intel_bo *bo);
 
 void *
-intel_bo_map_unsynchronized(struct intel_bo *bo);
+intel_bo_map_gtt_async(struct intel_bo *bo);
 
 /**
  * Unmap \p bo.
@@ -268,8 +267,7 @@
 int
 intel_bo_add_reloc(struct intel_bo *bo, uint32_t offset,
                    struct intel_bo *target_bo, uint32_t target_offset,
-                   uint32_t read_domains, uint32_t write_domain,
-                   uint64_t *presumed_offset);
+                   uint32_t flags, uint64_t *presumed_offset);
 
 /**
  * Return the current number of relocations.
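
The aperture sizes reported in intel_winsys_info give callers something
concrete to budget GTT mappings against.  A minimal sketch, assuming a
hypothetical helper and an arbitrary 1/4 threshold:

    #include <stddef.h>
    #include "winsys.h"

    /* pick a linear view for streaming writes: a write-combined GTT map for
     * bos that fit comfortably in the mappable aperture, a plain CPU map
     * otherwise */
    static void *
    map_for_streaming_write(const struct intel_winsys_info *info,
                            struct intel_bo *bo, size_t size)
    {
       if (size <= info->aperture_mappable / 4)
          return intel_bo_map_gtt(bo);

       return intel_bo_map(bo, true /* write_enable */);
    }
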
diff --git a/icd/intel/kmd/winsys_drm.c b/icd/intel/kmd/winsys_drm.c
index 571e3b8..ba022b9 100644
--- a/icd/intel/kmd/winsys_drm.c
+++ b/icd/intel/kmd/winsys_drm.c
@@ -40,8 +40,6 @@
 #include "icd.h"
 #include "winsys.h"
 
-#define BATCH_SZ (8192 * sizeof(uint32_t))
-
 struct intel_winsys {
    int fd;
    drm_intel_bufmgr *bufmgr;
@@ -132,8 +130,6 @@
 
    info->devid = drm_intel_bufmgr_gem_get_devid(winsys->bufmgr);
 
-   info->max_batch_size = BATCH_SZ;
-
    get_param(winsys, I915_PARAM_HAS_LLC, &val);
    info->has_llc = val;
    info->has_address_swizzling = test_address_swizzling(winsys);
@@ -159,6 +155,8 @@
 struct intel_winsys *
 intel_winsys_create_for_fd(int fd)
 {
+   /* so that we can have enough (up to 4094) relocs per bo */
+   const int batch_size = sizeof(uint32_t) * 8192;
    struct intel_winsys *winsys;
 
    winsys = icd_alloc(sizeof(*winsys), 0, XGL_SYSTEM_ALLOC_INTERNAL);
@@ -169,7 +167,7 @@
 
    winsys->fd = fd;
 
-   winsys->bufmgr = drm_intel_bufmgr_gem_init(winsys->fd, BATCH_SZ);
+   winsys->bufmgr = drm_intel_bufmgr_gem_init(winsys->fd, batch_size);
    if (!winsys->bufmgr) {
       icd_free(winsys);
       return NULL;
@@ -183,12 +181,7 @@
 
    /*
     * No need to implicitly set up a fence register for each non-linear reloc
-    * entry.  When a fence register is needed for a reloc entry,
-    * drm_intel_bo_emit_reloc_fence() will be called explicitly.
-    *
-    * intel_bo_add_reloc() currently lacks "bool fenced" for this to work.
-    * But we never need a fence register on GEN4+ so we do not need to worry
-    * about it yet.
+    * entry.  INTEL_RELOC_FENCE is set on the reloc entries that need one.
     */
    drm_intel_bufmgr_gem_enable_fenced_relocs(winsys->bufmgr);
 
@@ -224,10 +217,8 @@
                       enum intel_tiling_mode tiling,
                       unsigned long pitch,
                       unsigned long height,
-                      uint32_t initial_domain)
+                      bool cpu_init)
 {
-   const bool for_render =
-      (initial_domain & (INTEL_DOMAIN_RENDER | INTEL_DOMAIN_INSTRUCTION));
    const unsigned int alignment = 4096; /* always page-aligned */
    unsigned long size;
    drm_intel_bo *bo;
@@ -250,12 +241,12 @@
 
    size = pitch * height;
 
-   if (for_render) {
-      bo = drm_intel_bo_alloc_for_render(winsys->bufmgr,
-            name, size, alignment);
+   if (cpu_init) {
+      bo = drm_intel_bo_alloc(winsys->bufmgr, name, size, alignment);
    }
    else {
-      bo = drm_intel_bo_alloc(winsys->bufmgr, name, size, alignment);
+      bo = drm_intel_bo_alloc_for_render(winsys->bufmgr,
+            name, size, alignment);
    }
 
    if (bo && tiling != INTEL_TILING_NONE) {
@@ -466,7 +457,7 @@
 }
 
 void *
-intel_bo_map_unsynchronized(struct intel_bo *bo)
+intel_bo_map_gtt_async(struct intel_bo *bo)
 {
    int err;
 
@@ -504,14 +495,37 @@
 int
 intel_bo_add_reloc(struct intel_bo *bo, uint32_t offset,
                    struct intel_bo *target_bo, uint32_t target_offset,
-                   uint32_t read_domains, uint32_t write_domain,
-                   uint64_t *presumed_offset)
+                   uint32_t flags, uint64_t *presumed_offset)
 {
+   uint32_t read_domains, write_domain;
    int err;
 
-   err = drm_intel_bo_emit_reloc(gem_bo(bo), offset,
-         gem_bo(target_bo), target_offset,
-         read_domains, write_domain);
+   if (flags & INTEL_RELOC_WRITE) {
+      /*
+       * Because of the translation to domains, INTEL_RELOC_GGTT should only
+       * be set on GEN6 when the bo is written by MI_* or PIPE_CONTROL.  The
+       * kernel detects that write domain and binds the target into the GGTT.
+       */
+      write_domain = (flags & INTEL_RELOC_GGTT) ?
+         I915_GEM_DOMAIN_INSTRUCTION : I915_GEM_DOMAIN_RENDER;
+      read_domains = write_domain;
+   } else {
+      write_domain = 0;
+      read_domains = I915_GEM_DOMAIN_RENDER |
+                     I915_GEM_DOMAIN_SAMPLER |
+                     I915_GEM_DOMAIN_INSTRUCTION |
+                     I915_GEM_DOMAIN_VERTEX;
+   }
+
+   if (flags & INTEL_RELOC_FENCE) {
+      err = drm_intel_bo_emit_reloc_fence(gem_bo(bo), offset,
+            gem_bo(target_bo), target_offset,
+            read_domains, write_domain);
+   } else {
+      err = drm_intel_bo_emit_reloc(gem_bo(bo), offset,
+            gem_bo(target_bo), target_offset,
+            read_domains, write_domain);
+   }
 
    *presumed_offset = gem_bo(target_bo)->offset64 + target_offset;
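
For completeness, a caller-side sketch of the two special flags handled
above (the function names, offsets, and bos are hypothetical):

    #include <stdint.h>
    #include "winsys.h"

    /* GEN6: a query bo written by PIPE_CONTROL carries INTEL_RELOC_GGTT on
     * top of INTEL_RELOC_WRITE, which the translation above turns into an
     * I915_GEM_DOMAIN_INSTRUCTION write */
    static int
    emit_gen6_pipe_control_reloc(struct intel_bo *batch_bo, uint32_t offset,
                                 struct intel_bo *query_bo)
    {
       uint64_t presumed_offset;

       return intel_bo_add_reloc(batch_bo, offset, query_bo, 0,
                                 INTEL_RELOC_WRITE | INTEL_RELOC_GGTT,
                                 &presumed_offset);
    }

    /* a target that needs an implicit fence register is routed to
     * drm_intel_bo_emit_reloc_fence() by INTEL_RELOC_FENCE */
    static int
    emit_fenced_reloc(struct intel_bo *batch_bo, uint32_t offset,
                      struct intel_bo *tiled_bo)
    {
       uint64_t presumed_offset;

       return intel_bo_add_reloc(batch_bo, offset, tiled_bo, 0,
                                 INTEL_RELOC_WRITE | INTEL_RELOC_FENCE,
                                 &presumed_offset);
    }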