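Prepare for split BLT ring on Sandybridge.

Store the device id in struct intel_batchbuffer (it is now passed to
intel_batchbuffer_alloc()) so that intel_batchbuffer_flush() can submit
through drm_intel_bo_mrb_exec() and select I915_EXEC_BLT on gen6.
intel_copy_bo() reads the device id from the batch and therefore drops
its devid parameter; all callers are updated accordingly.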

Depends on libdrm commit 057fab3382c02af54126ce395c43d4e6dce9439a.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=31123
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/benchmarks/intel_upload_blit_large.c b/benchmarks/intel_upload_blit_large.c
index b3581d4..de0f668 100644
--- a/benchmarks/intel_upload_blit_large.c
+++ b/benchmarks/intel_upload_blit_large.c
@@ -58,6 +58,7 @@
 #include "drmtest.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
 
 #define OBJECT_WIDTH	1280
 #define OBJECT_HEIGHT	720
@@ -128,7 +129,7 @@
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
 
diff --git a/benchmarks/intel_upload_blit_large_gtt.c b/benchmarks/intel_upload_blit_large_gtt.c
index 3946301..dc2733e 100644
--- a/benchmarks/intel_upload_blit_large_gtt.c
+++ b/benchmarks/intel_upload_blit_large_gtt.c
@@ -58,6 +58,7 @@
 #include "drmtest.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
 
 #define OBJECT_WIDTH	1280
 #define OBJECT_HEIGHT	720
@@ -128,7 +129,7 @@
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
 
diff --git a/benchmarks/intel_upload_blit_large_map.c b/benchmarks/intel_upload_blit_large_map.c
index 8a50839..0ca9e9d 100644
--- a/benchmarks/intel_upload_blit_large_map.c
+++ b/benchmarks/intel_upload_blit_large_map.c
@@ -61,6 +61,7 @@
 #include "drmtest.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
 
 #define OBJECT_WIDTH	1280
 #define OBJECT_HEIGHT	720
@@ -131,7 +132,7 @@
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
 
diff --git a/benchmarks/intel_upload_blit_small.c b/benchmarks/intel_upload_blit_small.c
index 719a8a1..8ad25ad 100644
--- a/benchmarks/intel_upload_blit_small.c
+++ b/benchmarks/intel_upload_blit_small.c
@@ -54,6 +54,7 @@
 #include "drmtest.h"
 #include "intel_bufmgr.h"
 #include "intel_batchbuffer.h"
+#include "intel_gpu_tools.h"
 
 /* Happens to be 128k, the size of the VBOs used by i965's Mesa driver. */
 #define OBJECT_WIDTH	256
@@ -141,7 +142,7 @@
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	dst_bo = drm_intel_bo_alloc(bufmgr, "dst", object_size, 4096);
 
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index ae5150e..5415469 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -58,11 +58,12 @@
 }
 
 struct intel_batchbuffer *
-intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr)
+intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr, uint32_t devid)
 {
 	struct intel_batchbuffer *batch = calloc(sizeof(*batch), 1);
 
 	batch->bufmgr = bufmgr;
+	batch->devid = devid;
 	intel_batchbuffer_reset(batch);
 
 	return batch;
@@ -82,6 +83,7 @@
 intel_batchbuffer_flush(struct intel_batchbuffer *batch)
 {
 	unsigned int used = batch->ptr - batch->map;
+	int ring;
 	int ret;
 
 	if (used == 0)
@@ -104,7 +106,10 @@
 	batch->map = NULL;
 	batch->ptr = NULL;
 
-	ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0);
+	ring = 0;
+	if (IS_GEN6(batch->devid))
+		ring = I915_EXEC_BLT;
+	ret = drm_intel_bo_mrb_exec(batch->bo, used, NULL, 0, 0, ring);
 	assert(ret == 0);
 
 	intel_batchbuffer_reset(batch);
@@ -145,7 +150,7 @@
 void
 intel_copy_bo(struct intel_batchbuffer *batch,
 	      drm_intel_bo *dst_bo, drm_intel_bo *src_bo,
-	      int width, int height, uint32_t devid)
+	      int width, int height)
 {
 	uint32_t src_tiling, dst_tiling, swizzle;
 	uint32_t src_pitch, dst_pitch;
@@ -155,13 +160,13 @@
 	drm_intel_bo_get_tiling(dst_bo, &dst_tiling, &swizzle);
 
 	src_pitch = width * 4;
-	if (IS_965(devid) && src_tiling != I915_TILING_NONE) {
+	if (IS_965(batch->devid) && src_tiling != I915_TILING_NONE) {
 		src_pitch /= 4;
 		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
 	}
 
 	dst_pitch = width * 4;
-	if (IS_965(devid) && dst_tiling != I915_TILING_NONE) {
+	if (IS_965(batch->devid) && dst_tiling != I915_TILING_NONE) {
 		dst_pitch /= 4;
 		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
 	}
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index fcd9ceb..84f7576 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -11,6 +11,7 @@
 struct intel_batchbuffer
 {
 	drm_intel_bufmgr *bufmgr;
+	uint32_t devid;
 
 	drm_intel_bo *bo;
 
@@ -28,7 +29,8 @@
 	unsigned int size;
 };
 
-struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr);
+struct intel_batchbuffer *intel_batchbuffer_alloc(drm_intel_bufmgr *bufmgr,
+						  uint32_t devid);
 
 void intel_batchbuffer_free(struct intel_batchbuffer *batch);
 
@@ -117,6 +119,6 @@
 
 void intel_copy_bo(struct intel_batchbuffer *batch,
 		   drm_intel_bo *dst_bo, drm_intel_bo *src_bo,
-		   int width, int height, uint32_t devid);
+		   int width, int height);
 
 #endif
diff --git a/tests/gem_bad_address.c b/tests/gem_bad_address.c
index 188020d..fbb9649 100644
--- a/tests/gem_bad_address.c
+++ b/tests/gem_bad_address.c
@@ -68,7 +68,7 @@
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	bad_store();
 
diff --git a/tests/gem_bad_batch.c b/tests/gem_bad_batch.c
index da2870b..db6636a 100644
--- a/tests/gem_bad_batch.c
+++ b/tests/gem_bad_batch.c
@@ -64,7 +64,7 @@
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	bad_batch();
 
diff --git a/tests/gem_bad_blit.c b/tests/gem_bad_blit.c
index 362b5ac..2216527 100644
--- a/tests/gem_bad_blit.c
+++ b/tests/gem_bad_blit.c
@@ -100,19 +100,17 @@
 int main(int argc, char **argv)
 {
 	drm_intel_bo *src;
-	uint32_t devid;
 	int fd;
 
 	fd = drm_open_any();
-	devid = intel_get_drm_devid(fd);
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	src = drm_intel_bo_alloc(bufmgr, "src", 128 * 128, 4096);
 
-	bad_blit(src, devid);
+	bad_blit(src, batch->devid);
 
 	intel_batchbuffer_free(batch);
 	drm_intel_bufmgr_destroy(bufmgr);
diff --git a/tests/gem_hang.c b/tests/gem_hang.c
index bda00fd..40d2bb0 100644
--- a/tests/gem_hang.c
+++ b/tests/gem_hang.c
@@ -81,11 +81,10 @@
 	bad_pipe = atoi(argv[1]);
 
 	fd = drm_open_any();
-	intel_get_drm_devid(fd);
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	gpu_hang();
 
diff --git a/tests/gem_pread_after_blit.c b/tests/gem_pread_after_blit.c
index 4ba9a62..c9c8b02 100644
--- a/tests/gem_pread_after_blit.c
+++ b/tests/gem_pread_after_blit.c
@@ -131,14 +131,12 @@
 	drm_intel_bo *src1, *src2, *bo;
 	uint32_t start1 = 0;
 	uint32_t start2 = 1024 * 1024 / 4;
-	uint32_t devid;
 
 	fd = drm_open_any();
-	devid = intel_get_drm_devid(fd);
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	src1 = create_bo(start1);
 	src2 = create_bo(start2);
@@ -147,21 +145,21 @@
 
 	/* First, do a full-buffer read after blitting */
 	printf("Large read after blit 1\n");
-	intel_copy_bo(batch, bo, src1, width, height, devid);
+	intel_copy_bo(batch, bo, src1, width, height);
 	verify_large_read(bo, start1);
 	printf("Large read after blit 2\n");
-	intel_copy_bo(batch, bo, src2, width, height, devid);
+	intel_copy_bo(batch, bo, src2, width, height);
 	verify_large_read(bo, start2);
 
 	printf("Small reads after blit 1\n");
-	intel_copy_bo(batch, bo, src1, width, height, devid);
+	intel_copy_bo(batch, bo, src1, width, height);
 	verify_small_read(bo, start1);
 	printf("Small reads after blit 2\n");
-	intel_copy_bo(batch, bo, src2, width, height, devid);
+	intel_copy_bo(batch, bo, src2, width, height);
 	verify_small_read(bo, start2);
 
 	printf("Large read after blit 3\n");
-	intel_copy_bo(batch, bo, src1, width, height, devid);
+	intel_copy_bo(batch, bo, src1, width, height);
 	verify_large_read(bo, start1);
 
 	drm_intel_bo_unreference(src1);
diff --git a/tests/gem_ringfill.c b/tests/gem_ringfill.c
index c9860aa..5050e57 100644
--- a/tests/gem_ringfill.c
+++ b/tests/gem_ringfill.c
@@ -55,16 +55,14 @@
 int main(int argc, char **argv)
 {
 	int fd;
-	uint32_t devid;
 	int i;
 	drm_intel_bo *src_bo, *dst_bo;
 
 	fd = drm_open_any();
-	devid = intel_get_drm_devid(fd);
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	src_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096);
 	dst_bo = drm_intel_bo_alloc(bufmgr, "src bo", size, 4096);
@@ -85,7 +83,7 @@
 	 * doing this, we aren't likely to with this test.
 	 */
 	for (i = 0; i < 128 * 1024 / (8 * 4) * 1.25; i++) {
-		intel_copy_bo(batch, dst_bo, src_bo, width, height, devid);
+		intel_copy_bo(batch, dst_bo, src_bo, width, height);
 		intel_batchbuffer_flush(batch);
 	}
 
diff --git a/tests/gem_tiled_blits.c b/tests/gem_tiled_blits.c
index 212b4b7..b7f5d49 100644
--- a/tests/gem_tiled_blits.c
+++ b/tests/gem_tiled_blits.c
@@ -60,7 +60,6 @@
 static drm_intel_bufmgr *bufmgr;
 struct intel_batchbuffer *batch;
 static int width = 512, height = 512;
-static uint32_t devid;
 
 static drm_intel_bo *
 create_bo(uint32_t start_val)
@@ -86,7 +85,7 @@
 	}
 	drm_intel_bo_unmap(linear_bo);
 
-	intel_copy_bo (batch, bo, linear_bo, width, height, devid);
+	intel_copy_bo (batch, bo, linear_bo, width, height);
 
 	drm_intel_bo_unreference(linear_bo);
 
@@ -102,7 +101,7 @@
 
 	linear_bo = drm_intel_bo_alloc(bufmgr, "linear dst", 1024 * 1024, 4096);
 
-	intel_copy_bo(batch, linear_bo, bo, width, height, devid);
+	intel_copy_bo(batch, linear_bo, bo, width, height);
 
 	drm_intel_bo_map(linear_bo, 0);
 	linear = linear_bo->virtual;
@@ -131,11 +130,10 @@
 	int i;
 
 	fd = drm_open_any();
-	devid = intel_get_drm_devid(fd);
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	for (i = 0; i < bo_count; i++) {
 		bo[i] = create_bo(start);
@@ -156,7 +154,7 @@
 		if (src == dst)
 			continue;
 
-		intel_copy_bo(batch, bo[dst], bo[src], width, height, devid);
+		intel_copy_bo(batch, bo[dst], bo[src], width, height);
 		bo_start_val[dst] = bo_start_val[src];
 
 		/*
diff --git a/tests/gem_tiled_pread.c b/tests/gem_tiled_pread.c
index e19453e..ed7d858 100644
--- a/tests/gem_tiled_pread.c
+++ b/tests/gem_tiled_pread.c
@@ -81,7 +81,7 @@
 		linear[i] = val++;
 	drm_intel_bo_unmap(linear_bo);
 
-	intel_copy_bo(batch, bo, linear_bo, width, height, devid);
+	intel_copy_bo(batch, bo, linear_bo, width, height);
 
 	drm_intel_bo_unreference(linear_bo);
 
@@ -133,7 +133,7 @@
 
 	bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
-	batch = intel_batchbuffer_alloc(bufmgr);
+	batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
 
 	bo = create_bo(devid);