batch: Specify number of relocations to accommodate

Since relocations vary in size depending upon the hardware generation, it is
easier to handle the resizing of the batch request inside the
BEGIN_BATCH macro. This still leaves us having to resize commands by hand
in a few places - which still need adaptation for gen8+.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/benchmarks/intel_upload_blit_large.c b/benchmarks/intel_upload_blit_large.c
index d9287ab..689f9c4 100644
--- a/benchmarks/intel_upload_blit_large.c
+++ b/benchmarks/intel_upload_blit_large.c
@@ -97,7 +97,7 @@
 	drm_intel_bo_subdata(src_bo, 0, sizeof(data), data);
 
 	/* Render the junk to the dst. */
-	BLIT_COPY_BATCH_START(batch->devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  (width * 4) /* dst pitch */);
diff --git a/benchmarks/intel_upload_blit_large_gtt.c b/benchmarks/intel_upload_blit_large_gtt.c
index 9859a74..601496d 100644
--- a/benchmarks/intel_upload_blit_large_gtt.c
+++ b/benchmarks/intel_upload_blit_large_gtt.c
@@ -95,7 +95,7 @@
 	drm_intel_gem_bo_unmap_gtt(src_bo);
 
 	/* Render the junk to the dst. */
-	BLIT_COPY_BATCH_START(batch->devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  (width * 4) /* dst pitch */);
diff --git a/benchmarks/intel_upload_blit_large_map.c b/benchmarks/intel_upload_blit_large_map.c
index 771cb3c..d916737 100644
--- a/benchmarks/intel_upload_blit_large_map.c
+++ b/benchmarks/intel_upload_blit_large_map.c
@@ -98,7 +98,7 @@
 	drm_intel_bo_unmap(src_bo);
 
 	/* Render the junk to the dst. */
-	BLIT_COPY_BATCH_START(batch->devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  (width * 4) /* dst pitch */);
diff --git a/benchmarks/intel_upload_blit_small.c b/benchmarks/intel_upload_blit_small.c
index b7d8068..b9640a4 100644
--- a/benchmarks/intel_upload_blit_small.c
+++ b/benchmarks/intel_upload_blit_small.c
@@ -108,7 +108,7 @@
 	}
 
 	/* Render the junk to the dst. */
-	BLIT_COPY_BATCH_START(batch->devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  (width * 4) /* dst pitch */);
diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 175791e..7313bb5 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -360,9 +360,7 @@
 		igt_fail(1);
 	}
 
-	BEGIN_BATCH(gen >= 8 ? 10 : 8);
-	OUT_BATCH(XY_SRC_COPY_BLT_CMD | cmd_bits |
-		  (gen >= 8 ? 8 : 6));
+	BLIT_COPY_BATCH_START(cmd_bits);
 	OUT_BATCH((br13_bits) |
 		  (0xcc << 16) | /* copy ROP */
 		  dst_pitch);
@@ -376,12 +374,14 @@
 
 #define CMD_POLY_STIPPLE_OFFSET       0x7906
 	if (gen == 5) {
+		BEGIN_BATCH(2, 0);
 		OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16);
 		OUT_BATCH(0);
+		ADVANCE_BATCH();
 	}
 
 	if (gen >= 6 && src_bo == dst_bo) {
-		BEGIN_BATCH(3);
+		BEGIN_BATCH(3, 0);
 		OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 		OUT_BATCH(0);
 		OUT_BATCH(0);
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 37955a7..74cf13b 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -77,6 +77,7 @@
 /**
  * BEGIN_BATCH:
  * @n: number of DWORDS to emit
+ * @r: number of RELOCS to emit (each takes one extra DWORD on gen8+)
  *
  * Prepares a batch to emit @n DWORDS, flushing it if there's not enough space
  * available.
@@ -84,10 +85,13 @@
  * This macro needs a pointer to an #intel_batchbuffer structure called batch in
  * scope.
  */
-#define BEGIN_BATCH(n) do {						\
+#define BEGIN_BATCH(n, r) do {						\
+	int __n = (n); /* size in DWORDS until scaled below */ \
 	igt_assert(batch->end == NULL); \
-	intel_batchbuffer_require_space(batch, (n)*4);			\
-	batch->end = batch->ptr + (n) * 4; \
+	if (batch->gen >= 8) __n += (r); /* one extra DWORD per reloc */ \
+	__n *= 4; /* DWORDS -> bytes */ \
+	intel_batchbuffer_require_space(batch, __n);			\
+	batch->end = batch->ptr + __n; \
 } while (0)
 
 /**
@@ -150,35 +154,21 @@
 	batch->end = NULL; \
 } while(0)
 
-#define BLIT_COPY_BATCH_START(devid, flags) do { \
-	if (intel_gen(devid) >= 8) { \
-		BEGIN_BATCH(10); \
-		OUT_BATCH(XY_SRC_COPY_BLT_CMD | \
-				XY_SRC_COPY_BLT_WRITE_ALPHA | \
-				XY_SRC_COPY_BLT_WRITE_RGB | \
-				(flags) | 8); \
-	} else { \
-		BEGIN_BATCH(8); \
-		OUT_BATCH(XY_SRC_COPY_BLT_CMD | \
-				XY_SRC_COPY_BLT_WRITE_ALPHA | \
-				XY_SRC_COPY_BLT_WRITE_RGB | \
-				(flags) | 6); \
-	} \
+#define BLIT_COPY_BATCH_START(flags) do { \
+	BEGIN_BATCH(8, 2); /* 8 DWORDS incl. 2 relocs; grows to 10 on gen8+ */ \
+	OUT_BATCH(XY_SRC_COPY_BLT_CMD | \
+		  XY_SRC_COPY_BLT_WRITE_ALPHA | \
+		  XY_SRC_COPY_BLT_WRITE_RGB | \
+		  (flags) | \
+		  (6 + 2*(batch->gen >= 8))); /* length field: 6, or 8 on gen8+ */ \
 } while(0)
 
-#define COLOR_BLIT_COPY_BATCH_START(devid, flags) do { \
-	if (intel_gen(devid) >= 8) { \
-		BEGIN_BATCH(8); \
-		OUT_BATCH(MI_NOOP); \
-		OUT_BATCH(XY_COLOR_BLT_CMD_NOLEN | 0x5 | \
-				COLOR_BLT_WRITE_ALPHA | \
-				XY_COLOR_BLT_WRITE_RGB); \
-	} else { \
-		BEGIN_BATCH(6); \
-		OUT_BATCH(XY_COLOR_BLT_CMD_NOLEN | 0x4 | \
-				COLOR_BLT_WRITE_ALPHA | \
-				XY_COLOR_BLT_WRITE_RGB); \
-	} \
+#define COLOR_BLIT_COPY_BATCH_START(flags) do { \
+	BEGIN_BATCH(6, 1); /* 6 DWORDS incl. 1 reloc; 7 on gen8+ */ \
+	OUT_BATCH(XY_COLOR_BLT_CMD_NOLEN | \
+		  COLOR_BLT_WRITE_ALPHA | \
+		  XY_COLOR_BLT_WRITE_RGB | (flags) | \
+		  (4 + (batch->gen >= 8))); /* length field: 4, or 5 on gen8+ */ \
 } while(0)
 
 void
diff --git a/tests/drm_vma_limiter_cached.c b/tests/drm_vma_limiter_cached.c
index 9383587..74d0c1a 100644
--- a/tests/drm_vma_limiter_cached.c
+++ b/tests/drm_vma_limiter_cached.c
@@ -81,7 +81,7 @@
 	/* put some load onto the gpu to keep the light buffers active for long
 	 * enough */
 	for (i = 0; i < 10000; i++) {
-		BLIT_COPY_BATCH_START(batch->devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  4096);
@@ -110,7 +110,7 @@
 			drm_intel_gem_bo_unmap_gtt(bo[j]);
 
 			/* put it onto the active list ... */
-			COLOR_BLIT_COPY_BATCH_START(intel_get_drm_devid(fd), 0);
+			COLOR_BLIT_COPY_BATCH_START(0);
 			OUT_BATCH((3 << 24) | /* 32 bits */
 				  128);
 			OUT_BATCH(0); /* dst x1,y1 */
diff --git a/tests/gem_bad_address.c b/tests/gem_bad_address.c
index e7a9587..4a4a570 100644
--- a/tests/gem_bad_address.c
+++ b/tests/gem_bad_address.c
@@ -50,7 +50,7 @@
 static void
 bad_store(void)
 {
-	BEGIN_BATCH(4);
+	BEGIN_BATCH(4, 0);
 	OUT_BATCH(MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL | 1 << 21);
 	OUT_BATCH(0);
 	OUT_BATCH(BAD_GTT_DEST);
diff --git a/tests/gem_bad_batch.c b/tests/gem_bad_batch.c
index 7f92a93..e3de3b6 100644
--- a/tests/gem_bad_batch.c
+++ b/tests/gem_bad_batch.c
@@ -48,7 +48,7 @@
 static void
 bad_batch(void)
 {
-	BEGIN_BATCH(2);
+	BEGIN_BATCH(2, 0);
 	OUT_BATCH(MI_BATCH_BUFFER_START);
 	OUT_BATCH(0);
 	ADVANCE_BATCH();
diff --git a/tests/gem_bad_blit.c b/tests/gem_bad_blit.c
index 33d1ac8..b467ba8 100644
--- a/tests/gem_bad_blit.c
+++ b/tests/gem_bad_blit.c
@@ -78,7 +78,7 @@
 		cmd_bits |= XY_SRC_COPY_BLT_DST_TILED;
 	}
 
-	BLIT_COPY_BATCH_START(devid, cmd_bits);
+	BLIT_COPY_BATCH_START(cmd_bits);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  dst_pitch);
diff --git a/tests/gem_caching.c b/tests/gem_caching.c
index ddd7d94..b7f4bfb 100644
--- a/tests/gem_caching.c
+++ b/tests/gem_caching.c
@@ -63,7 +63,7 @@
 static void
 copy_bo(drm_intel_bo *src, drm_intel_bo *dst)
 {
-	BLIT_COPY_BATCH_START(devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  4096);
diff --git a/tests/gem_cs_prefetch.c b/tests/gem_cs_prefetch.c
index ad5f4c6..e64b224 100644
--- a/tests/gem_cs_prefetch.c
+++ b/tests/gem_cs_prefetch.c
@@ -134,7 +134,7 @@
 
 		/* copy the sample batch with the gpu to the new one, so that we
 		 * also test the unmappable part of the gtt. */
-		BLIT_COPY_BATCH_START(batch->devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  4096);
diff --git a/tests/gem_double_irq_loop.c b/tests/gem_double_irq_loop.c
index 0fbb46e..f9dab7c 100644
--- a/tests/gem_double_irq_loop.c
+++ b/tests/gem_double_irq_loop.c
@@ -62,7 +62,7 @@
 	int i;
 
 	for (i = 0; i < 0x800; i++) {
-		BLIT_COPY_BATCH_START(batch->devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  4*4096);
@@ -75,7 +75,7 @@
 		ADVANCE_BATCH();
 		intel_batchbuffer_flush(batch);
 
-		BEGIN_BATCH(4);
+		BEGIN_BATCH(4, 1);
 		OUT_BATCH(MI_FLUSH_DW | 1);
 		OUT_BATCH(0); /* reserved */
 		OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
diff --git a/tests/gem_dummy_reloc_loop.c b/tests/gem_dummy_reloc_loop.c
index 4fe0786..7a97140 100644
--- a/tests/gem_dummy_reloc_loop.c
+++ b/tests/gem_dummy_reloc_loop.c
@@ -71,23 +71,21 @@
 	int i;
 
 	for (i = 0; i < 0x100000; i++) {
+		BEGIN_BATCH(4, 1);
 		if (ring == I915_EXEC_RENDER) {
-			BEGIN_BATCH(4);
 			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
 			OUT_BATCH(0xffffffff); /* compare dword */
 			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
 					I915_GEM_DOMAIN_RENDER, 0);
 			OUT_BATCH(MI_NOOP);
-			ADVANCE_BATCH();
 		} else {
-			BEGIN_BATCH(4);
 			OUT_BATCH(MI_FLUSH_DW | 1);
 			OUT_BATCH(0); /* reserved */
 			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
 					I915_GEM_DOMAIN_RENDER, 0);
 			OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
-			ADVANCE_BATCH();
 		}
+		ADVANCE_BATCH();
 		intel_batchbuffer_flush_on_ring(batch, ring);
 
 		drm_intel_bo_map(target_buffer, 0);
@@ -106,23 +104,21 @@
 	for (i = 0; i < 0x100000; i++) {
 		int ring = random() % num_rings + 1;
 
+		BEGIN_BATCH(4, 1);
 		if (ring == I915_EXEC_RENDER) {
-			BEGIN_BATCH(4);
 			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
 			OUT_BATCH(0xffffffff); /* compare dword */
 			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
 					I915_GEM_DOMAIN_RENDER, 0);
 			OUT_BATCH(MI_NOOP);
-			ADVANCE_BATCH();
 		} else {
-			BEGIN_BATCH(4);
 			OUT_BATCH(MI_FLUSH_DW | 1);
 			OUT_BATCH(0); /* reserved */
 			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
 					I915_GEM_DOMAIN_RENDER, 0);
 			OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
-			ADVANCE_BATCH();
 		}
+		ADVANCE_BATCH();
 		intel_batchbuffer_flush_on_ring(batch, ring);
 
 		drm_intel_bo_map(target_buffer, 0);
@@ -148,23 +144,21 @@
 		mindex = random() % NUM_FD;
 		batch = mbatch[mindex];
 
+		BEGIN_BATCH(4, 1);
 		if (ring == I915_EXEC_RENDER) {
-			BEGIN_BATCH(4);
 			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
 			OUT_BATCH(0xffffffff); /* compare dword */
 			OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
 					I915_GEM_DOMAIN_RENDER, 0);
 			OUT_BATCH(MI_NOOP);
-			ADVANCE_BATCH();
 		} else {
-			BEGIN_BATCH(4);
 			OUT_BATCH(MI_FLUSH_DW | 1);
 			OUT_BATCH(0); /* reserved */
 			OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
 					I915_GEM_DOMAIN_RENDER, 0);
 			OUT_BATCH(MI_NOOP | (1<<22) | (0xf));
-			ADVANCE_BATCH();
 		}
+		ADVANCE_BATCH();
 		intel_batchbuffer_flush_on_ring(batch, ring);
 
 		drm_intel_bo_map(target_buffer, 0);
diff --git a/tests/gem_exec_bad_domains.c b/tests/gem_exec_bad_domains.c
index 7641f8f..9901281 100644
--- a/tests/gem_exec_bad_domains.c
+++ b/tests/gem_exec_bad_domains.c
@@ -163,13 +163,13 @@
 	}
 
 	igt_subtest("cpu-domain") {
-		BEGIN_BATCH(2);
+		BEGIN_BATCH(2, 1);
 		OUT_BATCH(0);
 		OUT_RELOC(tmp, I915_GEM_DOMAIN_CPU, 0, 0);
 		ADVANCE_BATCH();
 		igt_assert(run_batch() == -EINVAL);
 
-		BEGIN_BATCH(2);
+		BEGIN_BATCH(2, 1);
 		OUT_BATCH(0);
 		OUT_RELOC(tmp, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU, 0);
 		ADVANCE_BATCH();
@@ -177,13 +177,13 @@
 	}
 
 	igt_subtest("gtt-domain") {
-		BEGIN_BATCH(2);
+		BEGIN_BATCH(2, 1);
 		OUT_BATCH(0);
 		OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT, 0, 0);
 		ADVANCE_BATCH();
 		igt_assert(run_batch() == -EINVAL);
 
-		BEGIN_BATCH(2);
+		BEGIN_BATCH(2, 1);
 		OUT_BATCH(0);
 		OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT, 0);
 		ADVANCE_BATCH();
@@ -193,7 +193,7 @@
 	/* Note: Older kernels disallow this. Punt on the skip check though
 	 * since this is too old. */
 	igt_subtest("conflicting-write-domain") {
-		BEGIN_BATCH(4);
+		BEGIN_BATCH(4, 2);
 		OUT_BATCH(0);
 		OUT_RELOC(tmp, I915_GEM_DOMAIN_RENDER,
 			  I915_GEM_DOMAIN_RENDER, 0);
@@ -208,14 +208,14 @@
 		multi_write_domain(fd);
 
 	igt_subtest("invalid-gpu-domain") {
-		BEGIN_BATCH(2);
+		BEGIN_BATCH(2, 1);
 		OUT_BATCH(0);
 		OUT_RELOC(tmp, ~(I915_GEM_GPU_DOMAINS | I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU),
 			  0, 0);
 		ADVANCE_BATCH();
 		igt_assert(run_batch() == -EINVAL);
 
-		BEGIN_BATCH(2);
+		BEGIN_BATCH(2, 1);
 		OUT_BATCH(0);
 		OUT_RELOC(tmp, I915_GEM_DOMAIN_GTT << 1,
 			  I915_GEM_DOMAIN_GTT << 1, 0);
diff --git a/tests/gem_fenced_exec_thrash.c b/tests/gem_fenced_exec_thrash.c
index 6ef21a4..85ead30 100644
--- a/tests/gem_fenced_exec_thrash.c
+++ b/tests/gem_fenced_exec_thrash.c
@@ -85,7 +85,7 @@
 	}
 
 	for (i = 0; i < 5; i++) {
-		BLIT_COPY_BATCH_START(devid, tile_flags);
+		BLIT_COPY_BATCH_START(tile_flags);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  pitch);
@@ -97,8 +97,8 @@
 		OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (IS_GEN6(devid) || IS_GEN7(devid)) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
diff --git a/tests/gem_hang.c b/tests/gem_hang.c
index d5eb564..7a7c8ac 100644
--- a/tests/gem_hang.c
+++ b/tests/gem_hang.c
@@ -54,7 +54,7 @@
 	cmd = bad_pipe ? MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW :
 		MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
 
-	BEGIN_BATCH(6);
+	BEGIN_BATCH(6, 0);
 	/* The documentation says that the LOAD_SCAN_LINES command
 	 * always comes in pairs. Don't ask me why. */
 	OUT_BATCH(MI_LOAD_SCAN_LINES_INCL | (bad_pipe << 20));
diff --git a/tests/gem_hangcheck_forcewake.c b/tests/gem_hangcheck_forcewake.c
index 219a265..ec74c51 100644
--- a/tests/gem_hangcheck_forcewake.c
+++ b/tests/gem_hangcheck_forcewake.c
@@ -88,9 +88,8 @@
 		pitch /= 4;
 
 	for (i = 0; i < 10000; i++) {
-		BLIT_COPY_BATCH_START(devid,
-				XY_SRC_COPY_BLT_SRC_TILED |
-				XY_SRC_COPY_BLT_DST_TILED);
+		BLIT_COPY_BATCH_START(XY_SRC_COPY_BLT_SRC_TILED |
+				      XY_SRC_COPY_BLT_DST_TILED);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  pitch);
@@ -102,8 +101,8 @@
 		OUT_RELOC_FENCED(bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (IS_GEN6(devid) || IS_GEN7(devid)) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
diff --git a/tests/gem_multi_bsd_sync_loop.c b/tests/gem_multi_bsd_sync_loop.c
index 003da42..dec738b 100644
--- a/tests/gem_multi_bsd_sync_loop.c
+++ b/tests/gem_multi_bsd_sync_loop.c
@@ -78,7 +78,7 @@
 		mindex = random() % NUM_FD;
 		batch = mbatch[mindex];
 		if (ring == I915_EXEC_RENDER) {
-			BEGIN_BATCH(4);
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
 			OUT_BATCH(0xffffffff); /* compare dword */
 			OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
@@ -86,7 +86,7 @@
 			OUT_BATCH(MI_NOOP);
 			ADVANCE_BATCH();
 		} else {
-			BEGIN_BATCH(4);
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(MI_FLUSH_DW | 1);
 			OUT_BATCH(0); /* reserved */
 			OUT_RELOC(mbuffer[mindex], I915_GEM_DOMAIN_RENDER,
diff --git a/tests/gem_non_secure_batch.c b/tests/gem_non_secure_batch.c
index 01101e9..d8969da 100644
--- a/tests/gem_non_secure_batch.c
+++ b/tests/gem_non_secure_batch.c
@@ -66,7 +66,7 @@
 	for (i = 0; i < 0x100; i++) {
 		int ring = random() % num_rings + 1;
 
-		BEGIN_BATCH(4);
+		BEGIN_BATCH(4, 0);
 		OUT_BATCH(MI_LOAD_REGISTER_IMM | 1);
 		OUT_BATCH(0x203c); /* RENDER RING CTL */
 		OUT_BATCH(0); /* try to stop the ring */
diff --git a/tests/gem_partial_pwrite_pread.c b/tests/gem_partial_pwrite_pread.c
index 92cc057..b9ffeec 100644
--- a/tests/gem_partial_pwrite_pread.c
+++ b/tests/gem_partial_pwrite_pread.c
@@ -63,7 +63,7 @@
 static void
 copy_bo(drm_intel_bo *src, drm_intel_bo *dst)
 {
-	BLIT_COPY_BATCH_START(devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  4096);
diff --git a/tests/gem_persistent_relocs.c b/tests/gem_persistent_relocs.c
index 11e7b6c..585eda8 100644
--- a/tests/gem_persistent_relocs.c
+++ b/tests/gem_persistent_relocs.c
@@ -125,7 +125,7 @@
 	}
 
 	for (i = 0; i < 5; i++) {
-		BLIT_COPY_BATCH_START(devid, tile_flags);
+		BLIT_COPY_BATCH_START(tile_flags);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  pitch);
@@ -137,8 +137,8 @@
 		OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (intel_gen(devid) >= 6) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
diff --git a/tests/gem_pipe_control_store_loop.c b/tests/gem_pipe_control_store_loop.c
index 27d1091..86681f2 100644
--- a/tests/gem_pipe_control_store_loop.c
+++ b/tests/gem_pipe_control_store_loop.c
@@ -77,7 +77,7 @@
 		igt_assert(target_bo);
 
 		if (preuse_buffer) {
-			COLOR_BLIT_COPY_BATCH_START(devid, 0);
+			COLOR_BLIT_COPY_BATCH_START(0);
 			OUT_BATCH((3 << 24) | (0xf0 << 16) | 64);
 			OUT_BATCH(0);
 			OUT_BATCH(1 << 16 | 1);
@@ -99,8 +99,8 @@
 		/* gem_storedw_batches_loop.c is a bit overenthusiastic with
 		 * creating new batchbuffers - with buffer reuse disabled, the
 		 * support code will do that for us. */
-		if (intel_gen(devid) >= 8) {
-			BEGIN_BATCH(5);
+		if (batch->gen >= 8) {
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(GFX_OP_PIPE_CONTROL + 1);
 			OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
 			OUT_RELOC_FENCED(target_bo,
@@ -109,10 +109,10 @@
 			OUT_BATCH(val); /* write data */
 			ADVANCE_BATCH();
 
-		} else if (intel_gen(devid) >= 6) {
+		} else if (batch->gen >= 6) {
 			/* work-around hw issue, see intel_emit_post_sync_nonzero_flush
 			 * in mesa sources. */
-			BEGIN_BATCH(4);
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(GFX_OP_PIPE_CONTROL);
 			OUT_BATCH(PIPE_CONTROL_CS_STALL |
 			     PIPE_CONTROL_STALL_AT_SCOREBOARD);
@@ -120,7 +120,7 @@
 			OUT_BATCH(0); /* write data */
 			ADVANCE_BATCH();
 
-			BEGIN_BATCH(4);
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(GFX_OP_PIPE_CONTROL);
 			OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
 			OUT_RELOC(target_bo,
@@ -128,8 +128,8 @@
 			     PIPE_CONTROL_GLOBAL_GTT);
 			OUT_BATCH(val); /* write data */
 			ADVANCE_BATCH();
-		} else if (intel_gen(devid) >= 4) {
-			BEGIN_BATCH(4);
+		} else if (batch->gen >= 4) {
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_WC_FLUSH |
 					PIPE_CONTROL_TC_FLUSH |
 					PIPE_CONTROL_WRITE_IMMEDIATE | 2);
diff --git a/tests/gem_reloc_vs_gpu.c b/tests/gem_reloc_vs_gpu.c
index d799bb9..bd6acdf 100644
--- a/tests/gem_reloc_vs_gpu.c
+++ b/tests/gem_reloc_vs_gpu.c
@@ -117,7 +117,7 @@
 	}
 
 	for (i = 0; i < 10; i++) {
-		BLIT_COPY_BATCH_START(devid, tile_flags);
+		BLIT_COPY_BATCH_START(tile_flags);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  pitch);
@@ -129,8 +129,8 @@
 		OUT_RELOC_FENCED(dummy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (intel_gen(devid) >= 6) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
diff --git a/tests/gem_ring_sync_loop.c b/tests/gem_ring_sync_loop.c
index d4e7e2a..fc510e4 100644
--- a/tests/gem_ring_sync_loop.c
+++ b/tests/gem_ring_sync_loop.c
@@ -67,7 +67,7 @@
 		int ring = random() % num_rings + 1;
 
 		if (ring == I915_EXEC_RENDER) {
-			BEGIN_BATCH(4);
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
 			OUT_BATCH(0xffffffff); /* compare dword */
 			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
@@ -75,7 +75,7 @@
 			OUT_BATCH(MI_NOOP);
 			ADVANCE_BATCH();
 		} else {
-			BEGIN_BATCH(4);
+			BEGIN_BATCH(4, 1);
 			OUT_BATCH(MI_FLUSH_DW | 1);
 			OUT_BATCH(0); /* reserved */
 			OUT_RELOC(target_buffer, I915_GEM_DOMAIN_RENDER,
diff --git a/tests/gem_ringfill.c b/tests/gem_ringfill.c
index 46a0e77..3ecd25e 100644
--- a/tests/gem_ringfill.c
+++ b/tests/gem_ringfill.c
@@ -178,7 +178,7 @@
 		     unsigned w, unsigned h,
 		     struct igt_buf *dst, unsigned dst_x, unsigned dst_y)
 {
-	BLIT_COPY_BATCH_START(batch->devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  dst->stride);
diff --git a/tests/gem_set_tiling_vs_blt.c b/tests/gem_set_tiling_vs_blt.c
index 1b7e459..4de325c 100644
--- a/tests/gem_set_tiling_vs_blt.c
+++ b/tests/gem_set_tiling_vs_blt.c
@@ -86,7 +86,7 @@
 	busy_bo = drm_intel_bo_alloc(bufmgr, "busy bo bo", 16*1024*1024, 4096);
 
 	for (i = 0; i < 250; i++) {
-		BLIT_COPY_BATCH_START(devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  2*1024*4);
@@ -98,8 +98,8 @@
 		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (IS_GEN6(devid) || IS_GEN7(devid)) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
@@ -157,7 +157,7 @@
 		blt_bits = XY_SRC_COPY_BLT_SRC_TILED;
 	}
 
-	BLIT_COPY_BATCH_START(devid, blt_bits);
+	BLIT_COPY_BATCH_START(blt_bits);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  stride);
@@ -181,7 +181,7 @@
 	/* Note: We don't care about gen4+ here because the blitter doesn't use
 	 * fences there. So not setting tiling flags on the tiled buffer is ok.
 	 */
-	BLIT_COPY_BATCH_START(devid, 0);
+	BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  stride_after);
diff --git a/tests/gem_storedw_loop_blt.c b/tests/gem_storedw_loop_blt.c
index 7f12b52..43750cc 100644
--- a/tests/gem_storedw_loop_blt.c
+++ b/tests/gem_storedw_loop_blt.c
@@ -59,22 +59,19 @@
 	if (!has_ppgtt)
 		cmd |= MI_MEM_VIRTUAL;
 
-	if (intel_gen(devid) >= 8) {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
+	BEGIN_BATCH(4, 0);
+	OUT_BATCH(cmd);
+	if (batch->gen >= 8) {
 		OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	} else {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
 		OUT_BATCH(0); /* reserved */
 		OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	}
+	ADVANCE_BATCH();
 }
 
 static void
diff --git a/tests/gem_storedw_loop_bsd.c b/tests/gem_storedw_loop_bsd.c
index f89d522..d5451be 100644
--- a/tests/gem_storedw_loop_bsd.c
+++ b/tests/gem_storedw_loop_bsd.c
@@ -59,23 +59,19 @@
 	if (!has_ppgtt)
 		cmd |= MI_MEM_VIRTUAL;
 
-	if (intel_gen(devid) >= 8) {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
+	BEGIN_BATCH(4, 0);
+	OUT_BATCH(cmd);
+	if (batch->gen >= 8) {
 		OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
-		OUT_BATCH(0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	} else {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
 		OUT_BATCH(0); /* reserved */
 		OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	}
+	ADVANCE_BATCH();
 }
 
 static void
diff --git a/tests/gem_storedw_loop_render.c b/tests/gem_storedw_loop_render.c
index 9defc6d..e1d3dad 100644
--- a/tests/gem_storedw_loop_render.c
+++ b/tests/gem_storedw_loop_render.c
@@ -59,23 +59,19 @@
 	if (!has_ppgtt)
 		cmd |= MI_MEM_VIRTUAL;
 
-	if (intel_gen(devid) >= 8) {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
+	BEGIN_BATCH(4, 0);
+	OUT_BATCH(cmd);
+	if (batch->gen >= 8) {
 		OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
-		OUT_BATCH(0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	} else {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
 		OUT_BATCH(0); /* reserved */
 		OUT_RELOC(dest, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	}
+	ADVANCE_BATCH();
 }
 
 static void
diff --git a/tests/gem_storedw_loop_vebox.c b/tests/gem_storedw_loop_vebox.c
index 7f43167..5e5536f 100644
--- a/tests/gem_storedw_loop_vebox.c
+++ b/tests/gem_storedw_loop_vebox.c
@@ -62,9 +62,9 @@
 	cmd = MI_STORE_DWORD_IMM;
 
 	for (i = 0; i < SLOW_QUICK(0x2000, 0x10); i++) {
-		BEGIN_BATCH(4);
+		BEGIN_BATCH(4, 0);
 		OUT_BATCH(cmd);
-		if (intel_gen(batch->devid) < 8)
+		if (batch->gen < 8)
 			OUT_BATCH(0); /* reserved */
 		OUT_RELOC(target_buffer, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
diff --git a/tests/gem_stress.c b/tests/gem_stress.c
index c3dd531..8d62b03 100644
--- a/tests/gem_stress.c
+++ b/tests/gem_stress.c
@@ -163,7 +163,7 @@
 	}
 
 	/* copy lower half to upper half */
-	BLIT_COPY_BATCH_START(devid, cmd_bits);
+	BLIT_COPY_BATCH_START(cmd_bits);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  dst_pitch);
@@ -175,8 +175,8 @@
 	OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 	ADVANCE_BATCH();
 
-	if (IS_GEN6(devid) || IS_GEN7(devid)) {
-		BEGIN_BATCH(3);
+	if (batch->gen >= 6) {
+		BEGIN_BATCH(3, 0);
 		OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 		OUT_BATCH(0);
 		OUT_BATCH(0);
diff --git a/tests/gem_tiled_partial_pwrite_pread.c b/tests/gem_tiled_partial_pwrite_pread.c
index a4a9d0b..cf8f48d 100644
--- a/tests/gem_tiled_partial_pwrite_pread.c
+++ b/tests/gem_tiled_partial_pwrite_pread.c
@@ -84,7 +84,7 @@
 		cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED;
 	}
 
-	BLIT_COPY_BATCH_START(devid, cmd_bits);
+	BLIT_COPY_BATCH_START(cmd_bits);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xcc << 16) | /* copy ROP */
 		  dst_pitch);
diff --git a/tests/gem_unfence_active_buffers.c b/tests/gem_unfence_active_buffers.c
index 2c221a2..fffe3a7 100644
--- a/tests/gem_unfence_active_buffers.c
+++ b/tests/gem_unfence_active_buffers.c
@@ -86,7 +86,7 @@
 	busy_bo = drm_intel_bo_alloc(bufmgr, "busy bo bo", 16*1024*1024, 4096);
 
 	for (i = 0; i < 250; i++) {
-		BLIT_COPY_BATCH_START(devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  2*1024*4);
@@ -98,8 +98,8 @@
 		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (IS_GEN6(devid) || IS_GEN7(devid)) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
@@ -119,7 +119,7 @@
 
 		drm_intel_bo_disable_reuse(test_bo);
 
-		BLIT_COPY_BATCH_START(devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  TEST_STRIDE);
@@ -138,7 +138,7 @@
 
 	/* launch a few batchs to ensure the damaged slab objects get reused. */
 	for (i = 0; i < 10; i++) {
-		BLIT_COPY_BATCH_START(devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  2*1024*4);
@@ -150,8 +150,8 @@
 		OUT_RELOC_FENCED(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (IS_GEN6(devid) || IS_GEN7(devid)) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
diff --git a/tests/gem_unref_active_buffers.c b/tests/gem_unref_active_buffers.c
index ca77598..7a1bc93 100644
--- a/tests/gem_unref_active_buffers.c
+++ b/tests/gem_unref_active_buffers.c
@@ -74,7 +74,7 @@
 		load_bo = drm_intel_bo_alloc(bufmgr, "target bo", 1024*4096, 4096);
 		igt_assert(load_bo);
 
-		BLIT_COPY_BATCH_START(batch->devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  4096);
diff --git a/tests/gem_wait_render_timeout.c b/tests/gem_wait_render_timeout.c
index 0a833f7..e05b7ae 100644
--- a/tests/gem_wait_render_timeout.c
+++ b/tests/gem_wait_render_timeout.c
@@ -104,16 +104,8 @@
 	const unsigned short height = pages/4;
 	const unsigned short width =  4096;
 
-	if (intel_gen(batch->devid) >= 8) {
-		BEGIN_BATCH(8);
-		OUT_BATCH(MI_NOOP);
-		OUT_BATCH(XY_COLOR_BLT_CMD_NOLEN | 5 |
-			  COLOR_BLT_WRITE_ALPHA	| XY_COLOR_BLT_WRITE_RGB);
-	} else {
-		BEGIN_BATCH(6);
-		OUT_BATCH(XY_COLOR_BLT_CMD_NOLEN | 4 |
-			  COLOR_BLT_WRITE_ALPHA	| XY_COLOR_BLT_WRITE_RGB);
-	}
+	COLOR_BLIT_COPY_BATCH_START(COLOR_BLT_WRITE_ALPHA |
+				    XY_COLOR_BLT_WRITE_RGB);
 	OUT_BATCH((3 << 24)	| /* 32 Bit Color */
 		  (0xF0 << 16)	| /* Raster OP copy background register */
 		  0);		  /* Dest pitch is 0 */
diff --git a/tests/gem_write_read_ring_switch.c b/tests/gem_write_read_ring_switch.c
index f09b3db..f3407f9 100644
--- a/tests/gem_write_read_ring_switch.c
+++ b/tests/gem_write_read_ring_switch.c
@@ -80,7 +80,7 @@
 	/* put some load onto the gpu to keep the light buffers active for long
 	 * enough */
 	for (i = 0; i < 1000; i++) {
-		BLIT_COPY_BATCH_START(batch->devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  4096);
@@ -93,7 +93,7 @@
 		ADVANCE_BATCH();
 	}
 
-	COLOR_BLIT_COPY_BATCH_START(batch->devid, 0);
+	COLOR_BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((3 << 24) | /* 32 bits */
 		  (0xff << 16) |
 		  128);
@@ -107,7 +107,7 @@
 
 	/* Emit an empty batch so that signalled seqno on the target ring >
 	 * signalled seqnoe on the blt ring. This is required to hit the bug. */
-	BEGIN_BATCH(2);
+	BEGIN_BATCH(2, 0);
 	OUT_BATCH(MI_NOOP);
 	OUT_BATCH(MI_NOOP);
 	ADVANCE_BATCH();
@@ -116,14 +116,14 @@
 	/* For the ring->ring sync it's important to only emit a read reloc, for
 	 * otherwise the obj->last_write_seqno will be updated. */
 	if (ring == I915_EXEC_RENDER) {
-		BEGIN_BATCH(4);
+		BEGIN_BATCH(4, 1);
 		OUT_BATCH(MI_COND_BATCH_BUFFER_END | MI_DO_COMPARE);
 		OUT_BATCH(0xffffffff); /* compare dword */
 		OUT_RELOC(target_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		OUT_BATCH(MI_NOOP);
 		ADVANCE_BATCH();
 	} else {
-		BEGIN_BATCH(4);
+		BEGIN_BATCH(4, 1);
 		OUT_BATCH(MI_FLUSH_DW | 1);
 		OUT_BATCH(0); /* reserved */
 		OUT_RELOC(target_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
diff --git a/tests/kms_fbc_crc.c b/tests/kms_fbc_crc.c
index 95d4041..4675ee5 100644
--- a/tests/kms_fbc_crc.c
+++ b/tests/kms_fbc_crc.c
@@ -83,20 +83,33 @@
 	return test_modes[mode];
 }
 
-static void fill_blt(data_t *data, uint32_t handle, unsigned char color)
+static void fill_blt(data_t *data,
+		     uint32_t handle,
+		     struct igt_fb *fb,
+		     unsigned char color)
 {
 	drm_intel_bo *dst = gem_handle_to_libdrm_bo(data->bufmgr,
 						    data->drm_fd,
 						    "", handle);
 	struct intel_batchbuffer *batch;
+	unsigned flags;
+	int pitch;
 
 	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
 	igt_assert(batch);
 
-	COLOR_BLIT_COPY_BATCH_START(batch->devid, 0);
-	OUT_BATCH((0 << 24) | (0xf0 << 16) | 0);
+	pitch = fb->stride;
+	flags = XY_COLOR_BLT_WRITE_ALPHA |
+		XY_COLOR_BLT_WRITE_RGB;
+	if (fb->tiling && batch->gen >= 4) {
+		flags |= XY_COLOR_BLT_TILED;
+		pitch /= 4;
+	}
+
+	COLOR_BLIT_COPY_BATCH_START(flags);
+	OUT_BATCH(3 << 24 | 0xf0 << 16 | pitch);
 	OUT_BATCH(0);
-	OUT_BATCH(1 << 16 | 4);
+	OUT_BATCH(fb->height << 16 | fb->width);
 	OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
 	OUT_BATCH(color);
 	ADVANCE_BATCH();
@@ -127,7 +140,7 @@
 	igt_assert(batch);
 
 	/* add the reloc to make sure the kernel will think we write to dst */
-	BEGIN_BATCH(4);
+	BEGIN_BATCH(4, 1);
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 	OUT_BATCH(MI_NOOP);
 	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
@@ -226,7 +239,7 @@
 		break;
 	case TEST_BLT:
 	case TEST_PAGE_FLIP_AND_BLT:
-		fill_blt(data, handle, 0xff);
+		fill_blt(data, handle, data->fb, ~0);
 		break;
 	case TEST_RENDER:
 	case TEST_CONTEXT:
diff --git a/tests/kms_fence_pin_leak.c b/tests/kms_fence_pin_leak.c
index 93f4e16..69f36b8 100644
--- a/tests/kms_fence_pin_leak.c
+++ b/tests/kms_fence_pin_leak.c
@@ -54,7 +54,7 @@
 	igt_assert(batch);
 
 	/* add the reloc to make sure the kernel will think we write to dst */
-	BEGIN_BATCH(4);
+	BEGIN_BATCH(4, 1);
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 	OUT_BATCH(MI_NOOP);
 	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
diff --git a/tests/kms_flip.c b/tests/kms_flip.c
index cb94f0b..3d3aa9b 100644
--- a/tests/kms_flip.c
+++ b/tests/kms_flip.c
@@ -179,7 +179,7 @@
 	igt_assert(target_bo);
 
 	for (i = 0; i < limit; i++) {
-		BLIT_COPY_BATCH_START(devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  pitch);
@@ -191,8 +191,8 @@
 		OUT_RELOC_FENCED(target_bo, I915_GEM_DOMAIN_RENDER, 0, 0);
 		ADVANCE_BATCH();
 
-		if (IS_GEN6(devid) || IS_GEN7(devid)) {
-			BEGIN_BATCH(3);
+		if (batch->gen >= 6) {
+			BEGIN_BATCH(3, 0);
 			OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
 			OUT_BATCH(0);
 			OUT_BATCH(0);
diff --git a/tests/kms_mmio_vs_cs_flip.c b/tests/kms_mmio_vs_cs_flip.c
index a34809a..c8bc702 100644
--- a/tests/kms_mmio_vs_cs_flip.c
+++ b/tests/kms_mmio_vs_cs_flip.c
@@ -55,7 +55,7 @@
 	igt_assert(bo);
 
 	/* add relocs to make sure the kernel will think we write to dst */
-	BEGIN_BATCH(4);
+	BEGIN_BATCH(4, 1);
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 	OUT_BATCH(MI_NOOP);
 	OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
@@ -81,7 +81,7 @@
 	pitch = w * 4;
 
 	for (i = 0; i < 40; i++) {
-		BLIT_COPY_BATCH_START(data->devid, 0);
+		BLIT_COPY_BATCH_START(0);
 		OUT_BATCH((3 << 24) | /* 32 bits */
 			  (0xcc << 16) | /* copy ROP */
 			  pitch);
diff --git a/tests/kms_psr_sink_crc.c b/tests/kms_psr_sink_crc.c
index 909d6ca..49f9549 100644
--- a/tests/kms_psr_sink_crc.c
+++ b/tests/kms_psr_sink_crc.c
@@ -180,8 +180,7 @@
 	batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
 	igt_assert(batch);
 
-	BEGIN_BATCH(5);
-	OUT_BATCH(COLOR_BLT_CMD);
+	COLOR_BLIT_COPY_BATCH_START(0);
 	OUT_BATCH((1 << 24) | (0xf0 << 16) | 0);
 	OUT_BATCH(1 << 16 | 4);
 	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
@@ -214,7 +213,7 @@
 	igt_assert(batch);
 
 	/* add the reloc to make sure the kernel will think we write to dst */
-	BEGIN_BATCH(4);
+	BEGIN_BATCH(4, 1);
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 	OUT_BATCH(MI_NOOP);
 	OUT_RELOC(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
diff --git a/tests/pm_rps.c b/tests/pm_rps.c
index e285178..ef382ec 100644
--- a/tests/pm_rps.c
+++ b/tests/pm_rps.c
@@ -178,23 +178,19 @@
 	if (!lh.has_ppgtt)
 		cmd |= MI_MEM_VIRTUAL;
 
-	if (intel_gen(lh.devid) >= 8) {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
+	BEGIN_BATCH(4, 0); /* both paths emit exactly 4 dwords; the wider gen8+ reloc is already counted */
+	OUT_BATCH(cmd);
+	if (batch->gen >= 8) {
 		OUT_RELOC(lh.target_buffer, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
-		OUT_BATCH(0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	} else {
-		BEGIN_BATCH(4);
-		OUT_BATCH(cmd);
 		OUT_BATCH(0); /* reserved */
 		OUT_RELOC(lh.target_buffer, I915_GEM_DOMAIN_INSTRUCTION,
 			  I915_GEM_DOMAIN_INSTRUCTION, 0);
 		OUT_BATCH(val);
-		ADVANCE_BATCH();
 	}
+	ADVANCE_BATCH();
 }
 
 #define LOAD_HELPER_PAUSE_USEC 500
diff --git a/tests/prime_nv_pcopy.c b/tests/prime_nv_pcopy.c
index fb0f62f..218f4ba 100644
--- a/tests/prime_nv_pcopy.c
+++ b/tests/prime_nv_pcopy.c
@@ -166,7 +166,7 @@
 static void
 noop_intel(drm_intel_bo *bo)
 {
-	BEGIN_BATCH(3);
+	BEGIN_BATCH(3, 1);
 	OUT_BATCH(MI_NOOP);
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 	OUT_RELOC(bo, I915_GEM_DOMAIN_RENDER,
diff --git a/tools/intel_perf_counters.c b/tools/intel_perf_counters.c
index e6eafb2..739f926 100644
--- a/tools/intel_perf_counters.c
+++ b/tools/intel_perf_counters.c
@@ -332,7 +332,7 @@
 
 	stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
 
-	BEGIN_BATCH(6);
+	BEGIN_BATCH(6, 2);
 	OUT_BATCH(GEN5_MI_REPORT_PERF_COUNT | MI_COUNTER_SET_0);
 	OUT_RELOC(stats_bo,
 		  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
@@ -380,7 +380,7 @@
 
 	stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
 
-	BEGIN_BATCH(3);
+	BEGIN_BATCH(3, 1);
 	OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2));
 	OUT_RELOC(stats_bo,
 		  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
@@ -410,7 +410,7 @@
 
 	stats_bo = drm_intel_bo_alloc(bufmgr, "stats", 4096, 4096);
 
-	BEGIN_BATCH(3);
+	BEGIN_BATCH(3, 1);
 	OUT_BATCH(GEN6_MI_REPORT_PERF_COUNT | (3 - 2));
 	OUT_RELOC(stats_bo,
 		  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);