lib/gpu_fill: Further code unification in gpu_fill

Unify the gen7_emit_vfe_state and gen8_emit_vfe_state variants used by
gpgpu_fill, media_fill and media_spin into one function per gen by
adding parameters. gen8_emit_media_objects_spin was renamed to
gen_emit_media_object and extended with additional offset parameters,
so a single gen7_emit_media_objects can serve all tests.
The gen_ prefix is kept on gen_emit_media_object because the function
applies to all gens, and it would be odd to have every other helper
named genX_* and only this one with no prefix at all.

v2: Use #defines instead of variables as emit_vfe_state parameters.
Fixed gen7_emit_media_objects. Unified the vfe state parameters
in the media_spin library for gen8 and gen9 (gen9 had different values
by mistake).
v3: Fixed a bug in emit_vfe_state for gen8 and gen9 in gpgpu_fill.
Moved the #defines for emit_vfe_state to the particular libraries.
v4: Fixed indentation in media_spin.
v5: A few more style changes.

Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
Cc: Lukasz Kalamarz <lukasz.kalamarz@intel.com>
Cc: Antonio Argenziano <antonio.argenziano@intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Cc: Ewelina Musial <ewelina.musial@intel.com>
Reviewed-By: Ewelina Musial <ewelina.musial@intel.com>
diff --git a/lib/gpgpu_fill.c b/lib/gpgpu_fill.c
index 52925a5..9a2598b 100644
--- a/lib/gpgpu_fill.c
+++ b/lib/gpgpu_fill.c
@@ -95,6 +95,13 @@
  */
 
 #define BATCH_STATE_SPLIT 2048
+/* VFE STATE params */
+#define THREADS 1
+#define GEN7_GPGPU_URB_ENTRIES 0
+#define GEN8_GPGPU_URB_ENTRIES 1
+#define GPGPU_URB_SIZE 0
+#define GPGPU_CURBE_SIZE 1
+#define GEN7_VFE_STATE_GPGPU_MODE 1
 
 void
 gen7_gpgpu_fillfunc(struct intel_batchbuffer *batch,
@@ -129,7 +136,9 @@
 	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
 
 	gen7_emit_state_base_address(batch);
-	gen7_emit_vfe_state_gpgpu(batch);
+	gen7_emit_vfe_state(batch, THREADS, GEN7_GPGPU_URB_ENTRIES,
+			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE,
+			    GEN7_VFE_STATE_GPGPU_MODE);
 	gen7_emit_curbe_load(batch, curbe_buffer);
 	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 	gen7_emit_gpgpu_walk(batch, x, y, width, height);
@@ -176,7 +185,8 @@
 	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_GPGPU);
 
 	gen8_emit_state_base_address(batch);
-	gen8_emit_vfe_state_gpgpu(batch);
+	gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
+			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
 	gen7_emit_curbe_load(batch, curbe_buffer);
 	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 	gen8_emit_gpgpu_walk(batch, x, y, width, height);
@@ -224,7 +234,8 @@
 		  PIPELINE_SELECT_GPGPU);
 
 	gen9_emit_state_base_address(batch);
-	gen8_emit_vfe_state_gpgpu(batch);
+	gen8_emit_vfe_state(batch, THREADS, GEN8_GPGPU_URB_ENTRIES,
+			    GPGPU_URB_SIZE, GPGPU_CURBE_SIZE);
 	gen7_emit_curbe_load(batch, curbe_buffer);
 	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 	gen8_emit_gpgpu_walk(batch, x, y, width, height);
diff --git a/lib/gpu_fill.c b/lib/gpu_fill.c
index 24e03cf..2179136 100644
--- a/lib/gpu_fill.c
+++ b/lib/gpu_fill.c
@@ -194,7 +194,9 @@
 }
 
 void
-gen7_emit_vfe_state(struct intel_batchbuffer *batch)
+gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size, uint32_t mode)
 {
 	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
 
@@ -202,39 +204,15 @@
 	OUT_BATCH(0);
 
 	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
+	OUT_BATCH(threads << 16 |
+		urb_entries << 8 |
+		mode << 2); /* GPGPU vs media mode */
 
 	OUT_BATCH(0);
 
 	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |	/* in 256 bits unit */
-		  2);		/* in 256 bits unit */
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-void
-gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (8 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | /* max num of threads */
-		  0 << 8 | /* num of URB entry */
-		  1 << 2); /* GPGPU mode */
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 |	/* URB entry size in 256 bits unit */
-		  1);		/* CURBE entry size in 256 bits unit */
+	OUT_BATCH(urb_size << 16 |	/* in 256 bits unit */
+		  curbe_size);		/* in 256 bits unit */
 
 	/* scoreboard */
 	OUT_BATCH(0);
@@ -279,25 +257,7 @@
 
 	for (i = 0; i < width / 16; i++) {
 		for (j = 0; j < height / 16; j++) {
-			OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
-
-			/* interface descriptor offset */
-			OUT_BATCH(0);
-
-			/* without indirect data */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* scoreboard */
-			OUT_BATCH(0);
-			OUT_BATCH(0);
-
-			/* inline data (xoffset, yoffset) */
-			OUT_BATCH(x + i * 16);
-			OUT_BATCH(y + j * 16);
-			if (AT_LEAST_GEN(batch->devid, 8) &&
-			    !IS_CHERRYVIEW(batch->devid))
-				gen8_emit_media_state_flush(batch);
+			gen_emit_media_object(batch, x + i * 16, y + j * 16);
 		}
 	}
 }
@@ -505,7 +465,9 @@
 }
 
 void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch)
+gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size)
 {
 	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
 
@@ -514,61 +476,14 @@
 	OUT_BATCH(0);
 
 	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 |
-		2 << 8);
+	OUT_BATCH(threads << 16 |
+		urb_entries << 8);
 
 	OUT_BATCH(0);
 
 	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN7_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(1 << 16 | 1 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(0 << 16 | 1);
-
-	/* scoreboard */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-}
-
-void
-gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch)
-{
-	OUT_BATCH(GEN8_MEDIA_VFE_STATE | (9 - 2));
-
-	/* scratch buffer */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
-
-	/* number of threads & urb entries */
-	OUT_BATCH(2 << 8);
-
-	OUT_BATCH(0);
-
-	/* urb entry size & curbe size */
-	OUT_BATCH(2 << 16 |
-		2);
+	OUT_BATCH(urb_size << 16 |
+		curbe_size);
 
 	/* scoreboard */
 	OUT_BATCH(0);
@@ -637,9 +552,10 @@
 }
 
 void
-gen8_emit_media_objects_spin(struct intel_batchbuffer *batch)
+gen_emit_media_object(struct intel_batchbuffer *batch,
+		       unsigned int xoffset, unsigned int yoffset)
 {
-	OUT_BATCH(GEN8_MEDIA_OBJECT | (8 - 2));
+	OUT_BATCH(GEN7_MEDIA_OBJECT | (8 - 2));
 
 	/* interface descriptor offset */
 	OUT_BATCH(0);
@@ -653,8 +569,8 @@
 	OUT_BATCH(0);
 
 	/* inline data (xoffset, yoffset) */
-	OUT_BATCH(0);
-	OUT_BATCH(0);
+	OUT_BATCH(xoffset);
+	OUT_BATCH(yoffset);
 	if (AT_LEAST_GEN(batch->devid, 8) && !IS_CHERRYVIEW(batch->devid))
 		gen8_emit_media_state_flush(batch);
 }
diff --git a/lib/gpu_fill.h b/lib/gpu_fill.h
index 1e4be4b..d41e6dd 100644
--- a/lib/gpu_fill.h
+++ b/lib/gpu_fill.h
@@ -68,10 +68,9 @@
 gen7_emit_state_base_address(struct intel_batchbuffer *batch);
 
 void
-gen7_emit_vfe_state(struct intel_batchbuffer *batch);
-
-void
-gen7_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
+gen7_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size, uint32_t mode);
 
 void
 gen7_emit_curbe_load(struct intel_batchbuffer *batch, uint32_t curbe_buffer);
@@ -112,13 +111,9 @@
 gen8_emit_media_state_flush(struct intel_batchbuffer *batch);
 
 void
-gen8_emit_vfe_state(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_vfe_state_gpgpu(struct intel_batchbuffer *batch);
-
-void
-gen8_emit_vfe_state_spin(struct intel_batchbuffer *batch);
+gen8_emit_vfe_state(struct intel_batchbuffer *batch, uint32_t threads,
+		    uint32_t urb_entries, uint32_t urb_size,
+		    uint32_t curbe_size);
 
 void
 gen8_emit_gpgpu_walk(struct intel_batchbuffer *batch,
@@ -126,7 +121,8 @@
 		     unsigned int width, unsigned int height);
 
 void
-gen8_emit_media_objects_spin(struct intel_batchbuffer *batch);
+gen_emit_media_object(struct intel_batchbuffer *batch, unsigned int xoffset,
+		  unsigned int yoffset);
 
 void
 gen9_emit_state_base_address(struct intel_batchbuffer *batch);
diff --git a/lib/media_fill_gen7.c b/lib/media_fill_gen7.c
index 3dc5617..a38b855 100644
--- a/lib/media_fill_gen7.c
+++ b/lib/media_fill_gen7.c
@@ -43,6 +43,12 @@
  */
 
 #define BATCH_STATE_SPLIT 2048
+/* VFE STATE params */
+#define THREADS 1
+#define MEDIA_URB_ENTRIES 2
+#define MEDIA_URB_SIZE 2
+#define MEDIA_CURBE_SIZE 2
+#define GEN7_VFE_STATE_MEDIA_MODE 0
 
 void
 gen7_media_fillfunc(struct intel_batchbuffer *batch,
@@ -69,7 +75,8 @@
 	OUT_BATCH(GEN7_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
 	gen7_emit_state_base_address(batch);
 
-	gen7_emit_vfe_state(batch);
+	gen7_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+			    MEDIA_CURBE_SIZE, GEN7_VFE_STATE_MEDIA_MODE);
 
 	gen7_emit_curbe_load(batch, curbe_buffer);
 
diff --git a/lib/media_fill_gen8.c b/lib/media_fill_gen8.c
index 63fe72e..fc89c7f 100644
--- a/lib/media_fill_gen8.c
+++ b/lib/media_fill_gen8.c
@@ -46,6 +46,11 @@
  */
 
 #define BATCH_STATE_SPLIT 2048
+/* VFE STATE params */
+#define THREADS 1
+#define MEDIA_URB_ENTRIES 2
+#define MEDIA_URB_SIZE 2
+#define MEDIA_CURBE_SIZE 2
 
 void
 gen8_media_fillfunc(struct intel_batchbuffer *batch,
@@ -72,7 +77,8 @@
 	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
 	gen8_emit_state_base_address(batch);
 
-	gen8_emit_vfe_state(batch);
+	gen8_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+			    MEDIA_CURBE_SIZE);
 
 	gen7_emit_curbe_load(batch, curbe_buffer);
 
diff --git a/lib/media_fill_gen9.c b/lib/media_fill_gen9.c
index 78e892f..805e757 100644
--- a/lib/media_fill_gen9.c
+++ b/lib/media_fill_gen9.c
@@ -43,6 +43,11 @@
  */
 
 #define BATCH_STATE_SPLIT 2048
+/* VFE STATE params */
+#define THREADS 1
+#define MEDIA_URB_ENTRIES 2
+#define MEDIA_URB_SIZE 2
+#define MEDIA_CURBE_SIZE 2
 
 void
 gen9_media_fillfunc(struct intel_batchbuffer *batch,
@@ -74,7 +79,8 @@
 			GEN9_FORCE_MEDIA_AWAKE_MASK);
 	gen9_emit_state_base_address(batch);
 
-	gen8_emit_vfe_state(batch);
+	gen8_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES, MEDIA_URB_SIZE,
+			    MEDIA_CURBE_SIZE);
 
 	gen7_emit_curbe_load(batch, curbe_buffer);
 
diff --git a/lib/media_spin.c b/lib/media_spin.c
index 3592e0d..c3a8d57 100644
--- a/lib/media_spin.c
+++ b/lib/media_spin.c
@@ -67,6 +67,17 @@
  */
 
 #define BATCH_STATE_SPLIT 2048
+/* VFE STATE params */
+#define THREADS 0
+#define MEDIA_URB_ENTRIES 2
+#define MEDIA_URB_SIZE 2
+#define MEDIA_CURBE_SIZE 2
+
+/* Offsets needed by gen_emit_media_object. In the media_spin library
+ * these values do not matter.
+ */
+#define xoffset 0
+#define yoffset 0
 
 void
 gen8_media_spinfunc(struct intel_batchbuffer *batch,
@@ -90,13 +101,14 @@
 	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
 	gen8_emit_state_base_address(batch);
 
-	gen8_emit_vfe_state_spin(batch);
+	gen8_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES,
+			    MEDIA_URB_SIZE, MEDIA_CURBE_SIZE);
 
 	gen7_emit_curbe_load(batch, curbe_buffer);
 
 	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects_spin(batch);
+	gen_emit_media_object(batch, xoffset, yoffset);
 
 	OUT_BATCH(MI_BATCH_BUFFER_END);
 
@@ -134,13 +146,14 @@
 		  GEN9_FORCE_MEDIA_AWAKE_MASK);
 	gen9_emit_state_base_address(batch);
 
-	gen8_emit_vfe_state_spin(batch);
+	gen8_emit_vfe_state(batch, THREADS, MEDIA_URB_ENTRIES,
+			    MEDIA_URB_SIZE, MEDIA_CURBE_SIZE);
 
 	gen7_emit_curbe_load(batch, curbe_buffer);
 
 	gen7_emit_interface_descriptor_load(batch, interface_descriptor);
 
-	gen8_emit_media_objects_spin(batch);
+	gen_emit_media_object(batch, xoffset, yoffset);
 
 	OUT_BATCH(GEN8_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
 		  GEN9_FORCE_MEDIA_AWAKE_DISABLE |