drm/i915/tracepoints: Add backend level request in and out tracepoints

Two new tracepoints placed at the call sites where requests are
actually passed to the GPU enable userspace to track engine
utilisation.

These tracepoints are only enabled when the
DRM_I915_LOW_LEVEL_TRACEPOINTS Kconfig option is enabled.

v2: Fix compilation with !CONFIG_DRM_I915_LOW_LEVEL_TRACEPOINTS.

v3: Name global seqno consistently across tracepoints.

v4: Remove port info from request out tracepoint. (Chris Wilson)

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 8ced9e2..beec88a 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -518,6 +518,8 @@ static void __i915_guc_submit(struct drm_i915_gem_request *rq)
 	if (i915_vma_is_map_and_fenceable(rq->ring->vma))
 		POSTING_READ_FW(GUC_STATUS);
 
+	trace_i915_gem_request_in(rq, 0);
+
 	b_ret = guc_ring_doorbell(client);
 
 	client->submissions[engine_id] += 1;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index ca2faca..0a5f27b 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -487,6 +487,45 @@ DEFINE_EVENT(i915_gem_request, i915_gem_request_execute,
 	     TP_PROTO(struct drm_i915_gem_request *req),
 	     TP_ARGS(req)
 );
+
+DECLARE_EVENT_CLASS(i915_gem_request_hw,
+		    TP_PROTO(struct drm_i915_gem_request *req,
+			     unsigned int port),
+		    TP_ARGS(req, port),
+
+		    TP_STRUCT__entry(
+				     __field(u32, dev)
+				     __field(u32, ring)
+				     __field(u32, seqno)
+				     __field(u32, global_seqno)
+				     __field(u32, ctx)
+				     __field(u32, port)
+				    ),
+
+		    TP_fast_assign(
+			           __entry->dev = req->i915->drm.primary->index;
+			           __entry->ring = req->engine->id;
+			           __entry->ctx = req->ctx->hw_id;
+			           __entry->seqno = req->fence.seqno;
+			           __entry->global_seqno = req->global_seqno;
+			           __entry->port = port;
+			          ),
+
+		    TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u",
+			      __entry->dev, __entry->ring, __entry->ctx,
+			      __entry->seqno, __entry->global_seqno,
+			      __entry->port)
+);
+
+DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in,
+	     TP_PROTO(struct drm_i915_gem_request *req, unsigned int port),
+	     TP_ARGS(req, port)
+);
+
+DEFINE_EVENT(i915_gem_request, i915_gem_request_out,
+	     TP_PROTO(struct drm_i915_gem_request *req),
+	     TP_ARGS(req)
+);
 #else
 #if !defined(TRACE_HEADER_MULTI_READ)
 static inline void
@@ -498,6 +537,16 @@ static inline void
 trace_i915_gem_request_execute(struct drm_i915_gem_request *req)
 {
 }
+
+static inline void
+trace_i915_gem_request_in(struct drm_i915_gem_request *req, unsigned int port)
+{
+}
+
+static inline void
+trace_i915_gem_request_out(struct drm_i915_gem_request *req)
+{
+}
 #endif
 #endif
 
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 5553f55..39329d4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -479,6 +479,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		cursor->priotree.priority = INT_MAX;
 
 		__i915_gem_request_submit(cursor);
+		trace_i915_gem_request_in(cursor, port - engine->execlist_port);
 		last = cursor;
 		submit = true;
 	}
@@ -593,6 +594,7 @@ static void intel_lrc_irq_handler(unsigned long data)
 				execlists_context_status_change(port[0].request,
 								INTEL_CONTEXT_SCHEDULE_OUT);
 
+				trace_i915_gem_request_out(port[0].request);
 				i915_gem_request_put(port[0].request);
 				port[0] = port[1];
 				memset(&port[1], 0, sizeof(port[1]));