drm/i915/execlists: Pack the count into the low bits of the port.request

add/remove: 1/1 grow/shrink: 5/4 up/down: 391/-578 (-187)
function                                     old     new   delta
execlists_submit_ports                       262     471    +209
port_assign.isra                               -     136    +136
capture                                     6344    6359     +15
reset_common_ring                            438     452     +14
execlists_submit_request                     228     238     +10
gen8_init_common_ring                        334     341      +7
intel_engine_is_idle                         106     105      -1
i915_engine_info                            2314    2290     -24
__i915_gem_set_wedged_BKL                    485     411     -74
intel_lrc_irq_handler                       1789    1604    -185
execlists_update_context                     294       -    -294

The most important change there is the improve to the
intel_lrc_irq_handler and excclist_submit_ports (net improvement since
execlists_update_context is now inlined).

v2: Use the port_api() for guc as well (even though currently we do not
pack any counters in there, yet) and hide all port->request_count inside
the helpers.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Mika Kuoppala <mika.kuoppala@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20170517121007.27224-5-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index ec16fb6..162f0a9 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -368,8 +368,15 @@ struct intel_engine_cs {
 	/* Execlists */
 	struct tasklet_struct irq_tasklet;
 	struct execlist_port {
-		struct drm_i915_gem_request *request;
-		unsigned int count;
+		struct drm_i915_gem_request *request_count;
+#define EXECLIST_COUNT_BITS 2
+#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
+#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
+#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
+#define port_set(p, packed) ((p)->request_count = (packed))
+#define port_isset(p) ((p)->request_count)
+#define port_index(p, e) ((p) - (e)->execlist_port)
 		GEM_DEBUG_DECL(u32 context_id);
 	} execlist_port[2];
 	struct rb_root execlist_queue;