drm/i915: Group the irq breadcrumb variables into the same cacheline
As we inspect both the tasklet (to check for an active bottom-half) and
set the irq-posted flag at the same time (both in the interrupt handler
and then in the bottom-halt), group those two together into the same
cacheline. (Not having total control over placement of the struct means
we can't guarantee the cacheline boundary, we need to align the kmalloc
and then each struct, but the grouping should help.)
v2: Try a couple of different names for the state touched by the user
interrupt handler.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: http://patchwork.freedesktop.org/patch/msgid/1467805142-22219-3-git-send-email-chris@chris-wilson.co.uk
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index a59e0ca..3d05cae 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -793,8 +793,8 @@
seq_printf(m, "Current sequence (%s): %x\n",
engine->name, intel_engine_get_seqno(engine));
- seq_printf(m, "Current user interrupts (%s): %x\n",
- engine->name, READ_ONCE(engine->user_interrupts));
+ seq_printf(m, "Current user interrupts (%s): %lx\n",
+ engine->name, READ_ONCE(engine->breadcrumbs.irq_wakeups));
spin_lock(&b->lock);
for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
@@ -1442,9 +1442,9 @@
engine->last_submitted_seqno);
seq_printf(m, "\twaiters? %d\n",
intel_engine_has_waiter(engine));
- seq_printf(m, "\tuser interrupts = %x [current %x]\n",
+ seq_printf(m, "\tuser interrupts = %lx [current %lx]\n",
engine->hangcheck.user_interrupts,
- READ_ONCE(engine->user_interrupts));
+ READ_ONCE(engine->breadcrumbs.irq_wakeups));
seq_printf(m, "\tACTHD = 0x%08llx [current 0x%08llx]\n",
(long long)engine->hangcheck.acthd,
(long long)acthd[id]);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 11e9769..d2c6099 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3998,8 +3998,8 @@
* is woken.
*/
if (engine->irq_seqno_barrier &&
- READ_ONCE(engine->breadcrumbs.tasklet) == current &&
- cmpxchg_relaxed(&engine->irq_posted, 1, 0)) {
+ READ_ONCE(engine->breadcrumbs.irq_seqno_bh) == current &&
+ cmpxchg_relaxed(&engine->breadcrumbs.irq_posted, 1, 0)) {
struct task_struct *tsk;
/* The ordering of irq_posted versus applying the barrier
@@ -4023,7 +4023,7 @@
* irq_posted == false but we are still running).
*/
rcu_read_lock();
- tsk = READ_ONCE(engine->breadcrumbs.tasklet);
+ tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
if (tsk && tsk != current)
/* Note that if the bottom-half is changed as we
* are sending the wake-up, the new bottom-half will
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b77d808..a69a5fc 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -977,10 +977,10 @@
static void notify_ring(struct intel_engine_cs *engine)
{
- smp_store_mb(engine->irq_posted, true);
+ smp_store_mb(engine->breadcrumbs.irq_posted, true);
if (intel_engine_wakeup(engine)) {
trace_i915_gem_request_notify(engine);
- engine->user_interrupts++;
+ engine->breadcrumbs.irq_wakeups++;
}
}
@@ -3054,12 +3054,12 @@
return HANGCHECK_HUNG;
}
-static unsigned kick_waiters(struct intel_engine_cs *engine)
+static unsigned long kick_waiters(struct intel_engine_cs *engine)
{
struct drm_i915_private *i915 = engine->i915;
- unsigned user_interrupts = READ_ONCE(engine->user_interrupts);
+ unsigned long irq_count = READ_ONCE(engine->breadcrumbs.irq_wakeups);
- if (engine->hangcheck.user_interrupts == user_interrupts &&
+ if (engine->hangcheck.user_interrupts == irq_count &&
!test_and_set_bit(engine->id, &i915->gpu_error.missed_irq_rings)) {
if (!test_bit(engine->id, &i915->gpu_error.test_irq_rings))
DRM_ERROR("Hangcheck timer elapsed... %s idle\n",
@@ -3068,7 +3068,7 @@
intel_engine_enable_fake_irq(engine);
}
- return user_interrupts;
+ return irq_count;
}
/*
* This is called when the chip hasn't reported back with completed
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 6fcbb52..d89b2c9 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -49,7 +49,7 @@
* we still need to force the barrier before reading the seqno,
* just in case.
*/
- engine->irq_posted = true;
+ engine->breadcrumbs.irq_posted = true;
spin_lock_irq(&engine->i915->irq_lock);
engine->irq_enable(engine);
@@ -62,7 +62,7 @@
engine->irq_disable(engine);
spin_unlock_irq(&engine->i915->irq_lock);
- engine->irq_posted = false;
+ engine->breadcrumbs.irq_posted = false;
}
static void __intel_breadcrumbs_enable_irq(struct intel_breadcrumbs *b)
@@ -195,7 +195,7 @@
}
rb_link_node(&wait->node, parent, p);
rb_insert_color(&wait->node, &b->waiters);
- GEM_BUG_ON(!first && !b->tasklet);
+ GEM_BUG_ON(!first && !b->irq_seqno_bh);
if (completed) {
struct rb_node *next = rb_next(completed);
@@ -204,7 +204,7 @@
if (next && next != &wait->node) {
GEM_BUG_ON(first);
b->first_wait = to_wait(next);
- smp_store_mb(b->tasklet, b->first_wait->tsk);
+ smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
/* As there is a delay between reading the current
* seqno, processing the completed tasks and selecting
* the next waiter, we may have missed the interrupt
@@ -216,7 +216,7 @@
* in case the seqno passed.
*/
__intel_breadcrumbs_enable_irq(b);
- if (READ_ONCE(engine->irq_posted))
+ if (READ_ONCE(b->irq_posted))
wake_up_process(to_wait(next)->tsk);
}
@@ -230,18 +230,18 @@
if (first) {
GEM_BUG_ON(rb_first(&b->waiters) != &wait->node);
b->first_wait = wait;
- smp_store_mb(b->tasklet, wait->tsk);
+ smp_store_mb(b->irq_seqno_bh, wait->tsk);
/* After assigning ourselves as the new bottom-half, we must
* perform a cursory check to prevent a missed interrupt.
* Either we miss the interrupt whilst programming the hardware,
* or if there was a previous waiter (for a later seqno) they
* may be woken instead of us (due to the inherent race
- * in the unlocked read of b->tasklet in the irq handler) and
- * so we miss the wake up.
+ * in the unlocked read of b->irq_seqno_bh in the irq handler)
+ * and so we miss the wake up.
*/
__intel_breadcrumbs_enable_irq(b);
}
- GEM_BUG_ON(!b->tasklet);
+ GEM_BUG_ON(!b->irq_seqno_bh);
GEM_BUG_ON(!b->first_wait);
GEM_BUG_ON(rb_first(&b->waiters) != &b->first_wait->node);
@@ -301,7 +301,7 @@
const int priority = wakeup_priority(b, wait->tsk);
struct rb_node *next;
- GEM_BUG_ON(b->tasklet != wait->tsk);
+ GEM_BUG_ON(b->irq_seqno_bh != wait->tsk);
/* We are the current bottom-half. Find the next candidate,
* the first waiter in the queue on the remaining oldest
@@ -344,13 +344,13 @@
* exception rather than a seqno completion.
*/
b->first_wait = to_wait(next);
- smp_store_mb(b->tasklet, b->first_wait->tsk);
+ smp_store_mb(b->irq_seqno_bh, b->first_wait->tsk);
if (b->first_wait->seqno != wait->seqno)
__intel_breadcrumbs_enable_irq(b);
- wake_up_process(b->tasklet);
+ wake_up_process(b->irq_seqno_bh);
} else {
b->first_wait = NULL;
- WRITE_ONCE(b->tasklet, NULL);
+ WRITE_ONCE(b->irq_seqno_bh, NULL);
__intel_breadcrumbs_disable_irq(b);
}
} else {
@@ -364,7 +364,7 @@
GEM_BUG_ON(b->first_wait == wait);
GEM_BUG_ON(rb_first(&b->waiters) !=
(b->first_wait ? &b->first_wait->node : NULL));
- GEM_BUG_ON(!b->tasklet ^ RB_EMPTY_ROOT(&b->waiters));
+ GEM_BUG_ON(!b->irq_seqno_bh ^ RB_EMPTY_ROOT(&b->waiters));
spin_unlock(&b->lock);
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 121294c..12cb7ed 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -74,8 +74,8 @@
struct intel_ring_hangcheck {
u64 acthd;
+ unsigned long user_interrupts;
u32 seqno;
- unsigned user_interrupts;
int score;
enum intel_ring_hangcheck_action action;
int deadlock;
@@ -167,16 +167,20 @@
* the overhead of waking that client is much preferred.
*/
struct intel_breadcrumbs {
+ struct task_struct *irq_seqno_bh; /* bh for user interrupts */
+ unsigned long irq_wakeups;
+ bool irq_posted;
+
spinlock_t lock; /* protects the lists of requests */
struct rb_root waiters; /* sorted by retirement, priority */
struct rb_root signals; /* sorted by retirement */
struct intel_wait *first_wait; /* oldest waiter by retirement */
- struct task_struct *tasklet; /* bh for user interrupts */
struct task_struct *signaler; /* used for fence signalling */
struct drm_i915_gem_request *first_signal;
struct timer_list fake_irq; /* used after a missed interrupt */
- bool irq_enabled;
- bool rpm_wakelock;
+
+ bool irq_enabled : 1;
+ bool rpm_wakelock : 1;
} breadcrumbs;
/*
@@ -189,7 +193,6 @@
struct intel_hw_status_page status_page;
struct i915_ctx_workarounds wa_ctx;
- bool irq_posted;
u32 irq_keep_mask; /* always keep these interrupts */
u32 irq_enable_mask; /* bitmask to enable ring interrupt */
void (*irq_enable)(struct intel_engine_cs *ring);
@@ -319,7 +322,6 @@
* inspecting request list.
*/
u32 last_submitted_seqno;
- unsigned user_interrupts;
bool gpu_caches_dirty;
@@ -543,13 +545,13 @@
static inline bool intel_engine_has_waiter(struct intel_engine_cs *engine)
{
- return READ_ONCE(engine->breadcrumbs.tasklet);
+ return READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
}
static inline bool intel_engine_wakeup(struct intel_engine_cs *engine)
{
bool wakeup = false;
- struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.tasklet);
+ struct task_struct *tsk = READ_ONCE(engine->breadcrumbs.irq_seqno_bh);
/* Note that for this not to dangerously chase a dangling pointer,
* the caller is responsible for ensure that the task remain valid for
* wake_up_process() i.e. that the RCU grace period cannot expire.