padata: ensure the reorder timer callback runs on the correct CPU
The reorder timer function runs on the CPU where the timer interrupt was
handled which is not necessarily one of the CPUs of the 'pcpu' CPU mask
set.
Ensure the padata_reorder() callback runs on the correct CPU, which is
one in the 'pcpu' CPU mask set and, preferrably, the next expected one.
Do so by comparing the current CPU with the expected target CPU. If they
match, call padata_reorder() right away. If they differ, schedule a work
item on the target CPU that does the padata_reorder() call for us.
Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 2f9c1f9..5c0175b 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -85,6 +85,7 @@
* @swork: work struct for serialization.
* @pd: Backpointer to the internal control structure.
* @work: work struct for parallelization.
+ * @reorder_work: work struct for reordering.
* @num_obj: Number of objects that are processed by this cpu.
* @cpu_index: Index of the cpu.
*/
@@ -93,6 +94,7 @@
struct padata_list reorder;
struct parallel_data *pd;
struct work_struct work;
+ struct work_struct reorder_work;
atomic_t num_obj;
int cpu_index;
};
diff --git a/kernel/padata.c b/kernel/padata.c
index 1b9b4ba..b406614 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -275,11 +275,51 @@
return;
}
+static void invoke_padata_reorder(struct work_struct *work)
+{
+ struct padata_parallel_queue *pqueue;
+ struct parallel_data *pd;
+
+ local_bh_disable();
+ pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
+ pd = pqueue->pd;
+ padata_reorder(pd);
+ local_bh_enable();
+}
+
static void padata_reorder_timer(unsigned long arg)
{
struct parallel_data *pd = (struct parallel_data *)arg;
+ unsigned int weight;
+ int target_cpu, cpu;
- padata_reorder(pd);
+ cpu = get_cpu();
+
+ /* We don't lock pd here to not interfere with parallel processing
+ * padata_reorder() calls on other CPUs. We just need any CPU out of
+ * the cpumask.pcpu set. It would be nice if it's the right one but
+ * it doesn't matter if we're off to the next one by using an outdated
+ * pd->processed value.
+ */
+ weight = cpumask_weight(pd->cpumask.pcpu);
+ target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
+
+ /* ensure to call the reorder callback on the correct CPU */
+ if (cpu != target_cpu) {
+ struct padata_parallel_queue *pqueue;
+ struct padata_instance *pinst;
+
+ /* The timer function is serialized wrt itself -- no locking
+ * needed.
+ */
+ pinst = pd->pinst;
+ pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
+ queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
+ } else {
+ padata_reorder(pd);
+ }
+
+ put_cpu();
}
static void padata_serial_worker(struct work_struct *serial_work)
@@ -399,6 +439,7 @@
__padata_list_init(&pqueue->reorder);
__padata_list_init(&pqueue->parallel);
INIT_WORK(&pqueue->work, padata_parallel_worker);
+ INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
atomic_set(&pqueue->num_obj, 0);
}
}