blk-mq: balance mapping between present CPUs and queues
Spread queues among present CPUs first, then build the mapping for the
remaining non-present CPUs.
This minimizes the number of dead queues, i.e. queues that are mapped
only by non-present CPUs, and thereby avoids the bad IO performance
caused by an unbalanced mapping between present CPUs and queues.
A similar policy has already been applied to managed IRQ affinity.
Cc: Yi Zhang <yi.zhang@redhat.com>
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Bob Liu <bob.liu@oracle.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index f945621..0157f2b 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -15,10 +15,10 @@
#include "blk.h"
#include "blk-mq.h"
-static int cpu_to_queue_index(struct blk_mq_queue_map *qmap,
- unsigned int nr_queues, const int cpu)
+static int queue_index(struct blk_mq_queue_map *qmap,
+ unsigned int nr_queues, const int q)
{
- return qmap->queue_offset + (cpu % nr_queues);
+ return qmap->queue_offset + (q % nr_queues);
}
static int get_first_sibling(unsigned int cpu)
@@ -36,21 +36,36 @@
{
unsigned int *map = qmap->mq_map;
unsigned int nr_queues = qmap->nr_queues;
- unsigned int cpu, first_sibling;
+ unsigned int cpu, first_sibling, q = 0;
+
+ for_each_possible_cpu(cpu)
+ map[cpu] = -1;
+
+ /*
+ * Spread queues among present CPUs first for minimizing
+ * count of dead queues which are mapped by all un-present CPUs
+ */
+ for_each_present_cpu(cpu) {
+ if (q >= nr_queues)
+ break;
+ map[cpu] = queue_index(qmap, nr_queues, q++);
+ }
for_each_possible_cpu(cpu) {
+ if (map[cpu] != -1)
+ continue;
/*
* First do sequential mapping between CPUs and queues.
* In case we still have CPUs to map, and we have some number of
* threads per cores then map sibling threads to the same queue
* for performance optimizations.
*/
- if (cpu < nr_queues) {
- map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
+ if (q < nr_queues) {
+ map[cpu] = queue_index(qmap, nr_queues, q++);
} else {
first_sibling = get_first_sibling(cpu);
if (first_sibling == cpu)
- map[cpu] = cpu_to_queue_index(qmap, nr_queues, cpu);
+ map[cpu] = queue_index(qmap, nr_queues, q++);
else
map[cpu] = map[first_sibling];
}