block: make queue flags non-atomic

We can save some atomic ops in the IO path, if we clearly define
the rules of how to modify the queue flags.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index f7f1635..d3a25b0 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -546,7 +546,7 @@
 {
 	struct loop_device *lo = q->queuedata;
 
-	clear_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags);
+	queue_flag_clear_unlocked(QUEUE_FLAG_PLUGGED, q);
 	blk_run_address_space(lo->lo_backing_file->f_mapping);
 }
 
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 27bfe72..e322cce 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -2399,7 +2399,7 @@
 		del_gendisk(lun->disk);
 		/*
 		 * I wish I could do:
-		 *    set_bit(QUEUE_FLAG_DEAD, &q->queue_flags);
+		 *    queue_flag_set(QUEUE_FLAG_DEAD, q);
 		 * As it is, we rely on our internal poisoning and let
 		 * the upper levels to spin furiously failing all the I/O.
 		 */
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 51be533..73326e7 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -873,10 +873,13 @@
 	q->max_hw_sectors = t->limits.max_hw_sectors;
 	q->seg_boundary_mask = t->limits.seg_boundary_mask;
 	q->bounce_pfn = t->limits.bounce_pfn;
+	/* XXX: the below will probably go bug. must ensure there can be no
+	 * concurrency on queue_flags, and use the unlocked versions...
+	 */
 	if (t->limits.no_cluster)
-		q->queue_flags &= ~(1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_clear(QUEUE_FLAG_CLUSTER, q);
 	else
-		q->queue_flags |= (1 << QUEUE_FLAG_CLUSTER);
+		queue_flag_set(QUEUE_FLAG_CLUSTER, q);
 
 }
 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 87620b7..acd716b 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -282,7 +282,8 @@
 		kfree(new);
 		return NULL;
 	}
-	set_bit(QUEUE_FLAG_CLUSTER, &new->queue->queue_flags);
+	/* Can be unlocked because the queue is new: no concurrency */
+	queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
 
 	blk_queue_make_request(new->queue, md_fail_request);
 
diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 07103c3..f6600bf 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -1773,7 +1773,7 @@
 	if (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts)
 		printk(KERN_INFO "scsi_debug: slave_alloc <%u %u %u %u>\n",
 		       sdp->host->host_no, sdp->channel, sdp->id, sdp->lun);
-	set_bit(QUEUE_FLAG_BIDI, &sdp->request_queue->queue_flags);
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, sdp->request_queue);
 	return 0;
 }
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 67f412b..d545ad1 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -536,6 +536,9 @@
 	       !shost->host_blocked && !shost->host_self_blocked &&
 		!((shost->can_queue > 0) &&
 		  (shost->host_busy >= shost->can_queue))) {
+
+		int flagset;
+
 		/*
 		 * As long as shost is accepting commands and we have
 		 * starved queues, call blk_run_queue. scsi_request_fn
@@ -549,19 +552,20 @@
 		sdev = list_entry(shost->starved_list.next,
 					  struct scsi_device, starved_entry);
 		list_del_init(&sdev->starved_entry);
-		spin_unlock_irqrestore(shost->host_lock, flags);
+		spin_unlock(shost->host_lock);
 
+		spin_lock(sdev->request_queue->queue_lock);
+		flagset = test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
+				!test_bit(QUEUE_FLAG_REENTER,
+					&sdev->request_queue->queue_flags);
+		if (flagset)
+			queue_flag_set(QUEUE_FLAG_REENTER, sdev->request_queue);
+		__blk_run_queue(sdev->request_queue);
+		if (flagset)
+			queue_flag_clear(QUEUE_FLAG_REENTER, sdev->request_queue);
+		spin_unlock(sdev->request_queue->queue_lock);
 
-		if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) &&
-		    !test_and_set_bit(QUEUE_FLAG_REENTER,
-				      &sdev->request_queue->queue_flags)) {
-			blk_run_queue(sdev->request_queue);
-			clear_bit(QUEUE_FLAG_REENTER,
-				  &sdev->request_queue->queue_flags);
-		} else
-			blk_run_queue(sdev->request_queue);
-
-		spin_lock_irqsave(shost->host_lock, flags);
+		spin_lock(shost->host_lock);
 		if (unlikely(!list_empty(&sdev->starved_entry)))
 			/*
 			 * sdev lost a race, and was put back on the
@@ -1585,8 +1589,9 @@
 
 	blk_queue_max_segment_size(q, dma_get_max_seg_size(dev));
 
+	/* New queue, no concurrency on queue_flags */
 	if (!shost->use_clustering)
-		clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags);
+		queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q);
 
 	/*
 	 * set a reasonable default alignment on word boundaries: the
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 7899e3d..f4461d3 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -248,8 +248,7 @@
 	else
 		q->queuedata = shost;
 
-	set_bit(QUEUE_FLAG_BIDI, &q->queue_flags);
-
+	queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
 	return 0;
 }