target: Add percpu refcounting for se_lun access

This patch adds percpu refcounting for se_lun access that allows the
association of an se_lun + se_cmd in transport_lookup_cmd_lun() to
occur without an extra list_head for tracking outstanding I/O during
se_lun shutdown.

This effectively changes se_lun shutdown logic to wait for outstanding
I/O percpu references to complete in transport_lun_remove_cmd() using
se_lun->lun_ref_comp, instead of explicitly draining the per se_lun
command list and waiting for individual se_cmd descriptor processing
to complete.

Cc: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index d90dbb0..569a3c7 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -92,6 +92,9 @@
 		se_cmd->pr_res_key = deve->pr_res_key;
 		se_cmd->orig_fe_lun = unpacked_lun;
 		se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
+
+		percpu_ref_get(&se_lun->lun_ref);
+		se_cmd->lun_ref_active = true;
 	}
 	spin_unlock_irqrestore(&se_sess->se_node_acl->device_list_lock, flags);
 
@@ -119,6 +122,9 @@
 		se_cmd->se_lun = &se_sess->se_tpg->tpg_virt_lun0;
 		se_cmd->orig_fe_lun = 0;
 		se_cmd->se_cmd_flags |= SCF_SE_LUN_CMD;
+
+		percpu_ref_get(&se_lun->lun_ref);
+		se_cmd->lun_ref_active = true;
 	}
 
 	/* Directly associate cmd with se_dev */
@@ -134,10 +140,6 @@
 		dev->read_bytes += se_cmd->data_length;
 	spin_unlock_irqrestore(&dev->stats_lock, flags);
 
-	spin_lock_irqsave(&se_lun->lun_cmd_lock, flags);
-	list_add_tail(&se_cmd->se_lun_node, &se_lun->lun_cmd_list);
-	spin_unlock_irqrestore(&se_lun->lun_cmd_lock, flags);
-
 	return 0;
 }
 EXPORT_SYMBOL(transport_lookup_cmd_lun);