qla2xxx: Add selective command queuing

queue work element to specific process lessen cache miss

Signed-off-by: Quinn Tran <quinn.tran@qlogic.com>
Signed-off-by: Himanshu Madhani <himanshu.madhani@qlogic.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index cf0fe8e..3e89122 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -3302,7 +3302,7 @@
 	}
 }
 
-void qla_irq_affinity_release(struct kref *ref)
+static void qla_irq_affinity_release(struct kref *ref)
 {
 	struct irq_affinity_notify *notify =
 		container_of(ref, struct irq_affinity_notify, kref);
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 9a4aed0..d3cd271 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -3982,13 +3982,24 @@
 
 	cmd->cmd_in_wq = 1;
 	cmd->cmd_flags |= BIT_0;
+	cmd->se_cmd.cpuid = -1;
 
 	spin_lock(&vha->cmd_list_lock);
 	list_add_tail(&cmd->cmd_list, &vha->qla_cmd_list);
 	spin_unlock(&vha->cmd_list_lock);
 
 	INIT_WORK(&cmd->work, qlt_do_work);
-	queue_work(qla_tgt_wq, &cmd->work);
+	if (ha->msix_count) {
+		cmd->se_cmd.cpuid = ha->tgt.rspq_vector_cpuid;
+		if (cmd->atio.u.isp24.fcp_cmnd.rddata)
+			queue_work_on(smp_processor_id(), qla_tgt_wq,
+			    &cmd->work);
+		else
+			queue_work_on(cmd->se_cmd.cpuid, qla_tgt_wq,
+			    &cmd->work);
+	} else {
+		queue_work(qla_tgt_wq, &cmd->work);
+	}
 	return 0;
 
 }
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 2881509..b44f397 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -299,7 +299,7 @@
 	cmd->vha->tgt_counters.core_qla_free_cmd++;
 	cmd->cmd_in_wq = 1;
 	INIT_WORK(&cmd->work, tcm_qla2xxx_complete_free);
-	queue_work(tcm_qla2xxx_free_wq, &cmd->work);
+	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
 
 /*
@@ -504,7 +504,7 @@
 	cmd->cmd_flags |= BIT_10;
 	cmd->cmd_in_wq = 1;
 	INIT_WORK(&cmd->work, tcm_qla2xxx_handle_data_work);
-	queue_work(tcm_qla2xxx_free_wq, &cmd->work);
+	queue_work_on(smp_processor_id(), tcm_qla2xxx_free_wq, &cmd->work);
 }
 
 static void tcm_qla2xxx_handle_dif_work(struct work_struct *work)
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index eae4924..717c73b 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -715,7 +715,10 @@
 	cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-	queue_work(target_completion_wq, &cmd->work);
+	if (cmd->cpuid == -1)
+		queue_work(target_completion_wq, &cmd->work);
+	else
+		queue_work_on(cmd->cpuid, target_completion_wq, &cmd->work);
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index dc8b796..4b258fc 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -528,6 +528,7 @@
 	unsigned int		t_prot_nents;
 	sense_reason_t		pi_err;
 	sector_t		bad_sector;
+	int			cpuid;
 };
 
 struct se_ua {