mpt3sas: Make use of additional HighPriority credit message frames for sending SCSI IO's

Driver assumes HighPriority credit as part of Global credit. But,
Firmware treats HighPriority credit value and global cedits as two
different values. Changed host queue algorithm to treat global credits
and highPriority credits as two different values.

Signed-off-by: Suganath prabu Subramani <suganath-prabu.subramani@avagotech.com>
Signed-off-by: Chaitra P B <chaitra.basappa@avagotech.com>
Reviewed-by: Tomas Henzl <thenzl@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index f59495b..31838d9a 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -775,7 +775,7 @@
 
 	mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
 	if (mpi_reply && mpi_reply->Function == MPI2_FUNCTION_EVENT_ACK)
-		return 1;
+		return mpt3sas_check_for_pending_internal_cmds(ioc, smid);
 
 	if (ioc->base_cmds.status == MPT3_CMD_NOT_USED)
 		return 1;
@@ -806,6 +806,7 @@
 	Mpi2EventNotificationReply_t *mpi_reply;
 	Mpi2EventAckRequest_t *ack_request;
 	u16 smid;
+	struct _event_ack_list *delayed_event_ack;
 
 	mpi_reply = mpt3sas_base_get_reply_virt_addr(ioc, reply);
 	if (!mpi_reply)
@@ -819,8 +820,18 @@
 		goto out;
 	smid = mpt3sas_base_get_smid(ioc, ioc->base_cb_idx);
 	if (!smid) {
-		pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
-		    ioc->name, __func__);
+		delayed_event_ack = kzalloc(sizeof(*delayed_event_ack),
+					GFP_ATOMIC);
+		if (!delayed_event_ack)
+			goto out;
+		INIT_LIST_HEAD(&delayed_event_ack->list);
+		delayed_event_ack->Event = mpi_reply->Event;
+		delayed_event_ack->EventContext = mpi_reply->EventContext;
+		list_add_tail(&delayed_event_ack->list,
+				&ioc->delayed_event_ack_list);
+		dewtprintk(ioc, pr_info(MPT3SAS_FMT
+				"DELAYED: EVENT ACK: event (0x%04x)\n",
+				ioc->name, le16_to_cpu(mpi_reply->Event)));
 		goto out;
 	}
 
@@ -3189,20 +3200,35 @@
 	}
 	ioc->shost->sg_tablesize = sg_tablesize;
 
-	ioc->hi_priority_depth = facts->HighPriorityCredit;
-	ioc->internal_depth = ioc->hi_priority_depth + (5);
+	ioc->internal_depth = min_t(int, (facts->HighPriorityCredit + (5)),
+		(facts->RequestCredit / 4));
+	if (ioc->internal_depth < INTERNAL_CMDS_COUNT) {
+		if (facts->RequestCredit <= (INTERNAL_CMDS_COUNT +
+				INTERNAL_SCSIIO_CMDS_COUNT)) {
+			pr_err(MPT3SAS_FMT "IOC doesn't have enough Request \
+			    Credits, it has just %d number of credits\n",
+			    ioc->name, facts->RequestCredit);
+			return -ENOMEM;
+		}
+		ioc->internal_depth = 10;
+	}
+
+	ioc->hi_priority_depth = ioc->internal_depth - (5);
 	/* command line tunables  for max controller queue depth */
 	if (max_queue_depth != -1 && max_queue_depth != 0) {
 		max_request_credit = min_t(u16, max_queue_depth +
-		    ioc->hi_priority_depth + ioc->internal_depth,
-		    facts->RequestCredit);
+			ioc->internal_depth, facts->RequestCredit);
 		if (max_request_credit > MAX_HBA_QUEUE_DEPTH)
 			max_request_credit =  MAX_HBA_QUEUE_DEPTH;
 	} else
 		max_request_credit = min_t(u16, facts->RequestCredit,
 		    MAX_HBA_QUEUE_DEPTH);
 
-	ioc->hba_queue_depth = max_request_credit;
+	/* Firmware maintains additional facts->HighPriorityCredit number of
+	 * credits for HiPriprity Request messages, so hba queue depth will be
+	 * sum of max_request_credit and high priority queue depth.
+	 */
+	ioc->hba_queue_depth = max_request_credit + ioc->hi_priority_depth;
 
 	/* request frame size */
 	ioc->request_sz = facts->IOCRequestFrameSize * 4;
@@ -3249,7 +3275,6 @@
 		ioc->reply_post_queue_depth += 16 -
 		(ioc->reply_post_queue_depth % 16);
 
-
 	if (ioc->reply_post_queue_depth >
 	    facts->MaxReplyDescriptorPostQueueDepth) {
 		ioc->reply_post_queue_depth =
@@ -3331,7 +3356,7 @@
 	/* set the scsi host can_queue depth
 	 * with some internal commands that could be outstanding
 	 */
-	ioc->shost->can_queue = ioc->scsiio_depth;
+	ioc->shost->can_queue = ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT;
 	dinitprintk(ioc, pr_info(MPT3SAS_FMT
 		"scsi host: can_queue depth (%d)\n",
 		ioc->name, ioc->shost->can_queue));
@@ -3358,8 +3383,8 @@
 		    ioc->chains_needed_per_io, ioc->request_sz, sz/1024);
 		if (ioc->scsiio_depth < MPT3SAS_SAS_QUEUE_DEPTH)
 			goto out;
-		retry_sz += 64;
-		ioc->hba_queue_depth = max_request_credit - retry_sz;
+		retry_sz = 64;
+		ioc->hba_queue_depth -= retry_sz;
 		goto retry_allocation;
 	}
 
@@ -4977,6 +5002,8 @@
 	u32 reply_address;
 	u16 smid;
 	struct _tr_list *delayed_tr, *delayed_tr_next;
+	struct _sc_list *delayed_sc, *delayed_sc_next;
+	struct _event_ack_list *delayed_event_ack, *delayed_event_ack_next;
 	u8 hide_flag;
 	struct adapter_reply_queue *reply_q;
 	long reply_post_free;
@@ -4999,6 +5026,18 @@
 		kfree(delayed_tr);
 	}
 
+	list_for_each_entry_safe(delayed_sc, delayed_sc_next,
+	    &ioc->delayed_sc_list, list) {
+		list_del(&delayed_sc->list);
+		kfree(delayed_sc);
+	}
+
+	list_for_each_entry_safe(delayed_event_ack, delayed_event_ack_next,
+	    &ioc->delayed_event_ack_list, list) {
+		list_del(&delayed_event_ack->list);
+		kfree(delayed_event_ack);
+	}
+
 	/* initialize the scsi lookup free list */
 	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
 	INIT_LIST_HEAD(&ioc->free_list);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 2786222..4b52a07 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -122,6 +122,8 @@
 #define  NO_SLEEP			0
 
 #define INTERNAL_CMDS_COUNT		10	/* reserved cmds */
+/* reserved for issuing internally framed scsi io cmds */
+#define INTERNAL_SCSIIO_CMDS_COUNT	3
 
 #define MPI3_HIM_MASK			0xFFFFFFFF /* mask every bit*/
 
@@ -677,6 +679,25 @@
 	u16	state;
 };
 
+/**
+ * struct _sc_list - delayed SAS_IO_UNIT_CONTROL message list
+ * @handle: device handle
+ */
+struct _sc_list {
+	struct list_head list;
+	u16     handle;
+};
+
+/**
+ * struct _event_ack_list - delayed event acknowledgment list
+ * @Event: Event ID
+ * @EventContext: used to track the event uniquely
+ */
+struct _event_ack_list {
+	struct list_head list;
+	u16     Event;
+	u32     EventContext;
+};
 
 /**
  * struct adapter_reply_queue - the reply queue struct
@@ -922,6 +943,8 @@
  * @replyPostRegisterIndex: index of next position in Reply Desc Post Queue
  * @delayed_tr_list: target reset link list
  * @delayed_tr_volume_list: volume target reset link list
+ * @delayed_sc_list:
+ * @delayed_event_ack_list:
  * @temp_sensors_count: flag to carry the number of temperature sensors
  * @pci_access_mutex: Mutex to synchronize ioctl,sysfs show path and
  *	pci resource handling. PCI resource freeing will lead to free
@@ -1143,6 +1166,8 @@
 
 	struct list_head delayed_tr_list;
 	struct list_head delayed_tr_volume_list;
+	struct list_head delayed_sc_list;
+	struct list_head delayed_event_ack_list;
 	u8		temp_sensors_count;
 	struct mutex pci_access_mutex;
 
@@ -1260,6 +1285,8 @@
 void mpt3sas_expander_remove(struct MPT3SAS_ADAPTER *ioc, u64 sas_address);
 void mpt3sas_device_remove_by_sas_address(struct MPT3SAS_ADAPTER *ioc,
 	u64 sas_address);
+u8 mpt3sas_check_for_pending_internal_cmds(struct MPT3SAS_ADAPTER *ioc,
+	u16 smid);
 
 struct _sas_node *mpt3sas_scsih_expander_find_by_handle(
 	struct MPT3SAS_ADAPTER *ioc, u16 handle);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index dcb4c18..b12cada 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3222,6 +3222,7 @@
 	Mpi2SasIoUnitControlRequest_t *mpi_request;
 	u16 smid_sas_ctrl;
 	u32 ioc_state;
+	struct _sc_list *delayed_sc;
 
 	if (ioc->remove_host) {
 		dewtprintk(ioc, pr_info(MPT3SAS_FMT
@@ -3264,9 +3265,16 @@
 
 	smid_sas_ctrl = mpt3sas_base_get_smid(ioc, ioc->tm_sas_control_cb_idx);
 	if (!smid_sas_ctrl) {
-		pr_err(MPT3SAS_FMT "%s: failed obtaining a smid\n",
-		    ioc->name, __func__);
-		return 1;
+		delayed_sc = kzalloc(sizeof(*delayed_sc), GFP_ATOMIC);
+		if (!delayed_sc)
+			return _scsih_check_for_pending_tm(ioc, smid);
+		INIT_LIST_HEAD(&delayed_sc->list);
+		delayed_sc->handle = mpi_request_tm->DevHandle;
+		list_add_tail(&delayed_sc->list, &ioc->delayed_sc_list);
+		dewtprintk(ioc, pr_info(MPT3SAS_FMT
+		    "DELAYED:sc:handle(0x%04x), (open)\n",
+		    ioc->name, handle));
+		return _scsih_check_for_pending_tm(ioc, smid);
 	}
 
 	dewtprintk(ioc, pr_info(MPT3SAS_FMT
@@ -3317,7 +3325,7 @@
 		pr_err(MPT3SAS_FMT "mpi_reply not valid at %s:%d/%s()!\n",
 		    ioc->name, __FILE__, __LINE__, __func__);
 	}
-	return 1;
+	return mpt3sas_check_for_pending_internal_cmds(ioc, smid);
 }
 
 /**
@@ -3424,6 +3432,142 @@
 	return _scsih_check_for_pending_tm(ioc, smid);
 }
 
+/**
+ * _scsih_issue_delayed_event_ack - issue delayed Event ACK messages
+ * @ioc: per adapter object
+ * @smid: system request message index
+ * @event: Event ID
+ * @event_context: used to track events uniquely
+ *
+ * Context - processed in interrupt context.
+ */
+void
+_scsih_issue_delayed_event_ack(struct MPT3SAS_ADAPTER *ioc, u16 smid, u16 event,
+				u32 event_context)
+{
+	Mpi2EventAckRequest_t *ack_request;
+	int i = smid - ioc->internal_smid;
+	unsigned long flags;
+
+	/* Without releasing the smid just update the
+	 * call back index and reuse the same smid for
+	 * processing this delayed request
+	 */
+	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+	ioc->internal_lookup[i].cb_idx = ioc->base_cb_idx;
+	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+
+	dewtprintk(ioc, pr_info(MPT3SAS_FMT
+		"EVENT ACK: event(0x%04x), smid(%d), cb(%d)\n",
+		ioc->name, le16_to_cpu(event), smid,
+		ioc->base_cb_idx));
+	ack_request = mpt3sas_base_get_msg_frame(ioc, smid);
+	memset(ack_request, 0, sizeof(Mpi2EventAckRequest_t));
+	ack_request->Function = MPI2_FUNCTION_EVENT_ACK;
+	ack_request->Event = event;
+	ack_request->EventContext = event_context;
+	ack_request->VF_ID = 0;  /* TODO */
+	ack_request->VP_ID = 0;
+	mpt3sas_base_put_smid_default(ioc, smid);
+}
+
+/**
+ * _scsih_issue_delayed_sas_io_unit_ctrl - issue delayed
+ *				sas_io_unit_ctrl messages
+ * @ioc: per adapter object
+ * @smid: system request message index
+ * @handle: device handle
+ *
+ * Context - processed in interrupt context.
+ */
+void
+_scsih_issue_delayed_sas_io_unit_ctrl(struct MPT3SAS_ADAPTER *ioc,
+					u16 smid, u16 handle)
+	{
+		Mpi2SasIoUnitControlRequest_t *mpi_request;
+		u32 ioc_state;
+		int i = smid - ioc->internal_smid;
+		unsigned long flags;
+
+		if (ioc->remove_host) {
+			dewtprintk(ioc, pr_info(MPT3SAS_FMT
+			    "%s: host has been removed\n",
+			     __func__, ioc->name));
+			return;
+		} else if (ioc->pci_error_recovery) {
+			dewtprintk(ioc, pr_info(MPT3SAS_FMT
+			    "%s: host in pci error recovery\n",
+			    __func__, ioc->name));
+		return;
+	}
+	ioc_state = mpt3sas_base_get_iocstate(ioc, 1);
+	if (ioc_state != MPI2_IOC_STATE_OPERATIONAL) {
+		dewtprintk(ioc, pr_info(MPT3SAS_FMT
+		    "%s: host is not operational\n",
+		    __func__, ioc->name));
+		return;
+	}
+
+	/* Without releasing the smid just update the
+	 * call back index and reuse the same smid for
+	 * processing this delayed request
+	 */
+	spin_lock_irqsave(&ioc->scsi_lookup_lock, flags);
+	ioc->internal_lookup[i].cb_idx = ioc->tm_sas_control_cb_idx;
+	spin_unlock_irqrestore(&ioc->scsi_lookup_lock, flags);
+
+	dewtprintk(ioc, pr_info(MPT3SAS_FMT
+	    "sc_send:handle(0x%04x), (open), smid(%d), cb(%d)\n",
+	    ioc->name, le16_to_cpu(handle), smid,
+	    ioc->tm_sas_control_cb_idx));
+	mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
+	memset(mpi_request, 0, sizeof(Mpi2SasIoUnitControlRequest_t));
+	mpi_request->Function = MPI2_FUNCTION_SAS_IO_UNIT_CONTROL;
+	mpi_request->Operation = MPI2_SAS_OP_REMOVE_DEVICE;
+	mpi_request->DevHandle = handle;
+	mpt3sas_base_put_smid_default(ioc, smid);
+}
+
+/**
+ * _scsih_check_for_pending_internal_cmds - check for pending internal messages
+ * @ioc: per adapter object
+ * @smid: system request message index
+ *
+ * Context: Executed in interrupt context
+ *
+ * This will check delayed internal messages list, and process the
+ * next request.
+ *
+ * Return 1 meaning mf should be freed from _base_interrupt
+ *        0 means the mf is freed from this function.
+ */
+u8
+mpt3sas_check_for_pending_internal_cmds(struct MPT3SAS_ADAPTER *ioc, u16 smid)
+{
+	struct _sc_list *delayed_sc;
+	struct _event_ack_list *delayed_event_ack;
+
+	if (!list_empty(&ioc->delayed_event_ack_list)) {
+		delayed_event_ack = list_entry(ioc->delayed_event_ack_list.next,
+						struct _event_ack_list, list);
+		_scsih_issue_delayed_event_ack(ioc, smid,
+		  delayed_event_ack->Event, delayed_event_ack->EventContext);
+		list_del(&delayed_event_ack->list);
+		kfree(delayed_event_ack);
+		return 0;
+	}
+
+	if (!list_empty(&ioc->delayed_sc_list)) {
+		delayed_sc = list_entry(ioc->delayed_sc_list.next,
+						struct _sc_list, list);
+		_scsih_issue_delayed_sas_io_unit_ctrl(ioc, smid,
+						 delayed_sc->handle);
+		list_del(&delayed_sc->list);
+		kfree(delayed_sc);
+		return 0;
+	}
+	return 1;
+}
 
 /**
  * _scsih_check_for_pending_tm - check for pending task management
@@ -8589,6 +8733,8 @@
 	INIT_LIST_HEAD(&ioc->raid_device_list);
 	INIT_LIST_HEAD(&ioc->sas_hba.sas_port_list);
 	INIT_LIST_HEAD(&ioc->delayed_tr_list);
+	INIT_LIST_HEAD(&ioc->delayed_sc_list);
+	INIT_LIST_HEAD(&ioc->delayed_event_ack_list);
 	INIT_LIST_HEAD(&ioc->delayed_tr_volume_list);
 	INIT_LIST_HEAD(&ioc->reply_queue_list);