tcm_fc: Convert to per-cpu command map pre-allocation of ft_cmd

This patch converts tcm_fc to use transport_init_session_tags()
pre-allocation logic for struct ft_cmd descriptors using per-cpu
session tag pooling in order to effectively avoid memory allocation
+ release for each received I/O.

It adds percpu_ida_alloc() in ft_recv_cmd() to obtain an tag and
locate ft_cmd from se_sess->sess_cmd_map[], and percpu_ida_free()
in ft_free_cmd() to release the tag based upon se_cmd->map_tag id.

It also uses a TCM_FC_DEFAULT_TAGS value of 512, that puts the
per se_sess->sess_cmd_map allocation at ~360K on 64-bit.

v2 changes:

  - Handle possible tag < 0 failure with GFP_ATOMIC

Cc: Mark Rustad <mark.d.rustad@intel.com>
Cc: Robert Love <robert.w.love@intel.com>
Cc: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Nicholas Bellinger <nab@daterainc.com>
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
index 0dd54a4..752863a 100644
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -22,6 +22,7 @@
 #define FT_NAMELEN 32		/* length of ASCII WWPNs including pad */
 #define FT_TPG_NAMELEN 32	/* max length of TPG name */
 #define FT_LUN_NAMELEN 32	/* max length of LUN name */
+#define TCM_FC_DEFAULT_TAGS 512	/* tags used for per-session preallocation */
 
 struct ft_transport_id {
 	__u8	format;
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 0e5a1cae..479ec56 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -28,6 +28,7 @@
 #include <linux/configfs.h>
 #include <linux/ctype.h>
 #include <linux/hash.h>
+#include <linux/percpu_ida.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_host.h>
@@ -89,16 +90,18 @@
 {
 	struct fc_frame *fp;
 	struct fc_lport *lport;
+	struct se_session *se_sess;
 
 	if (!cmd)
 		return;
+	se_sess = cmd->sess->se_sess;
 	fp = cmd->req_frame;
 	lport = fr_dev(fp);
 	if (fr_seq(fp))
 		lport->tt.seq_release(fr_seq(fp));
 	fc_frame_free(fp);
+	percpu_ida_free(&se_sess->sess_tag_pool, cmd->se_cmd.map_tag);
 	ft_sess_put(cmd->sess);	/* undo get from lookup at recv */
-	kfree(cmd);
 }
 
 void ft_release_cmd(struct se_cmd *se_cmd)
@@ -432,14 +435,21 @@
 {
 	struct ft_cmd *cmd;
 	struct fc_lport *lport = sess->tport->lport;
+	struct se_session *se_sess = sess->se_sess;
+	int tag;
 
-	cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
-	if (!cmd)
+	tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_ATOMIC);
+	if (tag < 0)
 		goto busy;
+
+	cmd = &((struct ft_cmd *)se_sess->sess_cmd_map)[tag];
+	memset(cmd, 0, sizeof(struct ft_cmd));
+
+	cmd->se_cmd.map_tag = tag;
 	cmd->sess = sess;
 	cmd->seq = lport->tt.seq_assign(lport, fp);
 	if (!cmd->seq) {
-		kfree(cmd);
+		percpu_ida_free(&se_sess->sess_tag_pool, tag);
 		goto busy;
 	}
 	cmd->req_frame = fp;		/* hold frame during cmd */
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index 4859505..ae52c08 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -210,7 +210,8 @@
 	if (!sess)
 		return NULL;
 
-	sess->se_sess = transport_init_session();
+	sess->se_sess = transport_init_session_tags(TCM_FC_DEFAULT_TAGS,
+						    sizeof(struct ft_cmd));
 	if (IS_ERR(sess->se_sess)) {
 		kfree(sess);
 		return NULL;