sgi-xpc: prevent false heartbeat failures

The heartbeat timeout functionality in sgi-xpc is currently not trained to
the connection time.  If a connection is made and the code is in the last
polling window prior to doing a timeout, the next polling window will see
the heartbeat as unchanged and initiate a no-heartbeat disconnect.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index f7fff47..97f7cb2 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -46,8 +46,7 @@
 };
 #endif
 
-static atomic64_t xpc_heartbeat_uv;
-static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
+static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
 
 #define XPC_ACTIVATE_MSG_SIZE_UV	(1 * GRU_CACHE_LINE_BYTES)
 #define XPC_ACTIVATE_MQ_SIZE_UV		(4 * XP_MAX_NPARTITIONS_UV * \
@@ -423,41 +422,6 @@
 		/* syncing of remote_act_state was just done above */
 		break;
 
-	case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
-		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-		msg = container_of(msg_hdr,
-				   struct xpc_activate_mq_msg_heartbeat_req_uv,
-				   hdr);
-		part_uv->heartbeat = msg->heartbeat;
-		break;
-	}
-	case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
-		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-		msg = container_of(msg_hdr,
-				   struct xpc_activate_mq_msg_heartbeat_req_uv,
-				   hdr);
-		part_uv->heartbeat = msg->heartbeat;
-
-		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-		part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
-		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-		break;
-	}
-	case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
-		struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
-
-		msg = container_of(msg_hdr,
-				   struct xpc_activate_mq_msg_heartbeat_req_uv,
-				   hdr);
-		part_uv->heartbeat = msg->heartbeat;
-
-		spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
-		part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
-		spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
-		break;
-	}
 	case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
 		struct xpc_activate_mq_msg_activate_req_uv *msg;
 
@@ -475,6 +439,7 @@
 		part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
 		part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
 		part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
+		part_uv->heartbeat_gpa = msg->heartbeat_gpa;
 
 		if (msg->activate_gru_mq_desc_gpa !=
 		    part_uv->activate_gru_mq_desc_gpa) {
@@ -759,7 +724,7 @@
 
 	/*
 	 * !!! Make our side think that the remote partition sent an activate
-	 * !!! message our way by doing what the activate IRQ handler would
+	 * !!! mq message our way by doing what the activate IRQ handler would
 	 * !!! do had one really been sent.
 	 */
 
@@ -808,88 +773,80 @@
 static int
 xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
 {
-	rp->sn.activate_gru_mq_desc_gpa =
+	xpc_heartbeat_uv =
+	    &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
+	rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
+	rp->sn.uv.activate_gru_mq_desc_gpa =
 	    uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
 	return 0;
 }
 
 static void
-xpc_send_heartbeat_uv(int msg_type)
+xpc_allow_hb_uv(short partid)
 {
-	short partid;
-	struct xpc_partition *part;
-	struct xpc_activate_mq_msg_heartbeat_req_uv msg;
+}
 
-	/*
-	 * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
-	 * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
-	 * !!! seconds. This is an increase in numalink traffic.
-	 * ??? Is this good?
-	 */
+static void
+xpc_disallow_hb_uv(short partid)
+{
+}
 
-	msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
-
-	partid = find_first_bit(xpc_heartbeating_to_mask_uv,
-				XP_MAX_NPARTITIONS_UV);
-
-	while (partid < XP_MAX_NPARTITIONS_UV) {
-		part = &xpc_partitions[partid];
-
-		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
-					      msg_type);
-
-		partid = find_next_bit(xpc_heartbeating_to_mask_uv,
-				       XP_MAX_NPARTITIONS_UV, partid + 1);
-	}
+static void
+xpc_disallow_all_hbs_uv(void)
+{
 }
 
 static void
 xpc_increment_heartbeat_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV);
+	xpc_heartbeat_uv->value++;
 }
 
 static void
 xpc_offline_heartbeat_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 1;
 }
 
 static void
 xpc_online_heartbeat_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV);
+	xpc_increment_heartbeat_uv();
+	xpc_heartbeat_uv->offline = 0;
 }
 
 static void
 xpc_heartbeat_init_uv(void)
 {
-	atomic64_set(&xpc_heartbeat_uv, 0);
-	bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
-	xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
+	xpc_heartbeat_uv->value = 1;
+	xpc_heartbeat_uv->offline = 0;
 }
 
 static void
 xpc_heartbeat_exit_uv(void)
 {
-	xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV);
+	xpc_offline_heartbeat_uv();
 }
 
 static enum xp_retval
 xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
 {
 	struct xpc_partition_uv *part_uv = &part->sn.uv;
-	enum xp_retval ret = xpNoHeartbeat;
+	enum xp_retval ret;
 
-	if (part_uv->remote_act_state != XPC_P_AS_INACTIVE &&
-	    part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) {
+	ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
+			       part_uv->heartbeat_gpa,
+			       sizeof(struct xpc_heartbeat_uv));
+	if (ret != xpSuccess)
+		return ret;
 
-		if (part_uv->heartbeat != part->last_heartbeat ||
-		    (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) {
+	if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
+	    !part_uv->cached_heartbeat.offline) {
 
-			part->last_heartbeat = part_uv->heartbeat;
-			ret = xpSuccess;
-		}
+		ret = xpNoHeartbeat;
+	} else {
+		part->last_heartbeat = part_uv->cached_heartbeat.value;
 	}
 	return ret;
 }
@@ -904,8 +861,9 @@
 
 	part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
 	part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
+	part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
 	part->sn.uv.activate_gru_mq_desc_gpa =
-	    remote_rp->sn.activate_gru_mq_desc_gpa;
+	    remote_rp->sn.uv.activate_gru_mq_desc_gpa;
 
 	/*
 	 * ??? Is it a good idea to make this conditional on what is
@@ -913,8 +871,9 @@
 	 */
 	if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
 		msg.rp_gpa = uv_gpa(xpc_rsvd_page);
+		msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
 		msg.activate_gru_mq_desc_gpa =
-		    xpc_rsvd_page->sn.activate_gru_mq_desc_gpa;
+		    xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
 		xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
 					   XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
 	}
@@ -1677,6 +1636,10 @@
 	xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
 	xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
 	xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
+
+	xpc_allow_hb = xpc_allow_hb_uv;
+	xpc_disallow_hb = xpc_disallow_hb_uv;
+	xpc_disallow_all_hbs = xpc_disallow_all_hbs_uv;
 	xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
 	xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
 	xpc_online_heartbeat = xpc_online_heartbeat_uv;