x86/UV2: Fix new UV2 hardware by using native UV2 broadcast mode

Update the use of the Broadcast Assist Unit on SGI Altix UV2 to
the use of native UV2 mode on new hardware (not the legacy mode).

UV2 native mode has a different format for a broadcast message.
We also need quick differentiaton between UV1 and UV2.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Link: http://lkml.kernel.org/r/20120116211750.GA5767@sgi.com
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 8e862aa..4a46b27 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -65,7 +65,7 @@
  * UV2: Bit 19 selects between
  *  (0): 10 microsecond timebase and
  *  (1): 80 microseconds
- *  we're using 655us, similar to UV1: 65 units of 10us
+ *  we're using 560us, similar to UV1: 65 units of 10us
  */
 #define UV1_INTD_SOFT_ACK_TIMEOUT_PERIOD (9UL)
 #define UV2_INTD_SOFT_ACK_TIMEOUT_PERIOD (15UL)
@@ -235,10 +235,10 @@
 
 
 /*
- * Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
+ * UV1 Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
  * see table 4.2.3.0.1 in broacast_assist spec.
  */
-struct bau_msg_header {
+struct uv1_bau_msg_header {
 	unsigned int	dest_subnodeid:6;	/* must be 0x10, for the LB */
 	/* bits 5:0 */
 	unsigned int	base_dest_nasid:15;	/* nasid of the first bit */
@@ -318,19 +318,87 @@
 };
 
 /*
+ * UV2 Message header:  16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
+ * see figure 9-2 of harp_sys.pdf
+ */
+struct uv2_bau_msg_header {
+	unsigned int	base_dest_nasid:15;	/* nasid of the first bit */
+	/* bits 14:0 */				/* in uvhub map */
+	unsigned int	dest_subnodeid:5;	/* must be 0x10, for the LB */
+	/* bits 19:15 */
+	unsigned int	rsvd_1:1;		/* must be zero */
+	/* bit 20 */
+	/* Address bits 59:21 */
+	/* bits 25:2 of address (44:21) are payload */
+	/* these next 24 bits become bytes 12-14 of msg */
+	/* bits 28:21 land in byte 12 */
+	unsigned int	replied_to:1;		/* sent as 0 by the source to
+						   byte 12 */
+	/* bit 21 */
+	unsigned int	msg_type:3;		/* software type of the
+						   message */
+	/* bits 24:22 */
+	unsigned int	canceled:1;		/* message canceled, resource
+						   is to be freed*/
+	/* bit 25 */
+	unsigned int	payload_1:3;		/* not currently used */
+	/* bits 28:26 */
+
+	/* bits 36:29 land in byte 13 */
+	unsigned int	payload_2a:3;		/* not currently used */
+	unsigned int	payload_2b:5;		/* not currently used */
+	/* bits 36:29 */
+
+	/* bits 44:37 land in byte 14 */
+	unsigned int	payload_3:8;		/* not currently used */
+	/* bits 44:37 */
+
+	unsigned int	rsvd_2:7;		/* reserved */
+	/* bits 51:45 */
+	unsigned int	swack_flag:1;		/* software acknowledge flag */
+	/* bit 52 */
+	unsigned int	rsvd_3a:3;		/* must be zero */
+	unsigned int	rsvd_3b:8;		/* must be zero */
+	unsigned int	rsvd_3c:8;		/* must be zero */
+	unsigned int	rsvd_3d:3;		/* must be zero */
+	/* bits 74:53 */
+	unsigned int	fairness:3;		/* usually zero */
+	/* bits 77:75 */
+
+	unsigned int	sequence:16;		/* message sequence number */
+	/* bits 93:78  Suppl_A  */
+	unsigned int	chaining:1;		/* next descriptor is part of
+						   this activation*/
+	/* bit 94 */
+	unsigned int	multilevel:1;		/* multi-level multicast
+						   format */
+	/* bit 95 */
+	unsigned int	rsvd_4:24;		/* ordered / source node /
+						   source subnode / aging
+						   must be zero */
+	/* bits 119:96 */
+	unsigned int	command:8;		/* message type */
+	/* bits 127:120 */
+};
+
+/*
  * The activation descriptor:
  * The format of the message to send, plus all accompanying control
  * Should be 64 bytes
  */
 struct bau_desc {
-	struct pnmask			distribution;
+	struct pnmask				distribution;
 	/*
 	 * message template, consisting of header and payload:
 	 */
-	struct bau_msg_header		header;
-	struct bau_msg_payload		payload;
+	union bau_msg_header {
+		struct uv1_bau_msg_header	uv1_hdr;
+		struct uv2_bau_msg_header	uv2_hdr;
+	} header;
+
+	struct bau_msg_payload			payload;
 };
-/*
+/* UV1:
  *   -payload--    ---------header------
  *   bytes 0-11    bits 41-56  bits 58-81
  *       A           B  (2)      C (3)
@@ -340,6 +408,16 @@
  *   bytes 0-11  bytes 12-14  bytes 16-17  (byte 15 filled in by hw as vector)
  *   ------------payload queue-----------
  */
+/* UV2:
+ *   -payload--    ---------header------
+ *   bytes 0-11    bits 70-78  bits 21-44
+ *       A           B  (2)      C (3)
+ *
+ *            A/B/C are moved to:
+ *       A            C          B
+ *   bytes 0-11  bytes 12-14  bytes 16-17  (byte 15 filled in by hw as vector)
+ *   ------------payload queue-----------
+ */
 
 /*
  * The payload queue on the destination side is an array of these.
@@ -511,6 +589,7 @@
 	short			osnode;
 	short			uvhub_cpu;
 	short			uvhub;
+	short			uvhub_version;
 	short			cpus_in_socket;
 	short			cpus_in_uvhub;
 	short			partition_base_pnode;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 5b55219..1341a2e 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -573,7 +573,7 @@
 		right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE);
 	}
 
-	if (is_uv1_hub())
+	if (bcp->uvhub_version == 1)
 		return uv1_wait_completion(bau_desc, mmr_offset, right_shift,
 								bcp, try);
 	else
@@ -757,15 +757,22 @@
 {
 	int seq_number = 0;
 	int completion_stat = 0;
+	int uv1 = 0;
 	long try = 0;
 	unsigned long index;
 	cycles_t time1;
 	cycles_t time2;
 	struct ptc_stats *stat = bcp->statp;
 	struct bau_control *hmaster = bcp->uvhub_master;
+	struct uv1_bau_msg_header *uv1_hdr = NULL;
+	struct uv2_bau_msg_header *uv2_hdr = NULL;
 
-	if (is_uv1_hub())
+	if (bcp->uvhub_version == 1) {
+		uv1 = 1;
 		uv1_throttle(hmaster, stat);
+		uv1_hdr = &bau_desc->header.uv1_hdr;
+	} else
+		uv2_hdr = &bau_desc->header.uv2_hdr;
 
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
@@ -773,14 +780,23 @@
 	time1 = get_cycles();
 	do {
 		if (try == 0) {
-			bau_desc->header.msg_type = MSG_REGULAR;
+			if (uv1)
+				uv1_hdr->msg_type = MSG_REGULAR;
+			else
+				uv2_hdr->msg_type = MSG_REGULAR;
 			seq_number = bcp->message_number++;
 		} else {
-			bau_desc->header.msg_type = MSG_RETRY;
+			if (uv1)
+				uv1_hdr->msg_type = MSG_RETRY;
+			else
+				uv2_hdr->msg_type = MSG_RETRY;
 			stat->s_retry_messages++;
 		}
 
-		bau_desc->header.sequence = seq_number;
+		if (uv1)
+			uv1_hdr->sequence = seq_number;
+		else
+			uv2_hdr->sequence = seq_number;
 		index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu;
 		bcp->send_message = get_cycles();
 
@@ -967,7 +983,7 @@
 		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
-	bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu;
+	bau_desc += (ITEMS_PER_DESC * bcp->uvhub_cpu);
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 	if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes))
 		return NULL;
@@ -1083,7 +1099,7 @@
 		 */
 		mmr_image |= (1L << SOFTACK_MSHIFT);
 		if (is_uv2_hub()) {
-			mmr_image |= (1L << UV2_LEG_SHFT);
+			mmr_image &= ~(1L << UV2_LEG_SHFT);
 			mmr_image |= (1L << UV2_EXT_SHFT);
 		}
 		write_mmr_misc_control(pnode, mmr_image);
@@ -1432,12 +1448,15 @@
 {
 	int i;
 	int cpu;
+	int uv1 = 0;
 	unsigned long gpa;
 	unsigned long m;
 	unsigned long n;
 	size_t dsize;
 	struct bau_desc *bau_desc;
 	struct bau_desc *bd2;
+	struct uv1_bau_msg_header *uv1_hdr;
+	struct uv2_bau_msg_header *uv2_hdr;
 	struct bau_control *bcp;
 
 	/*
@@ -1451,6 +1470,8 @@
 	gpa = uv_gpa(bau_desc);
 	n = uv_gpa_to_gnode(gpa);
 	m = uv_gpa_to_offset(gpa);
+	if (is_uv1_hub())
+		uv1 = 1;
 
 	/* the 14-bit pnode */
 	write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m));
@@ -1461,21 +1482,33 @@
 	 */
 	for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) {
 		memset(bd2, 0, sizeof(struct bau_desc));
-		bd2->header.swack_flag =	1;
-		/*
-		 * The base_dest_nasid set in the message header is the nasid
-		 * of the first uvhub in the partition. The bit map will
-		 * indicate destination pnode numbers relative to that base.
-		 * They may not be consecutive if nasid striding is being used.
-		 */
-		bd2->header.base_dest_nasid =	UV_PNODE_TO_NASID(base_pnode);
-		bd2->header.dest_subnodeid =	UV_LB_SUBNODEID;
-		bd2->header.command =		UV_NET_ENDPOINT_INTD;
-		bd2->header.int_both =		1;
-		/*
-		 * all others need to be set to zero:
-		 *   fairness chaining multilevel count replied_to
-		 */
+		if (uv1) {
+			uv1_hdr = &bd2->header.uv1_hdr;
+			uv1_hdr->swack_flag =	1;
+			/*
+			 * The base_dest_nasid set in the message header
+			 * is the nasid of the first uvhub in the partition.
+			 * The bit map will indicate destination pnode numbers
+			 * relative to that base. They may not be consecutive
+			 * if nasid striding is being used.
+			 */
+			uv1_hdr->base_dest_nasid =
+						UV_PNODE_TO_NASID(base_pnode);
+			uv1_hdr->dest_subnodeid =	UV_LB_SUBNODEID;
+			uv1_hdr->command =		UV_NET_ENDPOINT_INTD;
+			uv1_hdr->int_both =		1;
+			/*
+			 * all others need to be set to zero:
+			 *   fairness chaining multilevel count replied_to
+			 */
+		} else {
+			uv2_hdr = &bd2->header.uv2_hdr;
+			uv2_hdr->swack_flag =	1;
+			uv2_hdr->base_dest_nasid =
+						UV_PNODE_TO_NASID(base_pnode);
+			uv2_hdr->dest_subnodeid =	UV_LB_SUBNODEID;
+			uv2_hdr->command =		UV_NET_ENDPOINT_INTD;
+		}
 	}
 	for_each_present_cpu(cpu) {
 		if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu)))
@@ -1728,6 +1761,14 @@
 		bcp->cpus_in_socket = sdp->num_cpus;
 		bcp->socket_master = *smasterp;
 		bcp->uvhub = bdp->uvhub;
+		if (is_uv1_hub())
+			bcp->uvhub_version = 1;
+		else if (is_uv2_hub())
+			bcp->uvhub_version = 2;
+		else {
+			printk(KERN_EMERG "uvhub version not 1 or 2\n");
+			return 1;
+		}
 		bcp->uvhub_master = *hmasterp;
 		bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id;
 		if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) {
@@ -1867,7 +1908,8 @@
 			val = 1L << 63;
 			write_gmmr_activation(pnode, val);
 			mmr = 1; /* should be 1 to broadcast to both sockets */
-			write_mmr_data_broadcast(pnode, mmr);
+			if (!is_uv1_hub())
+				write_mmr_data_broadcast(pnode, mmr);
 		}
 	}