Merge branch 'lro'
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index da0d8a8..0db218c 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -57,16 +57,20 @@
 #include <linux/ethtool.h>
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include <asm/div64.h>
 
 /* local include */
 #include "s2io.h"
 #include "s2io-regs.h"
 
-#define DRV_VERSION "Version 2.0.9.4"
+#define DRV_VERSION "2.0.11.2"
 
 /* S2io Driver name & version. */
 static char s2io_driver_name[] = "Neterion";
@@ -168,6 +172,11 @@
 	{"\n DRIVER STATISTICS"},
 	{"single_bit_ecc_errs"},
 	{"double_bit_ecc_errs"},
+	("lro_aggregated_pkts"),
+	("lro_flush_both_count"),
+	("lro_out_of_sequence_pkts"),
+	("lro_flush_due_to_max_pkts"),
+	("lro_avg_aggr_pkts"),
 };
 
 #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN
@@ -317,6 +326,12 @@
 static unsigned int rxsync_frequency = 3;
 /* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */
 static unsigned int intr_type = 0;
+/* Large receive offload feature */
+static unsigned int lro = 0;
+/* Max pkts to be aggregated by LRO at one time. If not specified,
+ * aggregation happens until we hit max IP pkt size(64K)
+ */
+static unsigned int lro_max_pkts = 0xFFFF;
 
 /*
  * S2IO device table.
@@ -1476,6 +1491,19 @@
 	writel((u32) (val64 >> 32), (add + 4));
 	val64 = readq(&bar0->mac_cfg);
 
+	/* Enable FCS stripping by adapter */
+	add = &bar0->mac_cfg;
+	val64 = readq(&bar0->mac_cfg);
+	val64 |= MAC_CFG_RMAC_STRIP_FCS;
+	if (nic->device_type == XFRAME_II_DEVICE)
+		writeq(val64, &bar0->mac_cfg);
+	else {
+		writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+		writel((u32) (val64), add);
+		writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+		writel((u32) (val64 >> 32), (add + 4));
+	}
+
 	/*
 	 * Set the time value to be inserted in the pause frame
 	 * generated by xena.
@@ -2569,6 +2597,8 @@
 #ifndef CONFIG_S2IO_NAPI
 	int pkt_cnt = 0;
 #endif
+	int i;
+
 	spin_lock(&nic->rx_lock);
 	if (atomic_read(&nic->card_state) == CARD_DOWN) {
 		DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n",
@@ -2661,6 +2691,18 @@
 			break;
 #endif
 	}
+	if (nic->lro) {
+		/* Clear all LRO sessions before exiting */
+		for (i=0; i<MAX_LRO_SESSIONS; i++) {
+			lro_t *lro = &nic->lro0_n[i];
+			if (lro->in_use) {
+				update_L3L4_header(nic, lro);
+				queue_rx_frame(lro->parent);
+				clear_lro_session(lro);
+			}
+		}
+	}
+
 	spin_unlock(&nic->rx_lock);
 }
 
@@ -3668,23 +3710,32 @@
 	 * else schedule a tasklet to reallocate the buffers.
 	 */
 	for (i = 0; i < config->rx_ring_num; i++) {
-		int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
-		int level = rx_buffer_level(sp, rxb_size, i);
+		if (!sp->lro) {
+			int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+			int level = rx_buffer_level(sp, rxb_size, i);
 
-		if ((level == PANIC) && (!TASKLET_IN_USE)) {
-			DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
-			DBG_PRINT(INTR_DBG, "PANIC levels\n");
-			if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
-				DBG_PRINT(ERR_DBG, "%s:Out of memory",
-					  dev->name);
-				DBG_PRINT(ERR_DBG, " in ISR!!\n");
+			if ((level == PANIC) && (!TASKLET_IN_USE)) {
+				DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", 
+							dev->name);
+				DBG_PRINT(INTR_DBG, "PANIC levels\n");
+				if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+					DBG_PRINT(ERR_DBG, "%s:Out of memory",
+						  dev->name);
+					DBG_PRINT(ERR_DBG, " in ISR!!\n");
+					clear_bit(0, (&sp->tasklet_status));
+					atomic_dec(&sp->isr_cnt);
+					return IRQ_HANDLED;
+				}
 				clear_bit(0, (&sp->tasklet_status));
-				atomic_dec(&sp->isr_cnt);
-				return IRQ_HANDLED;
+			} else if (level == LOW) {
+				tasklet_schedule(&sp->task);
 			}
-			clear_bit(0, (&sp->tasklet_status));
-		} else if (level == LOW) {
-			tasklet_schedule(&sp->task);
+		}
+		else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+				DBG_PRINT(ERR_DBG, "%s:Out of memory",
+							dev->name);
+				DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+				break;
 		}
 	}
 
@@ -3697,29 +3748,37 @@
 {
 	ring_info_t *ring = (ring_info_t *)dev_id;
 	nic_t *sp = ring->nic;
+	struct net_device *dev = (struct net_device *) dev_id;
 	int rxb_size, level, rng_n;
 
 	atomic_inc(&sp->isr_cnt);
 	rx_intr_handler(ring);
 
 	rng_n = ring->ring_no;
-	rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
-	level = rx_buffer_level(sp, rxb_size, rng_n);
+	if (!sp->lro) {
+		rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
+		level = rx_buffer_level(sp, rxb_size, rng_n);
 
-	if ((level == PANIC) && (!TASKLET_IN_USE)) {
-		int ret;
-		DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
-		DBG_PRINT(INTR_DBG, "PANIC levels\n");
-		if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
-			DBG_PRINT(ERR_DBG, "Out of memory in %s",
-				  __FUNCTION__);
+		if ((level == PANIC) && (!TASKLET_IN_USE)) {
+			int ret;
+			DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
+			DBG_PRINT(INTR_DBG, "PANIC levels\n");
+			if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
+				DBG_PRINT(ERR_DBG, "Out of memory in %s",
+					  __FUNCTION__);
+				clear_bit(0, (&sp->tasklet_status));
+				return IRQ_HANDLED;
+			}
 			clear_bit(0, (&sp->tasklet_status));
-			return IRQ_HANDLED;
+		} else if (level == LOW) {
+			tasklet_schedule(&sp->task);
 		}
-		clear_bit(0, (&sp->tasklet_status));
-	} else if (level == LOW) {
-		tasklet_schedule(&sp->task);
 	}
+	else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) {
+			DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name);
+			DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+	}
+
 	atomic_dec(&sp->isr_cnt);
 
 	return IRQ_HANDLED;
@@ -3875,24 +3934,33 @@
 	 */
 #ifndef CONFIG_S2IO_NAPI
 	for (i = 0; i < config->rx_ring_num; i++) {
-		int ret;
-		int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
-		int level = rx_buffer_level(sp, rxb_size, i);
+		if (!sp->lro) {
+			int ret;
+			int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+			int level = rx_buffer_level(sp, rxb_size, i);
 
-		if ((level == PANIC) && (!TASKLET_IN_USE)) {
-			DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
-			DBG_PRINT(INTR_DBG, "PANIC levels\n");
-			if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
-				DBG_PRINT(ERR_DBG, "%s:Out of memory",
-					  dev->name);
-				DBG_PRINT(ERR_DBG, " in ISR!!\n");
+			if ((level == PANIC) && (!TASKLET_IN_USE)) {
+				DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", 
+							dev->name);
+				DBG_PRINT(INTR_DBG, "PANIC levels\n");
+				if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+					DBG_PRINT(ERR_DBG, "%s:Out of memory",
+						  dev->name);
+					DBG_PRINT(ERR_DBG, " in ISR!!\n");
+					clear_bit(0, (&sp->tasklet_status));
+					atomic_dec(&sp->isr_cnt);
+					return IRQ_HANDLED;
+				}
 				clear_bit(0, (&sp->tasklet_status));
-				atomic_dec(&sp->isr_cnt);
-				return IRQ_HANDLED;
+			} else if (level == LOW) {
+				tasklet_schedule(&sp->task);
 			}
-			clear_bit(0, (&sp->tasklet_status));
-		} else if (level == LOW) {
-			tasklet_schedule(&sp->task);
+		}
+		else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+				DBG_PRINT(ERR_DBG, "%s:Out of memory",
+							dev->name);
+				DBG_PRINT(ERR_DBG, " in Rx intr!!\n");
+				break;
 		}
 	}
 #endif
@@ -5043,6 +5111,7 @@
 	int i = 0;
 	nic_t *sp = dev->priv;
 	StatInfo_t *stat_info = sp->mac_control.stats_info;
+	u64 tmp;
 
 	s2io_updt_stats(sp);
 	tmp_stats[i++] =
@@ -5134,6 +5203,16 @@
 	tmp_stats[i++] = 0;
 	tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs;
 	tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs;
+	tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt;
+	tmp_stats[i++] = stat_info->sw_stat.sending_both;
+	tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts;
+	tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts;
+	tmp = 0;
+	if (stat_info->sw_stat.num_aggregations) {
+		tmp = stat_info->sw_stat.sum_avg_pkts_aggregated;
+		do_div(tmp, stat_info->sw_stat.num_aggregations);
+	}
+	tmp_stats[i++] = tmp;
 }
 
 static int s2io_ethtool_get_regs_len(struct net_device *dev)
@@ -5515,6 +5594,14 @@
 	/* Setting its receive mode */
 	s2io_set_multicast(dev);
 
+	if (sp->lro) {
+		/* Initialize max aggregatable pkts based on MTU */
+		sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu;
+		/* Check if we can use(if specified) user provided value */
+		if (lro_max_pkts < sp->lro_max_aggr_per_sess)
+			sp->lro_max_aggr_per_sess = lro_max_pkts;
+	}
+
 	/* Enable tasklet for the device */
 	tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev);
 
@@ -5607,6 +5694,7 @@
 		((unsigned long) rxdp->Host_Control);
 	int ring_no = ring_data->ring_no;
 	u16 l3_csum, l4_csum;
+	lro_t *lro;
 
 	skb->dev = dev;
 	if (rxdp->Control_1 & RXD_T_CODE) {
@@ -5655,7 +5743,8 @@
 			skb_put(skb, buf2_len);
 	}
 
-	if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) &&
+	if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) ||
+	    (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
 	    (sp->rx_csum)) {
 		l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
 		l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
@@ -5666,6 +5755,54 @@
 			 * a flag in the RxD.
 			 */
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
+			if (sp->lro) {
+				u32 tcp_len;
+				u8 *tcp;
+				int ret = 0;
+
+				ret = s2io_club_tcp_session(skb->data, &tcp,
+						&tcp_len, &lro, rxdp, sp);
+				switch (ret) {
+					case 3: /* Begin anew */
+						lro->parent = skb;
+						goto aggregate;
+					case 1: /* Aggregate */
+					{
+						lro_append_pkt(sp, lro,
+							skb, tcp_len);
+						goto aggregate;
+					}
+					case 4: /* Flush session */
+					{
+						lro_append_pkt(sp, lro,
+							skb, tcp_len);
+						queue_rx_frame(lro->parent);
+						clear_lro_session(lro);
+						sp->mac_control.stats_info->
+						    sw_stat.flush_max_pkts++;
+						goto aggregate;
+					}
+					case 2: /* Flush both */
+						lro->parent->data_len =
+							lro->frags_len;
+						sp->mac_control.stats_info->
+						     sw_stat.sending_both++;
+						queue_rx_frame(lro->parent);
+						clear_lro_session(lro);
+						goto send_up;
+					case 0: /* sessions exceeded */
+					case 5: /*
+						 * First pkt in session not
+						 * L3/L4 aggregatable
+						 */
+						break;
+					default:
+						DBG_PRINT(ERR_DBG,
+							"%s: Samadhana!!\n",
+							 __FUNCTION__);
+						BUG();
+				}
+			}
 		} else {
 			/*
 			 * Packet with erroneous checksum, let the
@@ -5677,25 +5814,31 @@
 		skb->ip_summed = CHECKSUM_NONE;
 	}
 
-	skb->protocol = eth_type_trans(skb, dev);
+	if (!sp->lro) {
+		skb->protocol = eth_type_trans(skb, dev);
 #ifdef CONFIG_S2IO_NAPI
-	if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
-		/* Queueing the vlan frame to the upper layer */
-		vlan_hwaccel_receive_skb(skb, sp->vlgrp,
-			RXD_GET_VLAN_TAG(rxdp->Control_2));
-	} else {
-		netif_receive_skb(skb);
-	}
+		if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+			/* Queueing the vlan frame to the upper layer */
+			vlan_hwaccel_receive_skb(skb, sp->vlgrp,
+				RXD_GET_VLAN_TAG(rxdp->Control_2));
+		} else {
+			netif_receive_skb(skb);
+		}
 #else
-	if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
-		/* Queueing the vlan frame to the upper layer */
-		vlan_hwaccel_rx(skb, sp->vlgrp,
-			RXD_GET_VLAN_TAG(rxdp->Control_2));
-	} else {
-		netif_rx(skb);
-	}
+		if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+			/* Queueing the vlan frame to the upper layer */
+			vlan_hwaccel_rx(skb, sp->vlgrp,
+				RXD_GET_VLAN_TAG(rxdp->Control_2));
+		} else {
+			netif_rx(skb);
+		}
 #endif
+	} else {
+send_up:
+		queue_rx_frame(skb);
+	}		
 	dev->last_rx = jiffies;
+aggregate:
 	atomic_dec(&sp->rx_bufs_left[ring_no]);
 	return SUCCESS;
 }
@@ -5807,6 +5950,8 @@
 #endif
 module_param(rxsync_frequency, int, 0);
 module_param(intr_type, int, 0);
+module_param(lro, int, 0);
+module_param(lro_max_pkts, int, 0);
 
 /**
  *  s2io_init_nic - Initialization of the adapter .
@@ -5938,6 +6083,7 @@
 	else
 		sp->device_type = XFRAME_I_DEVICE;
 
+	sp->lro = lro;
 		
 	/* Initialize some PCI/PCI-X fields of the NIC. */
 	s2io_init_pci(sp);
@@ -6241,6 +6387,10 @@
 		DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been "
 			  "enabled\n",dev->name);
 
+	if (sp->lro)
+		DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
+			dev->name);
+
 	/* Initialize device name */
 	strcpy(sp->name, dev->name);
 	if (sp->device_type & XFRAME_II_DEVICE)
@@ -6351,3 +6501,318 @@
 
 module_init(s2io_starter);
 module_exit(s2io_closer);
+
+static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip, 
+		struct tcphdr **tcp, RxD_t *rxdp)
+{
+	int ip_off;
+	u8 l2_type = (u8)((rxdp->Control_1 >> 37) & 0x7), ip_len;
+
+	if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) {
+		DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n",
+			  __FUNCTION__);
+		return -1;
+	}
+
+	/* TODO:
+	 * By default the VLAN field in the MAC is stripped by the card, if this
+	 * feature is turned off in rx_pa_cfg register, then the ip_off field
+	 * has to be shifted by a further 2 bytes
+	 */
+	switch (l2_type) {
+		case 0: /* DIX type */
+		case 4: /* DIX type with VLAN */
+			ip_off = HEADER_ETHERNET_II_802_3_SIZE;
+			break;
+		/* LLC, SNAP etc are considered non-mergeable */
+		default:
+			return -1;
+	}
+
+	*ip = (struct iphdr *)((u8 *)buffer + ip_off);
+	ip_len = (u8)((*ip)->ihl);
+	ip_len <<= 2;
+	*tcp = (struct tcphdr *)((unsigned long)*ip + ip_len);
+
+	return 0;
+}
+
+static int check_for_socket_match(lro_t *lro, struct iphdr *ip,
+				  struct tcphdr *tcp)
+{
+	DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+	if ((lro->iph->saddr != ip->saddr) || (lro->iph->daddr != ip->daddr) ||
+	   (lro->tcph->source != tcp->source) || (lro->tcph->dest != tcp->dest))
+		return -1;
+	return 0;
+}
+
+static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp)
+{
+	return(ntohs(ip->tot_len) - (ip->ihl << 2) - (tcp->doff << 2));
+}
+
+static void initiate_new_session(lro_t *lro, u8 *l2h,
+		     struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+	DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+	lro->l2h = l2h;
+	lro->iph = ip;
+	lro->tcph = tcp;
+	lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq);
+	lro->tcp_ack = ntohl(tcp->ack_seq);
+	lro->sg_num = 1;
+	lro->total_len = ntohs(ip->tot_len);
+	lro->frags_len = 0;
+	/* 
+	 * check if we saw TCP timestamp. Other consistency checks have
+	 * already been done.
+ 	 */
+	if (tcp->doff == 8) {
+		u32 *ptr;
+		ptr = (u32 *)(tcp+1);
+		lro->saw_ts = 1;
+		lro->cur_tsval = *(ptr+1);
+		lro->cur_tsecr = *(ptr+2);
+	}
+	lro->in_use = 1;
+}
+
+static void update_L3L4_header(nic_t *sp, lro_t *lro)
+{
+	struct iphdr *ip = lro->iph;
+	struct tcphdr *tcp = lro->tcph;
+	u16 nchk;
+	StatInfo_t *statinfo = sp->mac_control.stats_info;
+	DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+	/* Update L3 header */
+	ip->tot_len = htons(lro->total_len);
+	ip->check = 0;
+	nchk = ip_fast_csum((u8 *)lro->iph, ip->ihl);
+	ip->check = nchk;
+
+	/* Update L4 header */
+	tcp->ack_seq = lro->tcp_ack;
+	tcp->window = lro->window;
+
+	/* Update tsecr field if this session has timestamps enabled */
+	if (lro->saw_ts) {
+		u32 *ptr = (u32 *)(tcp + 1);
+		*(ptr+2) = lro->cur_tsecr;
+	}
+
+	/* Update counters required for calculation of
+	 * average no. of packets aggregated.
+	 */
+	statinfo->sw_stat.sum_avg_pkts_aggregated += lro->sg_num;
+	statinfo->sw_stat.num_aggregations++;
+}
+
+static void aggregate_new_rx(lro_t *lro, struct iphdr *ip,
+		struct tcphdr *tcp, u32 l4_pyld)
+{
+	DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+	lro->total_len += l4_pyld;
+	lro->frags_len += l4_pyld;
+	lro->tcp_next_seq += l4_pyld;
+	lro->sg_num++;
+
+	/* Update ack seq no. and window ad(from this pkt) in LRO object */
+	lro->tcp_ack = tcp->ack_seq;
+	lro->window = tcp->window;
+	
+	if (lro->saw_ts) {
+		u32 *ptr;
+		/* Update tsecr and tsval from this packet */
+		ptr = (u32 *) (tcp + 1);
+		lro->cur_tsval = *(ptr + 1); 
+		lro->cur_tsecr = *(ptr + 2);
+	}
+}
+
+static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip,
+				    struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+	u8 *ptr;
+
+	DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+	if (!tcp_pyld_len) {
+		/* Runt frame or a pure ack */
+		return -1;
+	}
+
+	if (ip->ihl != 5) /* IP has options */
+		return -1;
+
+	if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin ||
+								!tcp->ack) {
+		/*
+		 * Currently recognize only the ack control word and
+		 * any other control field being set would result in
+		 * flushing the LRO session
+		 */
+		return -1;
+	}
+
+	/* 
+	 * Allow only one TCP timestamp option. Don't aggregate if
+	 * any other options are detected.
+	 */
+	if (tcp->doff != 5 && tcp->doff != 8)
+		return -1;
+
+	if (tcp->doff == 8) {
+		ptr = (u8 *)(tcp + 1);	
+		while (*ptr == TCPOPT_NOP)
+			ptr++;
+		if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP)
+			return -1;
+
+		/* Ensure timestamp value increases monotonically */
+		if (l_lro)
+			if (l_lro->cur_tsval > *((u32 *)(ptr+2)))
+				return -1;
+
+		/* timestamp echo reply should be non-zero */
+		if (*((u32 *)(ptr+6)) == 0) 
+			return -1;
+	}
+
+	return 0;
+}
+
+static int
+s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro,
+		      RxD_t *rxdp, nic_t *sp)
+{
+	struct iphdr *ip;
+	struct tcphdr *tcph;
+	int ret = 0, i;
+
+	if (!(ret = check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp,
+					 rxdp))) {
+		DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n",
+			  ip->saddr, ip->daddr);
+	} else {
+		return ret;
+	}
+
+	tcph = (struct tcphdr *)*tcp;
+	*tcp_len = get_l4_pyld_length(ip, tcph);
+	for (i=0; i<MAX_LRO_SESSIONS; i++) {
+		lro_t *l_lro = &sp->lro0_n[i];
+		if (l_lro->in_use) {
+			if (check_for_socket_match(l_lro, ip, tcph))
+				continue;
+			/* Sock pair matched */
+			*lro = l_lro;
+
+			if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) {
+				DBG_PRINT(INFO_DBG, "%s:Out of order. expected "
+					  "0x%x, actual 0x%x\n", __FUNCTION__,
+					  (*lro)->tcp_next_seq,
+					  ntohl(tcph->seq));
+
+				sp->mac_control.stats_info->
+				   sw_stat.outof_sequence_pkts++;
+				ret = 2;
+				break;
+			}
+
+			if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len))
+				ret = 1; /* Aggregate */
+			else
+				ret = 2; /* Flush both */
+			break;
+		}
+	}
+
+	if (ret == 0) {
+		/* Before searching for available LRO objects,
+		 * check if the pkt is L3/L4 aggregatable. If not
+		 * don't create new LRO session. Just send this
+		 * packet up.
+		 */
+		if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) {
+			return 5;
+		}
+
+		for (i=0; i<MAX_LRO_SESSIONS; i++) {
+			lro_t *l_lro = &sp->lro0_n[i];
+			if (!(l_lro->in_use)) {
+				*lro = l_lro;
+				ret = 3; /* Begin anew */
+				break;
+			}
+		}
+	}
+
+	if (ret == 0) { /* sessions exceeded */
+		DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n",
+			  __FUNCTION__);
+		*lro = NULL;
+		return ret;
+	}
+
+	switch (ret) {
+		case 3:
+			initiate_new_session(*lro, buffer, ip, tcph, *tcp_len);
+			break;
+		case 2:
+			update_L3L4_header(sp, *lro);
+			break;
+		case 1:
+			aggregate_new_rx(*lro, ip, tcph, *tcp_len);
+			if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) {
+				update_L3L4_header(sp, *lro);
+				ret = 4; /* Flush the LRO */
+			}
+			break;
+		default:
+			DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n",
+				__FUNCTION__);
+			break;
+	}
+
+	return ret;
+}
+
+static void clear_lro_session(lro_t *lro)
+{
+	static u16 lro_struct_size = sizeof(lro_t);
+
+	memset(lro, 0, lro_struct_size);
+}
+
+static void queue_rx_frame(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+
+	skb->protocol = eth_type_trans(skb, dev);
+#ifdef CONFIG_S2IO_NAPI
+	netif_receive_skb(skb);
+#else
+	netif_rx(skb);
+#endif
+}
+
+static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb,
+			   u32 tcp_len)
+{
+	struct sk_buff *tmp, *first = lro->parent;
+
+	first->len += tcp_len;
+	first->data_len = lro->frags_len;
+	skb_pull(skb, (skb->len - tcp_len));
+	if ((tmp = skb_shinfo(first)->frag_list)) {
+		while (tmp->next)
+			tmp = tmp->next;
+		tmp->next = skb;
+	}
+	else
+		skb_shinfo(first)->frag_list = skb;
+	sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++;
+	return;
+}
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index 68ae336..0a0b5b2 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -78,6 +78,13 @@
 typedef struct {
 	unsigned long long single_ecc_errs;
 	unsigned long long double_ecc_errs;
+	/* LRO statistics */
+	unsigned long long clubbed_frms_cnt;
+	unsigned long long sending_both;
+	unsigned long long outof_sequence_pkts;
+	unsigned long long flush_max_pkts;
+	unsigned long long sum_avg_pkts_aggregated;
+	unsigned long long num_aggregations;
 } swStat_t;
 
 /* The statistics block of Xena */
@@ -680,6 +687,24 @@
 	u64 data;
 };
 
+/* Data structure to represent a LRO session */
+typedef struct lro {
+	struct sk_buff	*parent;
+	u8		*l2h;
+	struct iphdr	*iph;
+	struct tcphdr	*tcph;
+	u32		tcp_next_seq;
+	u32		tcp_ack;
+	int		total_len;
+	int		frags_len;
+	int		sg_num;
+	int		in_use;
+	u16		window;
+	u32		cur_tsval;
+	u32		cur_tsecr;
+	u8		saw_ts;
+}lro_t;
+
 /* Structure representing one instance of the NIC */
 struct s2io_nic {
 	int rxd_mode;
@@ -784,6 +809,13 @@
 #define XFRAME_II_DEVICE	2
 	u8 device_type;
 
+#define MAX_LRO_SESSIONS	32
+	lro_t lro0_n[MAX_LRO_SESSIONS];
+	unsigned long	clubbed_frms_cnt;
+	unsigned long	sending_both;
+	u8		lro;
+	u16		lro_max_aggr_per_sess;
+
 #define INTA	0
 #define MSI	1
 #define MSI_X	2
@@ -937,4 +969,10 @@
 static int s2io_card_up(nic_t *nic);
 static int get_xena_rev_id(struct pci_dev *pdev);
 static void restore_xmsi_data(nic_t *nic);
+
+static int s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro, RxD_t *rxdp, nic_t *sp);
+static void clear_lro_session(lro_t *lro);
+static void queue_rx_frame(struct sk_buff *skb);
+static void update_L3L4_header(nic_t *sp, lro_t *lro);
+static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb, u32 tcp_len);
 #endif				/* _S2IO_H */