Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/jkirsher/net-next

Conflicts:
	drivers/net/ethernet/intel/e1000e/netdev.c
diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 8533ad7..fa72052 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -461,8 +461,9 @@
 
 #define E1000_RX_DESC_PS(R, i)	    \
 	(&(((union e1000_rx_desc_packet_split *)((R).desc))[i]))
+#define E1000_RX_DESC_EXT(R, i)	    \
+	(&(((union e1000_rx_desc_extended *)((R).desc))[i]))
 #define E1000_GET_DESC(R, i, type)	(&(((struct type *)((R).desc))[i]))
-#define E1000_RX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_rx_desc)
 #define E1000_TX_DESC(R, i)		E1000_GET_DESC(R, i, e1000_tx_desc)
 #define E1000_CONTEXT_DESC(R, i)	E1000_GET_DESC(R, i, e1000_context_desc)
 
diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c
index 6a0526a..e0cbd6a 100644
--- a/drivers/net/ethernet/intel/e1000e/ethtool.c
+++ b/drivers/net/ethernet/intel/e1000e/ethtool.c
@@ -1195,7 +1195,7 @@
 		goto err_nomem;
 	}
 
-	rx_ring->size = rx_ring->count * sizeof(struct e1000_rx_desc);
+	rx_ring->size = rx_ring->count * sizeof(union e1000_rx_desc_extended);
 	rx_ring->desc = dma_alloc_coherent(&pdev->dev, rx_ring->size,
 					   &rx_ring->dma, GFP_KERNEL);
 	if (!rx_ring->desc) {
@@ -1221,7 +1221,7 @@
 	ew32(RCTL, rctl);
 
 	for (i = 0; i < rx_ring->count; i++) {
-		struct e1000_rx_desc *rx_desc = E1000_RX_DESC(*rx_ring, i);
+		union e1000_rx_desc_extended *rx_desc;
 		struct sk_buff *skb;
 
 		skb = alloc_skb(2048 + NET_IP_ALIGN, GFP_KERNEL);
@@ -1239,8 +1239,9 @@
 			ret_val = 8;
 			goto err_nomem;
 		}
-		rx_desc->buffer_addr =
-			cpu_to_le64(rx_ring->buffer_info[i].dma);
+		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+		rx_desc->read.buffer_addr =
+		    cpu_to_le64(rx_ring->buffer_info[i].dma);
 		memset(skb->data, 0x00, skb->len);
 	}
 
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index b1f925b..9742bc6 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -56,7 +56,7 @@
 
 #define DRV_EXTRAVERSION "-k"
 
-#define DRV_VERSION "1.4.4" DRV_EXTRAVERSION
+#define DRV_VERSION "1.5.1" DRV_EXTRAVERSION
 char e1000e_driver_name[] = "e1000e";
 const char e1000e_driver_version[] = DRV_VERSION;
 
@@ -192,7 +192,7 @@
 	struct e1000_buffer *buffer_info;
 	struct e1000_ring *rx_ring = adapter->rx_ring;
 	union e1000_rx_desc_packet_split *rx_desc_ps;
-	struct e1000_rx_desc *rx_desc;
+	union e1000_rx_desc_extended *rx_desc;
 	struct my_u1 {
 		u64 a;
 		u64 b;
@@ -399,41 +399,70 @@
 		break;
 	default:
 	case 0:
-		/* Legacy Receive Descriptor Format
+		/* Extended Receive Descriptor (Read) Format
 		 *
-		 * +-----------------------------------------------------+
-		 * |                Buffer Address [63:0]                |
-		 * +-----------------------------------------------------+
-		 * | VLAN Tag | Errors | Status 0 | Packet csum | Length |
-		 * +-----------------------------------------------------+
-		 * 63       48 47    40 39      32 31         16 15      0
+		 *   +-----------------------------------------------------+
+		 * 0 |                Buffer Address [63:0]                |
+		 *   +-----------------------------------------------------+
+		 * 8 |                      Reserved                       |
+		 *   +-----------------------------------------------------+
 		 */
-		printk(KERN_INFO "Rl[desc]     [address 63:0  ] "
-		       "[vl er S cks ln] [bi->dma       ] [bi->skb] "
-		       "<-- Legacy format\n");
-		for (i = 0; rx_ring->desc && (i < rx_ring->count); i++) {
-			rx_desc = E1000_RX_DESC(*rx_ring, i);
+		printk(KERN_INFO "R  [desc]      [buf addr 63:0 ] "
+		       "[reserved 63:0 ] [bi->dma       ] "
+		       "[bi->skb] <-- Ext (Read) format\n");
+		/* Extended Receive Descriptor (Write-Back) Format
+		 *
+		 *   63       48 47    32 31    24 23            4 3        0
+		 *   +------------------------------------------------------+
+		 *   |     RSS Hash      |        |               |         |
+		 * 0 +-------------------+  Rsvd  |   Reserved    | MRQ RSS |
+		 *   | Packet   | IP     |        |               |  Type   |
+		 *   | Checksum | Ident  |        |               |         |
+		 *   +------------------------------------------------------+
+		 * 8 | VLAN Tag | Length | Extended Error | Extended Status |
+		 *   +------------------------------------------------------+
+		 *   63       48 47    32 31            20 19               0
+		 */
+		printk(KERN_INFO "RWB[desc]      [cs ipid    mrq] "
+		       "[vt   ln xe  xs] "
+		       "[bi->skb] <-- Ext (Write-Back) format\n");
+
+		for (i = 0; i < rx_ring->count; i++) {
 			buffer_info = &rx_ring->buffer_info[i];
-			u0 = (struct my_u0 *)rx_desc;
-			printk(KERN_INFO "Rl[0x%03X]    %016llX %016llX "
-			       "%016llX %p", i,
-			       (unsigned long long)le64_to_cpu(u0->a),
-			       (unsigned long long)le64_to_cpu(u0->b),
-			       (unsigned long long)buffer_info->dma,
-			       buffer_info->skb);
+			rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+			u1 = (struct my_u1 *)rx_desc;
+			staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
+			if (staterr & E1000_RXD_STAT_DD) {
+				/* Descriptor Done */
+				printk(KERN_INFO "RWB[0x%03X]     %016llX "
+				       "%016llX ---------------- %p", i,
+				       (unsigned long long)le64_to_cpu(u1->a),
+				       (unsigned long long)le64_to_cpu(u1->b),
+				       buffer_info->skb);
+			} else {
+				printk(KERN_INFO "R  [0x%03X]     %016llX "
+				       "%016llX %016llX %p", i,
+				       (unsigned long long)le64_to_cpu(u1->a),
+				       (unsigned long long)le64_to_cpu(u1->b),
+				       (unsigned long long)buffer_info->dma,
+				       buffer_info->skb);
+
+				if (netif_msg_pktdata(adapter))
+					print_hex_dump(KERN_INFO, "",
+						       DUMP_PREFIX_ADDRESS, 16,
+						       1,
+						       phys_to_virt
+						       (buffer_info->dma),
+						       adapter->rx_buffer_len,
+						       true);
+			}
+
 			if (i == rx_ring->next_to_use)
 				printk(KERN_CONT " NTU\n");
 			else if (i == rx_ring->next_to_clean)
 				printk(KERN_CONT " NTC\n");
 			else
 				printk(KERN_CONT "\n");
-
-			if (netif_msg_pktdata(adapter))
-				print_hex_dump(KERN_INFO, "",
-					       DUMP_PREFIX_ADDRESS,
-					       16, 1,
-					       phys_to_virt(buffer_info->dma),
-					       adapter->rx_buffer_len, true);
 		}
 	}
 
@@ -576,7 +605,7 @@
 }
 
 /**
- * e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
+ * e1000_alloc_rx_buffers - Replace used receive buffers
  * @adapter: address of board private structure
  **/
 static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
@@ -585,7 +614,7 @@
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	struct e1000_ring *rx_ring = adapter->rx_ring;
-	struct e1000_rx_desc *rx_desc;
+	union e1000_rx_desc_extended *rx_desc;
 	struct e1000_buffer *buffer_info;
 	struct sk_buff *skb;
 	unsigned int i;
@@ -619,8 +648,8 @@
 			break;
 		}
 
-		rx_desc = E1000_RX_DESC(*rx_ring, i);
-		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+		rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
 
 		if (unlikely(!(i & (E1000_RX_BUFFER_WRITE - 1)))) {
 			/*
@@ -761,7 +790,7 @@
 {
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
-	struct e1000_rx_desc *rx_desc;
+	union e1000_rx_desc_extended *rx_desc;
 	struct e1000_ring *rx_ring = adapter->rx_ring;
 	struct e1000_buffer *buffer_info;
 	struct sk_buff *skb;
@@ -802,8 +831,8 @@
 			                                PAGE_SIZE,
 							DMA_FROM_DEVICE);
 
-		rx_desc = E1000_RX_DESC(*rx_ring, i);
-		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
+		rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+		rx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
 
 		if (unlikely(++i == rx_ring->count))
 			i = 0;
@@ -841,28 +870,27 @@
 	struct pci_dev *pdev = adapter->pdev;
 	struct e1000_hw *hw = &adapter->hw;
 	struct e1000_ring *rx_ring = adapter->rx_ring;
-	struct e1000_rx_desc *rx_desc, *next_rxd;
+	union e1000_rx_desc_extended *rx_desc, *next_rxd;
 	struct e1000_buffer *buffer_info, *next_buffer;
-	u32 length;
+	u32 length, staterr;
 	unsigned int i;
 	int cleaned_count = 0;
 	bool cleaned = 0;
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 
 	i = rx_ring->next_to_clean;
-	rx_desc = E1000_RX_DESC(*rx_ring, i);
+	rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 	buffer_info = &rx_ring->buffer_info[i];
 
-	while (rx_desc->status & E1000_RXD_STAT_DD) {
+	while (staterr & E1000_RXD_STAT_DD) {
 		struct sk_buff *skb;
-		u8 status;
 
 		if (*work_done >= work_to_do)
 			break;
 		(*work_done)++;
 		rmb();	/* read descriptor and rx_buffer_info after status DD */
 
-		status = rx_desc->status;
 		skb = buffer_info->skb;
 		buffer_info->skb = NULL;
 
@@ -871,7 +899,7 @@
 		i++;
 		if (i == rx_ring->count)
 			i = 0;
-		next_rxd = E1000_RX_DESC(*rx_ring, i);
+		next_rxd = E1000_RX_DESC_EXT(*rx_ring, i);
 		prefetch(next_rxd);
 
 		next_buffer = &rx_ring->buffer_info[i];
@@ -884,7 +912,7 @@
 				 DMA_FROM_DEVICE);
 		buffer_info->dma = 0;
 
-		length = le16_to_cpu(rx_desc->length);
+		length = le16_to_cpu(rx_desc->wb.upper.length);
 
 		/*
 		 * !EOP means multiple descriptors were used to store a single
@@ -893,7 +921,7 @@
 		 * next frame that _does_ have the EOP bit set, as it is by
 		 * definition only a frame fragment
 		 */
-		if (unlikely(!(status & E1000_RXD_STAT_EOP)))
+		if (unlikely(!(staterr & E1000_RXD_STAT_EOP)))
 			adapter->flags2 |= FLAG2_IS_DISCARDING;
 
 		if (adapter->flags2 & FLAG2_IS_DISCARDING) {
@@ -901,12 +929,12 @@
 			e_dbg("Receive packet consumed multiple buffers\n");
 			/* recycle */
 			buffer_info->skb = skb;
-			if (status & E1000_RXD_STAT_EOP)
+			if (staterr & E1000_RXD_STAT_EOP)
 				adapter->flags2 &= ~FLAG2_IS_DISCARDING;
 			goto next_desc;
 		}
 
-		if (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
+		if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
 			/* recycle */
 			buffer_info->skb = skb;
 			goto next_desc;
@@ -944,15 +972,15 @@
 		skb_put(skb, length);
 
 		/* Receive Checksum Offload */
-		e1000_rx_checksum(adapter,
-				  (u32)(status) |
-				  ((u32)(rx_desc->errors) << 24),
-				  le16_to_cpu(rx_desc->csum), skb);
+		e1000_rx_checksum(adapter, staterr,
+				  le16_to_cpu(rx_desc->wb.lower.hi_dword.
+					      csum_ip.csum), skb);
 
-		e1000_receive_skb(adapter, netdev, skb,status,rx_desc->special);
+		e1000_receive_skb(adapter, netdev, skb, staterr,
+				  rx_desc->wb.upper.vlan);
 
 next_desc:
-		rx_desc->status = 0;
+		rx_desc->wb.upper.status_error &= cpu_to_le32(~0xFF);
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= E1000_RX_BUFFER_WRITE) {
@@ -964,6 +992,8 @@
 		/* use prefetched values */
 		rx_desc = next_rxd;
 		buffer_info = next_buffer;
+
+		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 	}
 	rx_ring->next_to_clean = i;
 
@@ -1347,35 +1377,34 @@
 	struct net_device *netdev = adapter->netdev;
 	struct pci_dev *pdev = adapter->pdev;
 	struct e1000_ring *rx_ring = adapter->rx_ring;
-	struct e1000_rx_desc *rx_desc, *next_rxd;
+	union e1000_rx_desc_extended *rx_desc, *next_rxd;
 	struct e1000_buffer *buffer_info, *next_buffer;
-	u32 length;
+	u32 length, staterr;
 	unsigned int i;
 	int cleaned_count = 0;
 	bool cleaned = false;
 	unsigned int total_rx_bytes=0, total_rx_packets=0;
 
 	i = rx_ring->next_to_clean;
-	rx_desc = E1000_RX_DESC(*rx_ring, i);
+	rx_desc = E1000_RX_DESC_EXT(*rx_ring, i);
+	staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 	buffer_info = &rx_ring->buffer_info[i];
 
-	while (rx_desc->status & E1000_RXD_STAT_DD) {
+	while (staterr & E1000_RXD_STAT_DD) {
 		struct sk_buff *skb;
-		u8 status;
 
 		if (*work_done >= work_to_do)
 			break;
 		(*work_done)++;
 		rmb();	/* read descriptor and rx_buffer_info after status DD */
 
-		status = rx_desc->status;
 		skb = buffer_info->skb;
 		buffer_info->skb = NULL;
 
 		++i;
 		if (i == rx_ring->count)
 			i = 0;
-		next_rxd = E1000_RX_DESC(*rx_ring, i);
+		next_rxd = E1000_RX_DESC_EXT(*rx_ring, i);
 		prefetch(next_rxd);
 
 		next_buffer = &rx_ring->buffer_info[i];
@@ -1386,23 +1415,22 @@
 			       DMA_FROM_DEVICE);
 		buffer_info->dma = 0;
 
-		length = le16_to_cpu(rx_desc->length);
+		length = le16_to_cpu(rx_desc->wb.upper.length);
 
 		/* errors is only valid for DD + EOP descriptors */
-		if (unlikely((status & E1000_RXD_STAT_EOP) &&
-		    (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) {
-				/* recycle both page and skb */
-				buffer_info->skb = skb;
-				/* an error means any chain goes out the window
-				 * too */
-				if (rx_ring->rx_skb_top)
-					dev_kfree_skb_irq(rx_ring->rx_skb_top);
-				rx_ring->rx_skb_top = NULL;
-				goto next_desc;
+		if (unlikely((staterr & E1000_RXD_STAT_EOP) &&
+			     (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK))) {
+			/* recycle both page and skb */
+			buffer_info->skb = skb;
+			/* an error means any chain goes out the window too */
+			if (rx_ring->rx_skb_top)
+				dev_kfree_skb_irq(rx_ring->rx_skb_top);
+			rx_ring->rx_skb_top = NULL;
+			goto next_desc;
 		}
 
 #define rxtop (rx_ring->rx_skb_top)
-		if (!(status & E1000_RXD_STAT_EOP)) {
+		if (!(staterr & E1000_RXD_STAT_EOP)) {
 			/* this descriptor is only the beginning (or middle) */
 			if (!rxtop) {
 				/* this is the beginning of a chain */
@@ -1457,10 +1485,9 @@
 		}
 
 		/* Receive Checksum Offload XXX recompute due to CRC strip? */
-		e1000_rx_checksum(adapter,
-		                  (u32)(status) |
-		                  ((u32)(rx_desc->errors) << 24),
-		                  le16_to_cpu(rx_desc->csum), skb);
+		e1000_rx_checksum(adapter, staterr,
+				  le16_to_cpu(rx_desc->wb.lower.hi_dword.
+					      csum_ip.csum), skb);
 
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
@@ -1473,11 +1500,11 @@
 			goto next_desc;
 		}
 
-		e1000_receive_skb(adapter, netdev, skb, status,
-		                  rx_desc->special);
+		e1000_receive_skb(adapter, netdev, skb, staterr,
+				  rx_desc->wb.upper.vlan);
 
 next_desc:
-		rx_desc->status = 0;
+		rx_desc->wb.upper.status_error &= cpu_to_le32(~0xFF);
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
@@ -1489,6 +1516,8 @@
 		/* use prefetched values */
 		rx_desc = next_rxd;
 		buffer_info = next_buffer;
+
+		staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
 	}
 	rx_ring->next_to_clean = i;
 
@@ -2887,6 +2916,10 @@
 		break;
 	}
 
+	/* Enable Extended Status in all Receive Descriptors */
+	rfctl = er32(RFCTL);
+	rfctl |= E1000_RFCTL_EXTEN;
+
 	/*
 	 * 82571 and greater support packet-split where the protocol
 	 * header is placed in skb->data and the packet data is
@@ -2912,9 +2945,6 @@
 	if (adapter->rx_ps_pages) {
 		u32 psrctl = 0;
 
-		/* Configure extra packet-split registers */
-		rfctl = er32(RFCTL);
-		rfctl |= E1000_RFCTL_EXTEN;
 		/*
 		 * disable packet split support for IPv6 extension headers,
 		 * because some malformed IPv6 headers can hang the Rx
@@ -2922,8 +2952,6 @@
 		rfctl |= (E1000_RFCTL_IPV6_EX_DIS |
 			  E1000_RFCTL_NEW_IPV6_EXT_DIS);
 
-		ew32(RFCTL, rfctl);
-
 		/* Enable Packet split descriptors */
 		rctl |= E1000_RCTL_DTYP_PS;
 
@@ -2946,6 +2974,7 @@
 		ew32(PSRCTL, psrctl);
 	}
 
+	ew32(RFCTL, rfctl);
 	ew32(RCTL, rctl);
 	/* just started the receive unit, no need to restart */
 	adapter->flags &= ~FLAG_RX_RESTART_NOW;
@@ -2971,11 +3000,11 @@
 		adapter->clean_rx = e1000_clean_rx_irq_ps;
 		adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps;
 	} else if (adapter->netdev->mtu > ETH_FRAME_LEN + ETH_FCS_LEN) {
-		rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
+		rdlen = rx_ring->count * sizeof(union e1000_rx_desc_extended);
 		adapter->clean_rx = e1000_clean_jumbo_rx_irq;
 		adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
 	} else {
-		rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
+		rdlen = rx_ring->count * sizeof(union e1000_rx_desc_extended);
 		adapter->clean_rx = e1000_clean_rx_irq;
 		adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index e04a8e4..378ce46 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -91,13 +91,16 @@
 #define IXGBE_RX_BUFFER_WRITE	16	/* Must be power of 2 */
 
 #define IXGBE_TX_FLAGS_CSUM		(u32)(1)
-#define IXGBE_TX_FLAGS_VLAN		(u32)(1 << 1)
-#define IXGBE_TX_FLAGS_TSO		(u32)(1 << 2)
-#define IXGBE_TX_FLAGS_IPV4		(u32)(1 << 3)
-#define IXGBE_TX_FLAGS_FCOE		(u32)(1 << 4)
-#define IXGBE_TX_FLAGS_FSO		(u32)(1 << 5)
+#define IXGBE_TX_FLAGS_HW_VLAN		(u32)(1 << 1)
+#define IXGBE_TX_FLAGS_SW_VLAN		(u32)(1 << 2)
+#define IXGBE_TX_FLAGS_TSO		(u32)(1 << 3)
+#define IXGBE_TX_FLAGS_IPV4		(u32)(1 << 4)
+#define IXGBE_TX_FLAGS_FCOE		(u32)(1 << 5)
+#define IXGBE_TX_FLAGS_FSO		(u32)(1 << 6)
+#define IXGBE_TX_FLAGS_MAPPED_AS_PAGE	(u32)(1 << 7)
 #define IXGBE_TX_FLAGS_VLAN_MASK	0xffff0000
-#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK   0x0000e000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK	0xe0000000
+#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT  29
 #define IXGBE_TX_FLAGS_VLAN_SHIFT	16
 
 #define IXGBE_MAX_RSC_INT_RATE          162760
@@ -141,14 +144,14 @@
 /* wrapper around a pointer to a socket buffer,
  * so a DMA handle can be stored along with the buffer */
 struct ixgbe_tx_buffer {
-	struct sk_buff *skb;
-	dma_addr_t dma;
+	union ixgbe_adv_tx_desc *next_to_watch;
 	unsigned long time_stamp;
-	u16 length;
-	u16 next_to_watch;
-	unsigned int bytecount;
+	dma_addr_t dma;
+	u32 length;
+	u32 tx_flags;
+	struct sk_buff *skb;
+	u32 bytecount;
 	u16 gso_segs;
-	u8 mapped_as_page;
 };
 
 struct ixgbe_rx_buffer {
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
index 0ace6ce..da6d53e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_nl.c
@@ -414,7 +414,7 @@
 		u8 prio_tc[MAX_TRAFFIC_CLASS] = {0, 1, 2, 3, 4, 5, 6, 7};
 		int max_frame = adapter->netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
 
-#ifdef CONFIG_FCOE
+#ifdef IXGBE_FCOE
 		if (adapter->netdev->features & NETIF_F_FCOE_MTU)
 			max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
 #endif
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
index 824edae..e9b992f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c
@@ -241,10 +241,12 @@
 	 */
 	if (lastsize == bufflen) {
 		if (j >= IXGBE_BUFFCNT_MAX) {
-			e_err(drv, "xid=%x:%d,%d,%d:addr=%llx "
-				"not enough user buffers. We need an extra "
-				"buffer because lastsize is bufflen.\n",
-				xid, i, j, dmacount, (u64)addr);
+			printk_once("Will NOT use DDP since there are not "
+				    "enough user buffers. We need an  extra "
+				    "buffer because lastsize is bufflen. "
+				    "xid=%x:%d,%d,%d:addr=%llx\n",
+				    xid, i, j, dmacount, (u64)addr);
+
 			goto out_noddp_free;
 		}
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index b73194c..e8aad76 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -385,7 +385,7 @@
 		tx_ring = adapter->tx_ring[n];
 		tx_buffer_info =
 			&tx_ring->tx_buffer_info[tx_ring->next_to_clean];
-		pr_info(" %5d %5X %5X %016llX %04X %3X %016llX\n",
+		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
 			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
 			   (u64)tx_buffer_info->dma,
 			   tx_buffer_info->length,
@@ -424,7 +424,7 @@
 			tx_buffer_info = &tx_ring->tx_buffer_info[i];
 			u0 = (struct my_u0 *)tx_desc;
 			pr_info("T [0x%03X]    %016llX %016llX %016llX"
-				" %04X  %3X %016llX %p", i,
+				" %04X  %p %016llX %p", i,
 				le64_to_cpu(u0->a),
 				le64_to_cpu(u0->b),
 				(u64)tx_buffer_info->dma,
@@ -643,27 +643,31 @@
 	}
 }
 
+static inline void ixgbe_unmap_tx_resource(struct ixgbe_ring *ring,
+					   struct ixgbe_tx_buffer *tx_buffer)
+{
+	if (tx_buffer->dma) {
+		if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_MAPPED_AS_PAGE)
+			dma_unmap_page(ring->dev,
+			               tx_buffer->dma,
+			               tx_buffer->length,
+			               DMA_TO_DEVICE);
+		else
+			dma_unmap_single(ring->dev,
+			                 tx_buffer->dma,
+			                 tx_buffer->length,
+			                 DMA_TO_DEVICE);
+	}
+	tx_buffer->dma = 0;
+}
+
 void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *tx_ring,
 				      struct ixgbe_tx_buffer *tx_buffer_info)
 {
-	if (tx_buffer_info->dma) {
-		if (tx_buffer_info->mapped_as_page)
-			dma_unmap_page(tx_ring->dev,
-				       tx_buffer_info->dma,
-				       tx_buffer_info->length,
-				       DMA_TO_DEVICE);
-		else
-			dma_unmap_single(tx_ring->dev,
-					 tx_buffer_info->dma,
-					 tx_buffer_info->length,
-					 DMA_TO_DEVICE);
-		tx_buffer_info->dma = 0;
-	}
-	if (tx_buffer_info->skb) {
+	ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info);
+	if (tx_buffer_info->skb)
 		dev_kfree_skb_any(tx_buffer_info->skb);
-		tx_buffer_info->skb = NULL;
-	}
-	tx_buffer_info->time_stamp = 0;
+	tx_buffer_info->skb = NULL;
 	/* tx_buffer_info must be completely set up in the transmit path */
 }
 
@@ -797,56 +801,72 @@
 			       struct ixgbe_ring *tx_ring)
 {
 	struct ixgbe_adapter *adapter = q_vector->adapter;
-	union ixgbe_adv_tx_desc *tx_desc, *eop_desc;
-	struct ixgbe_tx_buffer *tx_buffer_info;
+	struct ixgbe_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
 	unsigned int total_bytes = 0, total_packets = 0;
-	u16 i, eop, count = 0;
+	u16 i = tx_ring->next_to_clean;
+	u16 count;
 
-	i = tx_ring->next_to_clean;
-	eop = tx_ring->tx_buffer_info[i].next_to_watch;
-	eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i);
 
-	while ((eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)) &&
-	       (count < q_vector->tx.work_limit)) {
-		bool cleaned = false;
-		rmb(); /* read buffer_info after eop_desc */
-		for ( ; !cleaned; count++) {
-			tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i);
-			tx_buffer_info = &tx_ring->tx_buffer_info[i];
+	for (count = 0; count < q_vector->tx.work_limit; count++) {
+		union ixgbe_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
 
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD)))
+			break;
+
+		/* count the packet as being completed */
+		tx_ring->tx_stats.completed++;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* prevent any other reads prior to eop_desc being verified */
+		rmb();
+
+		do {
+			ixgbe_unmap_tx_resource(tx_ring, tx_buffer);
 			tx_desc->wb.status = 0;
-			cleaned = (i == eop);
+			if (likely(tx_desc == eop_desc)) {
+				eop_desc = NULL;
+				dev_kfree_skb_any(tx_buffer->skb);
+				tx_buffer->skb = NULL;
 
-			i++;
-			if (i == tx_ring->count)
-				i = 0;
-
-			if (cleaned && tx_buffer_info->skb) {
-				total_bytes += tx_buffer_info->bytecount;
-				total_packets += tx_buffer_info->gso_segs;
+				total_bytes += tx_buffer->bytecount;
+				total_packets += tx_buffer->gso_segs;
 			}
 
-			ixgbe_unmap_and_free_tx_resource(tx_ring,
-							 tx_buffer_info);
-		}
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(i == tx_ring->count)) {
+				i = 0;
 
-		tx_ring->tx_stats.completed++;
-		eop = tx_ring->tx_buffer_info[i].next_to_watch;
-		eop_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = IXGBE_TX_DESC_ADV(tx_ring, 0);
+			}
+
+		} while (eop_desc);
 	}
 
 	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
 	tx_ring->stats.bytes += total_bytes;
 	tx_ring->stats.packets += total_packets;
-	u64_stats_update_begin(&tx_ring->syncp);
+	u64_stats_update_end(&tx_ring->syncp);
 	q_vector->tx.total_bytes += total_bytes;
 	q_vector->tx.total_packets += total_packets;
-	u64_stats_update_end(&tx_ring->syncp);
 
 	if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
 		/* schedule immediate reset if we believe we hung */
 		struct ixgbe_hw *hw = &adapter->hw;
-		tx_desc = IXGBE_TX_DESC_ADV(tx_ring, eop);
+		tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i);
 		e_err(drv, "Detected Tx Unit Hang\n"
 			"  Tx Queue             <%d>\n"
 			"  TDH, TDT             <%x>, <%x>\n"
@@ -858,8 +878,8 @@
 			tx_ring->queue_index,
 			IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
 			IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
-			tx_ring->next_to_use, eop,
-			tx_ring->tx_buffer_info[eop].time_stamp, jiffies);
+			tx_ring->next_to_use, i,
+			tx_ring->tx_buffer_info[i].time_stamp, jiffies);
 
 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
 
@@ -3597,7 +3617,7 @@
 
 	/* reconfigure the hardware */
 	if (adapter->dcbx_cap & DCB_CAP_DCBX_VER_CEE) {
-#ifdef CONFIG_FCOE
+#ifdef IXGBE_FCOE
 		if (adapter->netdev->features & NETIF_F_FCOE_MTU)
 			max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE);
 #endif
@@ -6351,7 +6371,7 @@
 	u32 type_tucmd = 0;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-	    if (!(tx_flags & IXGBE_TX_FLAGS_VLAN))
+	    if (!(tx_flags & IXGBE_TX_FLAGS_HW_VLAN))
 			return false;
 	} else {
 		u8 l4_hdr = 0;
@@ -6408,90 +6428,161 @@
 	return (skb->ip_summed == CHECKSUM_PARTIAL);
 }
 
-static int ixgbe_tx_map(struct ixgbe_adapter *adapter,
-			struct ixgbe_ring *tx_ring,
-			struct sk_buff *skb, u32 tx_flags,
-			unsigned int first, const u8 hdr_len)
+static __le32 ixgbe_tx_cmd_type(u32 tx_flags)
+{
+	/* set type for advanced descriptor with frame checksum insertion */
+	__le32 cmd_type = cpu_to_le32(IXGBE_ADVTXD_DTYP_DATA |
+				      IXGBE_ADVTXD_DCMD_IFCS |
+				      IXGBE_ADVTXD_DCMD_DEXT);
+
+	/* set HW vlan bit if vlan is present */
+	if (tx_flags & IXGBE_TX_FLAGS_HW_VLAN)
+		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_VLE);
+
+	/* set segmentation enable bits for TSO/FSO */
+#ifdef IXGBE_FCOE
+	if ((tx_flags & IXGBE_TX_FLAGS_TSO) || (tx_flags & IXGBE_TX_FLAGS_FSO))
+#else
+	if (tx_flags & IXGBE_TX_FLAGS_TSO)
+#endif
+		cmd_type |= cpu_to_le32(IXGBE_ADVTXD_DCMD_TSE);
+
+	return cmd_type;
+}
+
+static __le32 ixgbe_tx_olinfo_status(u32 tx_flags, unsigned int paylen)
+{
+	__le32 olinfo_status =
+		cpu_to_le32(paylen << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+	if (tx_flags & IXGBE_TX_FLAGS_TSO) {
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM |
+					    (1 << IXGBE_ADVTXD_IDX_SHIFT));
+		/* enble IPv4 checksum for TSO */
+		if (tx_flags & IXGBE_TX_FLAGS_IPV4)
+			olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_IXSM);
+	}
+
+	/* enable L4 checksum for TSO and TX checksum offload */
+	if (tx_flags & IXGBE_TX_FLAGS_CSUM)
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_POPTS_TXSM);
+
+#ifdef IXGBE_FCOE
+	/* use index 1 context for FCOE/FSO */
+	if (tx_flags & IXGBE_TX_FLAGS_FCOE)
+		olinfo_status |= cpu_to_le32(IXGBE_ADVTXD_CC |
+					    (1 << IXGBE_ADVTXD_IDX_SHIFT));
+
+#endif
+	return olinfo_status;
+}
+
+#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \
+		       IXGBE_TXD_CMD_RS)
+
+static void ixgbe_tx_map(struct ixgbe_ring *tx_ring,
+			 struct sk_buff *skb,
+			 struct ixgbe_tx_buffer *first,
+			 u32 tx_flags,
+			 const u8 hdr_len)
 {
 	struct device *dev = tx_ring->dev;
 	struct ixgbe_tx_buffer *tx_buffer_info;
-	unsigned int len;
-	unsigned int total = skb->len;
-	unsigned int offset = 0, size, count = 0;
-	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
-	unsigned int f;
-	unsigned int bytecount = skb->len;
-	u16 gso_segs = 1;
-	u16 i;
+	union ixgbe_adv_tx_desc *tx_desc;
+	dma_addr_t dma;
+	__le32 cmd_type, olinfo_status;
+	struct skb_frag_struct *frag;
+	unsigned int f = 0;
+	unsigned int data_len = skb->data_len;
+	unsigned int size = skb_headlen(skb);
+	u32 offset = 0;
+	u32 paylen = skb->len - hdr_len;
+	u16 i = tx_ring->next_to_use;
+	u16 gso_segs;
 
-	i = tx_ring->next_to_use;
-
-	if (tx_flags & IXGBE_TX_FLAGS_FCOE)
-		/* excluding fcoe_crc_eof for FCoE */
-		total -= sizeof(struct fcoe_crc_eof);
-
-	len = min(skb_headlen(skb), total);
-	while (len) {
-		tx_buffer_info = &tx_ring->tx_buffer_info[i];
-		size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
-
-		tx_buffer_info->length = size;
-		tx_buffer_info->mapped_as_page = false;
-		tx_buffer_info->dma = dma_map_single(dev,
-						     skb->data + offset,
-						     size, DMA_TO_DEVICE);
-		if (dma_mapping_error(dev, tx_buffer_info->dma))
-			goto dma_error;
-		tx_buffer_info->time_stamp = jiffies;
-		tx_buffer_info->next_to_watch = i;
-
-		len -= size;
-		total -= size;
-		offset += size;
-		count++;
-
-		if (len) {
-			i++;
-			if (i == tx_ring->count)
-				i = 0;
+#ifdef IXGBE_FCOE
+	if (tx_flags & IXGBE_TX_FLAGS_FCOE) {
+		if (data_len >= sizeof(struct fcoe_crc_eof)) {
+			data_len -= sizeof(struct fcoe_crc_eof);
+		} else {
+			size -= sizeof(struct fcoe_crc_eof) - data_len;
+			data_len = 0;
 		}
 	}
 
-	for (f = 0; f < nr_frags; f++) {
-		struct skb_frag_struct *frag;
+#endif
+	dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, dma))
+		goto dma_error;
+
+	cmd_type = ixgbe_tx_cmd_type(tx_flags);
+	olinfo_status = ixgbe_tx_olinfo_status(tx_flags, paylen);
+
+	tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i);
+
+	for (;;) {
+		while (size > IXGBE_MAX_DATA_PER_TXD) {
+			tx_desc->read.buffer_addr = cpu_to_le64(dma + offset);
+			tx_desc->read.cmd_type_len =
+				cmd_type | cpu_to_le32(IXGBE_MAX_DATA_PER_TXD);
+			tx_desc->read.olinfo_status = olinfo_status;
+
+			offset += IXGBE_MAX_DATA_PER_TXD;
+			size -= IXGBE_MAX_DATA_PER_TXD;
+
+			tx_desc++;
+			i++;
+			if (i == tx_ring->count) {
+				tx_desc = IXGBE_TX_DESC_ADV(tx_ring, 0);
+				i = 0;
+			}
+		}
+
+		tx_buffer_info = &tx_ring->tx_buffer_info[i];
+		tx_buffer_info->length = offset + size;
+		tx_buffer_info->tx_flags = tx_flags;
+		tx_buffer_info->dma = dma;
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma + offset);
+		tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
+		tx_desc->read.olinfo_status = olinfo_status;
+
+		if (!data_len)
+			break;
 
 		frag = &skb_shinfo(skb)->frags[f];
-		len = min((unsigned int)frag->size, total);
-		offset = frag->page_offset;
+#ifdef IXGBE_FCOE
+		size = min_t(unsigned int, data_len, frag->size);
+#else
+		size = frag->size;
+#endif
+		data_len -= size;
+		f++;
 
-		while (len) {
-			i++;
-			if (i == tx_ring->count)
-				i = 0;
+		offset = 0;
+		tx_flags |= IXGBE_TX_FLAGS_MAPPED_AS_PAGE;
 
-			tx_buffer_info = &tx_ring->tx_buffer_info[i];
-			size = min(len, (uint)IXGBE_MAX_DATA_PER_TXD);
+		dma = dma_map_page(dev, frag->page, frag->page_offset,
+				   size, DMA_TO_DEVICE);
+		if (dma_mapping_error(dev, dma))
+			goto dma_error;
 
-			tx_buffer_info->length = size;
-			tx_buffer_info->dma = dma_map_page(dev,
-							   frag->page,
-							   offset, size,
-							   DMA_TO_DEVICE);
-			tx_buffer_info->mapped_as_page = true;
-			if (dma_mapping_error(dev, tx_buffer_info->dma))
-				goto dma_error;
-			tx_buffer_info->time_stamp = jiffies;
-			tx_buffer_info->next_to_watch = i;
-
-			len -= size;
-			total -= size;
-			offset += size;
-			count++;
+		tx_desc++;
+		i++;
+		if (i == tx_ring->count) {
+			tx_desc = IXGBE_TX_DESC_ADV(tx_ring, 0);
+			i = 0;
 		}
-		if (total == 0)
-			break;
 	}
 
+	tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD);
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
 	if (tx_flags & IXGBE_TX_FLAGS_TSO)
 		gso_segs = skb_shinfo(skb)->gso_segs;
 #ifdef IXGBE_FCOE
@@ -6500,93 +6591,16 @@
 		gso_segs = DIV_ROUND_UP(skb->len - hdr_len,
 					skb_shinfo(skb)->gso_size);
 #endif /* IXGBE_FCOE */
-	bytecount += (gso_segs - 1) * hdr_len;
+	else
+		gso_segs = 1;
 
 	/* multiply data chunks by size of headers */
-	tx_ring->tx_buffer_info[i].bytecount = bytecount;
-	tx_ring->tx_buffer_info[i].gso_segs = gso_segs;
-	tx_ring->tx_buffer_info[i].skb = skb;
-	tx_ring->tx_buffer_info[first].next_to_watch = i;
+	tx_buffer_info->bytecount = paylen + (gso_segs * hdr_len);
+	tx_buffer_info->gso_segs = gso_segs;
+	tx_buffer_info->skb = skb;
 
-	return count;
-
-dma_error:
-	e_dev_err("TX DMA map failed\n");
-
-	/* clear timestamp and dma mappings for failed tx_buffer_info map */
-	tx_buffer_info->dma = 0;
-	tx_buffer_info->time_stamp = 0;
-	tx_buffer_info->next_to_watch = 0;
-	if (count)
-		count--;
-
-	/* clear timestamp and dma mappings for remaining portion of packet */
-	while (count--) {
-		if (i == 0)
-			i += tx_ring->count;
-		i--;
-		tx_buffer_info = &tx_ring->tx_buffer_info[i];
-		ixgbe_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
-	}
-
-	return 0;
-}
-
-static void ixgbe_tx_queue(struct ixgbe_ring *tx_ring,
-			   int tx_flags, int count, u32 paylen, u8 hdr_len)
-{
-	union ixgbe_adv_tx_desc *tx_desc = NULL;
-	struct ixgbe_tx_buffer *tx_buffer_info;
-	u32 olinfo_status = 0, cmd_type_len = 0;
-	unsigned int i;
-	u32 txd_cmd = IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS | IXGBE_TXD_CMD_IFCS;
-
-	cmd_type_len |= IXGBE_ADVTXD_DTYP_DATA;
-
-	cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT;
-
-	if (tx_flags & IXGBE_TX_FLAGS_VLAN)
-		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
-
-	if (tx_flags & IXGBE_TX_FLAGS_TSO) {
-		cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
-
-		olinfo_status |= IXGBE_TXD_POPTS_TXSM <<
-				 IXGBE_ADVTXD_POPTS_SHIFT;
-
-		/* use index 1 context for tso */
-		olinfo_status |= (1 << IXGBE_ADVTXD_IDX_SHIFT);
-		if (tx_flags & IXGBE_TX_FLAGS_IPV4)
-			olinfo_status |= IXGBE_TXD_POPTS_IXSM <<
-					 IXGBE_ADVTXD_POPTS_SHIFT;
-
-	} else if (tx_flags & IXGBE_TX_FLAGS_CSUM)
-		olinfo_status |= IXGBE_TXD_POPTS_TXSM <<
-				 IXGBE_ADVTXD_POPTS_SHIFT;
-
-	if (tx_flags & IXGBE_TX_FLAGS_FCOE) {
-		olinfo_status |= IXGBE_ADVTXD_CC;
-		olinfo_status |= (1 << IXGBE_ADVTXD_IDX_SHIFT);
-		if (tx_flags & IXGBE_TX_FLAGS_FSO)
-			cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
-	}
-
-	olinfo_status |= ((paylen - hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT);
-
-	i = tx_ring->next_to_use;
-	while (count--) {
-		tx_buffer_info = &tx_ring->tx_buffer_info[i];
-		tx_desc = IXGBE_TX_DESC_ADV(tx_ring, i);
-		tx_desc->read.buffer_addr = cpu_to_le64(tx_buffer_info->dma);
-		tx_desc->read.cmd_type_len =
-			cpu_to_le32(cmd_type_len | tx_buffer_info->length);
-		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
-		i++;
-		if (i == tx_ring->count)
-			i = 0;
-	}
-
-	tx_desc->read.cmd_type_len |= cpu_to_le32(txd_cmd);
+	/* set the timestamp */
+	first->time_stamp = jiffies;
 
 	/*
 	 * Force memory writes to complete before letting h/w
@@ -6596,8 +6610,30 @@
 	 */
 	wmb();
 
-	tx_ring->next_to_use = i;
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	/* notify HW of packet */
 	writel(i, tx_ring->tail);
+
+	return;
+dma_error:
+	dev_err(dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	for (;;) {
+		tx_buffer_info = &tx_ring->tx_buffer_info[i];
+		ixgbe_unmap_tx_resource(tx_ring, tx_buffer_info);
+		if (tx_buffer_info == first)
+			break;
+		if (i == 0)
+			i = tx_ring->count;
+		i--;
+	}
+
+	dev_kfree_skb_any(skb);
+
+	tx_ring->next_to_use = i;
 }
 
 static void ixgbe_atr(struct ixgbe_ring *ring, struct sk_buff *skb,
@@ -6636,8 +6672,8 @@
 
 	th = tcp_hdr(skb);
 
-	/* skip this packet since the socket is closing */
-	if (th->fin)
+	/* skip this packet since it is invalid or the socket is closing */
+	if (!th || th->fin)
 		return;
 
 	/* sample on all syn packets or once every atr sample count */
@@ -6662,7 +6698,7 @@
 	 * since src port and flex bytes occupy the same word XOR them together
 	 * and write the value to source port portion of compressed dword
 	 */
-	if (vlan_id)
+	if (tx_flags & (IXGBE_TX_FLAGS_SW_VLAN | IXGBE_TX_FLAGS_HW_VLAN))
 		common.port.src ^= th->dest ^ __constant_htons(ETH_P_8021Q);
 	else
 		common.port.src ^= th->dest ^ protocol;
@@ -6744,14 +6780,14 @@
 			  struct ixgbe_adapter *adapter,
 			  struct ixgbe_ring *tx_ring)
 {
+	struct ixgbe_tx_buffer *first;
 	int tso;
-	u32  tx_flags = 0;
+	u32 tx_flags = 0;
 #if PAGE_SIZE > IXGBE_MAX_DATA_PER_TXD
 	unsigned short f;
 #endif
-	u16 first;
 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
-	__be16 protocol;
+	__be16 protocol = skb->protocol;
 	u8 hdr_len = 0;
 
 	/*
@@ -6772,68 +6808,82 @@
 		return NETDEV_TX_BUSY;
 	}
 
-	protocol = vlan_get_protocol(skb);
-
+	/* if we have a HW VLAN tag being added default to the HW one */
 	if (vlan_tx_tag_present(skb)) {
-		tx_flags |= vlan_tx_tag_get(skb);
-		if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) {
-			tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
-			tx_flags |= tx_ring->dcb_tc << 13;
-		}
-		tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
-		tx_flags |= IXGBE_TX_FLAGS_VLAN;
-	} else if (adapter->flags & IXGBE_FLAG_DCB_ENABLED &&
-		   skb->priority != TC_PRIO_CONTROL) {
-		tx_flags |= tx_ring->dcb_tc << 13;
-		tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT;
-		tx_flags |= IXGBE_TX_FLAGS_VLAN;
+		tx_flags |= vlan_tx_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IXGBE_TX_FLAGS_HW_VLAN;
+	/* else if it is a SW VLAN check the next protocol and store the tag */
+	} else if (protocol == __constant_htons(ETH_P_8021Q)) {
+		struct vlan_hdr *vhdr, _vhdr;
+		vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
+		if (!vhdr)
+			goto out_drop;
+
+		protocol = vhdr->h_vlan_encapsulated_proto;
+		tx_flags |= ntohs(vhdr->h_vlan_TCI) << IXGBE_TX_FLAGS_VLAN_SHIFT;
+		tx_flags |= IXGBE_TX_FLAGS_SW_VLAN;
 	}
 
-#ifdef IXGBE_FCOE
-	/* for FCoE with DCB, we force the priority to what
-	 * was specified by the switch */
-	if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED &&
-	    (protocol == htons(ETH_P_FCOE)))
-		tx_flags |= IXGBE_TX_FLAGS_FCOE;
+	if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) &&
+	    skb->priority != TC_PRIO_CONTROL) {
+		tx_flags &= ~IXGBE_TX_FLAGS_VLAN_PRIO_MASK;
+		tx_flags |= tx_ring->dcb_tc <<
+			    IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT;
+		if (tx_flags & IXGBE_TX_FLAGS_SW_VLAN) {
+			struct vlan_ethhdr *vhdr;
+			if (skb_header_cloned(skb) &&
+			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+				goto out_drop;
+			vhdr = (struct vlan_ethhdr *)skb->data;
+			vhdr->h_vlan_TCI = htons(tx_flags >>
+						 IXGBE_TX_FLAGS_VLAN_SHIFT);
+		} else {
+			tx_flags |= IXGBE_TX_FLAGS_HW_VLAN;
+		}
+	}
 
-#endif
 	/* record the location of the first descriptor for this packet */
-	first = tx_ring->next_to_use;
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
 
-	if (tx_flags & IXGBE_TX_FLAGS_FCOE) {
 #ifdef IXGBE_FCOE
-		/* setup tx offload for FCoE */
+	/* setup tx offload for FCoE */
+	if ((protocol == __constant_htons(ETH_P_FCOE)) &&
+	    (adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) {
 		tso = ixgbe_fso(tx_ring, skb, tx_flags, &hdr_len);
 		if (tso < 0)
 			goto out_drop;
 		else if (tso)
-			tx_flags |= IXGBE_TX_FLAGS_FSO;
+			tx_flags |= IXGBE_TX_FLAGS_FSO |
+				    IXGBE_TX_FLAGS_FCOE;
+		else
+			tx_flags |= IXGBE_TX_FLAGS_FCOE;
+
+		goto xmit_fcoe;
+	}
+
 #endif /* IXGBE_FCOE */
-	} else {
-		if (protocol == htons(ETH_P_IP))
-			tx_flags |= IXGBE_TX_FLAGS_IPV4;
-		tso = ixgbe_tso(tx_ring, skb, tx_flags, protocol, &hdr_len);
-		if (tso < 0)
-			goto out_drop;
-		else if (tso)
-			tx_flags |= IXGBE_TX_FLAGS_TSO;
-		else if (ixgbe_tx_csum(tx_ring, skb, tx_flags, protocol))
-			tx_flags |= IXGBE_TX_FLAGS_CSUM;
-	}
+	/* setup IPv4/IPv6 offloads */
+	if (protocol == __constant_htons(ETH_P_IP))
+		tx_flags |= IXGBE_TX_FLAGS_IPV4;
 
-	count = ixgbe_tx_map(adapter, tx_ring, skb, tx_flags, first, hdr_len);
-	if (count) {
-		/* add the ATR filter if ATR is on */
-		if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
-			ixgbe_atr(tx_ring, skb, tx_flags, protocol);
-		ixgbe_tx_queue(tx_ring, tx_flags, count, skb->len, hdr_len);
-		ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
-
-	} else {
-		tx_ring->tx_buffer_info[first].time_stamp = 0;
-		tx_ring->next_to_use = first;
+	tso = ixgbe_tso(tx_ring, skb, tx_flags, protocol, &hdr_len);
+	if (tso < 0)
 		goto out_drop;
-	}
+	else if (tso)
+		tx_flags |= IXGBE_TX_FLAGS_TSO;
+	else if (ixgbe_tx_csum(tx_ring, skb, tx_flags, protocol))
+		tx_flags |= IXGBE_TX_FLAGS_CSUM;
+
+	/* add the ATR filter if ATR is on */
+	if (test_bit(__IXGBE_TX_FDIR_INIT_DONE, &tx_ring->state))
+		ixgbe_atr(tx_ring, skb, tx_flags, protocol);
+
+#ifdef IXGBE_FCOE
+xmit_fcoe:
+#endif /* IXGBE_FCOE */
+	ixgbe_tx_map(tx_ring, skb, first, tx_flags, hdr_len);
+
+	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	return NETDEV_TX_OK;