Blame - drivers/net/sfc/tx.c - kernel/msm-4.9

blob: 75eb0fd5fd2b7e97b0a79e2621eba72b784b7de4 [file] [log] [blame]

Ben Hutchings	8ceee66	2008-04-27 12:55:59 +0100	[diff] [blame]	1	/****************************************************************************
				2	* Driver for Solarflare Solarstorm network controllers and boards
				3	* Copyright 2005-2006 Fen Systems Ltd.
				4	* Copyright 2005-2008 Solarflare Communications Inc.
				5	*
				6	* This program is free software; you can redistribute it and/or modify it
				7	* under the terms of the GNU General Public License version 2 as published
				8	* by the Free Software Foundation, incorporated herein by reference.
				9	*/
				10
				11	#include <linux/pci.h>
				12	#include <linux/tcp.h>
				13	#include <linux/ip.h>
				14	#include <linux/in.h>
				15	#include <linux/if_ether.h>
				16	#include <linux/highmem.h>
				17	#include "net_driver.h"
				18	#include "tx.h"
				19	#include "efx.h"
				20	#include "falcon.h"
				21	#include "workarounds.h"
				22
				23	/*
				24	* TX descriptor ring full threshold
				25	*
				26	* The tx_queue descriptor ring fill-level must fall below this value
				27	* before we restart the netif queue
				28	*/
				29	#define EFX_NETDEV_TX_THRESHOLD(_tx_queue) \
				30	(_tx_queue->efx->type->txd_ring_mask / 2u)
				31
				32	/* We want to be able to nest calls to netif_stop_queue(), since each
				33	* channel can have an individual stop on the queue.
				34	*/
				35	void efx_stop_queue(struct efx_nic *efx)
				36	{
				37	spin_lock_bh(&efx->netif_stop_lock);
				38	EFX_TRACE(efx, "stop TX queue\n");
				39
				40	atomic_inc(&efx->netif_stop_count);
				41	netif_stop_queue(efx->net_dev);
				42
				43	spin_unlock_bh(&efx->netif_stop_lock);
				44	}
				45
				46	/* Wake netif's TX queue
				47	* We want to be able to nest calls to netif_stop_queue(), since each
				48	* channel can have an individual stop on the queue.
				49	*/
				50	inline void efx_wake_queue(struct efx_nic *efx)
				51	{
				52	local_bh_disable();
				53	if (atomic_dec_and_lock(&efx->netif_stop_count,
				54	&efx->netif_stop_lock)) {
				55	EFX_TRACE(efx, "waking TX queue\n");
				56	netif_wake_queue(efx->net_dev);
				57	spin_unlock(&efx->netif_stop_lock);
				58	}
				59	local_bh_enable();
				60	}
				61
				62	static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
				63	struct efx_tx_buffer *buffer)
				64	{
				65	if (buffer->unmap_len) {
				66	struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
				67	if (buffer->unmap_single)
				68	pci_unmap_single(pci_dev, buffer->unmap_addr,
				69	buffer->unmap_len, PCI_DMA_TODEVICE);
				70	else
				71	pci_unmap_page(pci_dev, buffer->unmap_addr,
				72	buffer->unmap_len, PCI_DMA_TODEVICE);
				73	buffer->unmap_len = 0;
				74	buffer->unmap_single = 0;
				75	}
				76
				77	if (buffer->skb) {
				78	dev_kfree_skb_any((struct sk_buff *) buffer->skb);
				79	buffer->skb = NULL;
				80	EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
				81	"complete\n", tx_queue->queue, read_ptr);
				82	}
				83	}
				84
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	85	/**
				86	* struct efx_tso_header - a DMA mapped buffer for packet headers
				87	* @next: Linked list of free ones.
				88	* The list is protected by the TX queue lock.
				89	* @dma_unmap_len: Length to unmap for an oversize buffer, or 0.
				90	* @dma_addr: The DMA address of the header below.
				91	*
				92	* This controls the memory used for a TSO header. Use TSOH_DATA()
				93	* to find the packet header data. Use TSOH_SIZE() to calculate the
				94	* total size required for a given packet header length. TSO headers
				95	* in the free list are exactly %TSOH_STD_SIZE bytes in size.
				96	*/
				97	struct efx_tso_header {
				98	union {
				99	struct efx_tso_header *next;
				100	size_t unmap_len;
				101	};
				102	dma_addr_t dma_addr;
				103	};
				104
				105	static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
				106	const struct sk_buff *skb);
				107	static void efx_fini_tso(struct efx_tx_queue *tx_queue);
				108	static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
				109	struct efx_tso_header *tsoh);
				110
				111	static inline void efx_tsoh_free(struct efx_tx_queue *tx_queue,
				112	struct efx_tx_buffer *buffer)
				113	{
				114	if (buffer->tsoh) {
				115	if (likely(!buffer->tsoh->unmap_len)) {
				116	buffer->tsoh->next = tx_queue->tso_headers_free;
				117	tx_queue->tso_headers_free = buffer->tsoh;
				118	} else {
				119	efx_tsoh_heap_free(tx_queue, buffer->tsoh);
				120	}
				121	buffer->tsoh = NULL;
				122	}
				123	}
				124
Ben Hutchings	8ceee66	2008-04-27 12:55:59 +0100	[diff] [blame]	125
				126	/*
				127	* Add a socket buffer to a TX queue
				128	*
				129	* This maps all fragments of a socket buffer for DMA and adds them to
				130	* the TX queue. The queue's insert pointer will be incremented by
				131	* the number of fragments in the socket buffer.
				132	*
				133	* If any DMA mapping fails, any mapped fragments will be unmapped,
				134	* the queue's insert pointer will be restored to its original value.
				135	*
				136	* Returns NETDEV_TX_OK or NETDEV_TX_BUSY
				137	* You must hold netif_tx_lock() to call this function.
				138	*/
				139	static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
				140	const struct sk_buff *skb)
				141	{
				142	struct efx_nic *efx = tx_queue->efx;
				143	struct pci_dev *pci_dev = efx->pci_dev;
				144	struct efx_tx_buffer *buffer;
				145	skb_frag_t *fragment;
				146	struct page *page;
				147	int page_offset;
				148	unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
				149	dma_addr_t dma_addr, unmap_addr = 0;
				150	unsigned int dma_len;
				151	unsigned unmap_single;
				152	int q_space, i = 0;
				153	int rc = NETDEV_TX_OK;
				154
				155	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
				156
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	157	if (skb_shinfo((struct sk_buff *)skb)->gso_size)
				158	return efx_enqueue_skb_tso(tx_queue, skb);
				159
Ben Hutchings	8ceee66	2008-04-27 12:55:59 +0100	[diff] [blame]	160	/* Get size of the initial fragment */
				161	len = skb_headlen(skb);
				162
				163	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
				164	q_space = efx->type->txd_ring_mask - 1 - fill_level;
				165
				166	/* Map for DMA. Use pci_map_single rather than pci_map_page
				167	* since this is more efficient on machines with sparse
				168	* memory.
				169	*/
				170	unmap_single = 1;
				171	dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
				172
				173	/* Process all fragments */
				174	while (1) {
				175	if (unlikely(pci_dma_mapping_error(dma_addr)))
				176	goto pci_err;
				177
				178	/* Store fields for marking in the per-fragment final
				179	* descriptor */
				180	unmap_len = len;
				181	unmap_addr = dma_addr;
				182
				183	/* Add to TX queue, splitting across DMA boundaries */
				184	do {
				185	if (unlikely(q_space-- <= 0)) {
				186	/* It might be that completions have
				187	* happened since the xmit path last
				188	* checked. Update the xmit path's
				189	* copy of read_count.
				190	*/
				191	++tx_queue->stopped;
				192	/* This memory barrier protects the
				193	* change of stopped from the access
				194	* of read_count. */
				195	smp_mb();
				196	tx_queue->old_read_count =
				197	(volatile unsigned )
				198	&tx_queue->read_count;
				199	fill_level = (tx_queue->insert_count
				200	- tx_queue->old_read_count);
				201	q_space = (efx->type->txd_ring_mask - 1 -
				202	fill_level);
				203	if (unlikely(q_space-- <= 0))
				204	goto stop;
				205	smp_mb();
				206	--tx_queue->stopped;
				207	}
				208
				209	insert_ptr = (tx_queue->insert_count &
				210	efx->type->txd_ring_mask);
				211	buffer = &tx_queue->buffer[insert_ptr];
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	212	efx_tsoh_free(tx_queue, buffer);
				213	EFX_BUG_ON_PARANOID(buffer->tsoh);
Ben Hutchings	8ceee66	2008-04-27 12:55:59 +0100	[diff] [blame]	214	EFX_BUG_ON_PARANOID(buffer->skb);
				215	EFX_BUG_ON_PARANOID(buffer->len);
				216	EFX_BUG_ON_PARANOID(buffer->continuation != 1);
				217	EFX_BUG_ON_PARANOID(buffer->unmap_len);
				218
				219	dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
				220	if (likely(dma_len > len))
				221	dma_len = len;
				222
				223	misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
				224	if (misalign && dma_len + misalign > 512)
				225	dma_len = 512 - misalign;
				226
				227	/* Fill out per descriptor fields */
				228	buffer->len = dma_len;
				229	buffer->dma_addr = dma_addr;
				230	len -= dma_len;
				231	dma_addr += dma_len;
				232	++tx_queue->insert_count;
				233	} while (len);
				234
				235	/* Transfer ownership of the unmapping to the final buffer */
				236	buffer->unmap_addr = unmap_addr;
				237	buffer->unmap_single = unmap_single;
				238	buffer->unmap_len = unmap_len;
				239	unmap_len = 0;
				240
				241	/* Get address and size of next fragment */
				242	if (i >= skb_shinfo(skb)->nr_frags)
				243	break;
				244	fragment = &skb_shinfo(skb)->frags[i];
				245	len = fragment->size;
				246	page = fragment->page;
				247	page_offset = fragment->page_offset;
				248	i++;
				249	/* Map for DMA */
				250	unmap_single = 0;
				251	dma_addr = pci_map_page(pci_dev, page, page_offset, len,
				252	PCI_DMA_TODEVICE);
				253	}
				254
				255	/* Transfer ownership of the skb to the final buffer */
				256	buffer->skb = skb;
				257	buffer->continuation = 0;
				258
				259	/* Pass off to hardware */
				260	falcon_push_buffers(tx_queue);
				261
				262	return NETDEV_TX_OK;
				263
				264	pci_err:
				265	EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
				266	"fragments for DMA\n", tx_queue->queue, skb->len,
				267	skb_shinfo(skb)->nr_frags + 1);
				268
				269	/* Mark the packet as transmitted, and free the SKB ourselves */
				270	dev_kfree_skb_any((struct sk_buff *)skb);
				271	goto unwind;
				272
				273	stop:
				274	rc = NETDEV_TX_BUSY;
				275
				276	if (tx_queue->stopped == 1)
				277	efx_stop_queue(efx);
				278
				279	unwind:
				280	/* Work backwards until we hit the original insert pointer value */
				281	while (tx_queue->insert_count != tx_queue->write_count) {
				282	--tx_queue->insert_count;
				283	insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
				284	buffer = &tx_queue->buffer[insert_ptr];
				285	efx_dequeue_buffer(tx_queue, buffer);
				286	buffer->len = 0;
				287	}
				288
				289	/* Free the fragment we were mid-way through pushing */
				290	if (unmap_len)
				291	pci_unmap_page(pci_dev, unmap_addr, unmap_len,
				292	PCI_DMA_TODEVICE);
				293
				294	return rc;
				295	}
				296
				297	/* Remove packets from the TX queue
				298	*
				299	* This removes packets from the TX queue, up to and including the
				300	* specified index.
				301	*/
				302	static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
				303	unsigned int index)
				304	{
				305	struct efx_nic *efx = tx_queue->efx;
				306	unsigned int stop_index, read_ptr;
				307	unsigned int mask = tx_queue->efx->type->txd_ring_mask;
				308
				309	stop_index = (index + 1) & mask;
				310	read_ptr = tx_queue->read_count & mask;
				311
				312	while (read_ptr != stop_index) {
				313	struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
				314	if (unlikely(buffer->len == 0)) {
				315	EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
				316	"completion id %x\n", tx_queue->queue,
				317	read_ptr);
				318	efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
				319	return;
				320	}
				321
				322	efx_dequeue_buffer(tx_queue, buffer);
				323	buffer->continuation = 1;
				324	buffer->len = 0;
				325
				326	++tx_queue->read_count;
				327	read_ptr = tx_queue->read_count & mask;
				328	}
				329	}
				330
				331	/* Initiate a packet transmission on the specified TX queue.
				332	* Note that returning anything other than NETDEV_TX_OK will cause the
				333	* OS to free the skb.
				334	*
				335	* This function is split out from efx_hard_start_xmit to allow the
				336	* loopback test to direct packets via specific TX queues. It is
				337	* therefore a non-static inline, so as not to penalise performance
				338	* for non-loopback transmissions.
				339	*
				340	* Context: netif_tx_lock held
				341	*/
				342	inline int efx_xmit(struct efx_nic *efx,
				343	struct efx_tx_queue tx_queue, struct sk_buff skb)
				344	{
				345	int rc;
				346
				347	/* Map fragments for DMA and add to TX queue */
				348	rc = efx_enqueue_skb(tx_queue, skb);
				349	if (unlikely(rc != NETDEV_TX_OK))
				350	goto out;
				351
				352	/* Update last TX timer */
				353	efx->net_dev->trans_start = jiffies;
				354
				355	out:
				356	return rc;
				357	}
				358
				359	/* Initiate a packet transmission. We use one channel per CPU
				360	* (sharing when we have more CPUs than channels). On Falcon, the TX
				361	* completion events will be directed back to the CPU that transmitted
				362	* the packet, which should be cache-efficient.
				363	*
				364	* Context: non-blocking.
				365	* Note that returning anything other than NETDEV_TX_OK will cause the
				366	* OS to free the skb.
				367	*/
				368	int efx_hard_start_xmit(struct sk_buff skb, struct net_device net_dev)
				369	{
				370	struct efx_nic *efx = net_dev->priv;
				371	return efx_xmit(efx, &efx->tx_queue[0], skb);
				372	}
				373
				374	void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
				375	{
				376	unsigned fill_level;
				377	struct efx_nic *efx = tx_queue->efx;
				378
				379	EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);
				380
				381	efx_dequeue_buffers(tx_queue, index);
				382
				383	/* See if we need to restart the netif queue. This barrier
				384	* separates the update of read_count from the test of
				385	* stopped. */
				386	smp_mb();
				387	if (unlikely(tx_queue->stopped)) {
				388	fill_level = tx_queue->insert_count - tx_queue->read_count;
				389	if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
				390	EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx));
				391
				392	/* Do this under netif_tx_lock(), to avoid racing
				393	* with efx_xmit(). */
				394	netif_tx_lock(efx->net_dev);
				395	if (tx_queue->stopped) {
				396	tx_queue->stopped = 0;
				397	efx_wake_queue(efx);
				398	}
				399	netif_tx_unlock(efx->net_dev);
				400	}
				401	}
				402	}
				403
				404	int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
				405	{
				406	struct efx_nic *efx = tx_queue->efx;
				407	unsigned int txq_size;
				408	int i, rc;
				409
				410	EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
				411
				412	/* Allocate software ring */
				413	txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
				414	tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
				415	if (!tx_queue->buffer) {
				416	rc = -ENOMEM;
				417	goto fail1;
				418	}
				419	for (i = 0; i <= efx->type->txd_ring_mask; ++i)
				420	tx_queue->buffer[i].continuation = 1;
				421
				422	/* Allocate hardware ring */
				423	rc = falcon_probe_tx(tx_queue);
				424	if (rc)
				425	goto fail2;
				426
				427	return 0;
				428
				429	fail2:
				430	kfree(tx_queue->buffer);
				431	tx_queue->buffer = NULL;
				432	fail1:
				433	tx_queue->used = 0;
				434
				435	return rc;
				436	}
				437
				438	int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
				439	{
				440	EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
				441
				442	tx_queue->insert_count = 0;
				443	tx_queue->write_count = 0;
				444	tx_queue->read_count = 0;
				445	tx_queue->old_read_count = 0;
				446	BUG_ON(tx_queue->stopped);
				447
				448	/* Set up TX descriptor ring */
				449	return falcon_init_tx(tx_queue);
				450	}
				451
				452	void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
				453	{
				454	struct efx_tx_buffer *buffer;
				455
				456	if (!tx_queue->buffer)
				457	return;
				458
				459	/* Free any buffers left in the ring */
				460	while (tx_queue->read_count != tx_queue->write_count) {
				461	buffer = &tx_queue->buffer[tx_queue->read_count &
				462	tx_queue->efx->type->txd_ring_mask];
				463	efx_dequeue_buffer(tx_queue, buffer);
				464	buffer->continuation = 1;
				465	buffer->len = 0;
				466
				467	++tx_queue->read_count;
				468	}
				469	}
				470
				471	void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
				472	{
				473	EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
				474
				475	/* Flush TX queue, remove descriptor ring */
				476	falcon_fini_tx(tx_queue);
				477
				478	efx_release_tx_buffers(tx_queue);
				479
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	480	/* Free up TSO header cache */
				481	efx_fini_tso(tx_queue);
				482
Ben Hutchings	8ceee66	2008-04-27 12:55:59 +0100	[diff] [blame]	483	/* Release queue's stop on port, if any */
				484	if (tx_queue->stopped) {
				485	tx_queue->stopped = 0;
				486	efx_wake_queue(tx_queue->efx);
				487	}
				488	}
				489
				490	void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
				491	{
				492	EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
				493	falcon_remove_tx(tx_queue);
				494
				495	kfree(tx_queue->buffer);
				496	tx_queue->buffer = NULL;
				497	tx_queue->used = 0;
				498	}
				499
				500
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	501	/* Efx TCP segmentation acceleration.
				502	*
				503	* Why? Because by doing it here in the driver we can go significantly
				504	* faster than the GSO.
				505	*
				506	* Requires TX checksum offload support.
				507	*/
				508
				509	/* Number of bytes inserted at the start of a TSO header buffer,
				510	* similar to NET_IP_ALIGN.
				511	*/
				512	#if defined(__i386__) \|\| defined(__x86_64__)
				513	#define TSOH_OFFSET 0
				514	#else
				515	#define TSOH_OFFSET NET_IP_ALIGN
				516	#endif
				517
				518	#define TSOH_BUFFER(tsoh) ((u8 *)(tsoh + 1) + TSOH_OFFSET)
				519
				520	/* Total size of struct efx_tso_header, buffer and padding */
				521	#define TSOH_SIZE(hdr_len) \
				522	(sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
				523
				524	/* Size of blocks on free list. Larger blocks must be allocated from
				525	* the heap.
				526	*/
				527	#define TSOH_STD_SIZE 128
				528
				529	#define PTR_DIFF(p1, p2) ((u8 )(p1) - (u8 )(p2))
				530	#define ETH_HDR_LEN(skb) (skb_network_header(skb) - (skb)->data)
				531	#define SKB_TCP_OFF(skb) PTR_DIFF(tcp_hdr(skb), (skb)->data)
				532	#define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
				533
				534	/**
				535	* struct tso_state - TSO state for an SKB
				536	* @remaining_len: Bytes of data we've yet to segment
				537	* @seqnum: Current sequence number
				538	* @packet_space: Remaining space in current packet
				539	* @ifc: Input fragment cursor.
				540	* Where we are in the current fragment of the incoming SKB. These
				541	* values get updated in place when we split a fragment over
				542	* multiple packets.
				543	* @p: Parameters.
				544	* These values are set once at the start of the TSO send and do
				545	* not get changed as the routine progresses.
				546	*
				547	* The state used during segmentation. It is put into this data structure
				548	* just to make it easy to pass into inline functions.
				549	*/
				550	struct tso_state {
				551	unsigned remaining_len;
				552	unsigned seqnum;
				553	unsigned packet_space;
				554
				555	struct {
				556	/* DMA address of current position */
				557	dma_addr_t dma_addr;
				558	/* Remaining length */
				559	unsigned int len;
				560	/* DMA address and length of the whole fragment */
				561	unsigned int unmap_len;
				562	dma_addr_t unmap_addr;
				563	struct page *page;
				564	unsigned page_off;
				565	} ifc;
				566
				567	struct {
				568	/* The number of bytes of header */
				569	unsigned int header_length;
				570
				571	/* The number of bytes to put in each outgoing segment. */
				572	int full_packet_size;
				573
				574	/* Current IPv4 ID, host endian. */
				575	unsigned ipv4_id;
				576	} p;
				577	};
				578
				579
				580	/*
				581	* Verify that our various assumptions about sk_buffs and the conditions
				582	* under which TSO will be attempted hold true.
				583	*/
				584	static inline void efx_tso_check_safe(const struct sk_buff *skb)
				585	{
				586	EFX_BUG_ON_PARANOID(skb->protocol != htons(ETH_P_IP));
				587	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
				588	skb->protocol);
				589	EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
				590	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
				591	+ (tcp_hdr(skb)->doff << 2u)) >
				592	skb_headlen(skb));
				593	}
				594
				595
				596	/*
				597	* Allocate a page worth of efx_tso_header structures, and string them
				598	* into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
				599	*/
				600	static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
				601	{
				602
				603	struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
				604	struct efx_tso_header *tsoh;
				605	dma_addr_t dma_addr;
				606	u8 base_kva, kva;
				607
				608	base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
				609	if (base_kva == NULL) {
				610	EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
				611	" headers\n");
				612	return -ENOMEM;
				613	}
				614
				615	/* pci_alloc_consistent() allocates pages. */
				616	EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
				617
				618	for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
				619	tsoh = (struct efx_tso_header *)kva;
				620	tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
				621	tsoh->next = tx_queue->tso_headers_free;
				622	tx_queue->tso_headers_free = tsoh;
				623	}
				624
				625	return 0;
				626	}
				627
				628
				629	/* Free up a TSO header, and all others in the same page. */
				630	static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
				631	struct efx_tso_header *tsoh,
				632	struct pci_dev *pci_dev)
				633	{
				634	struct efx_tso_header **p;
				635	unsigned long base_kva;
				636	dma_addr_t base_dma;
				637
				638	base_kva = (unsigned long)tsoh & PAGE_MASK;
				639	base_dma = tsoh->dma_addr & PAGE_MASK;
				640
				641	p = &tx_queue->tso_headers_free;
Ben Hutchings	b347564	2008-05-16 21:15:49 +0100	[diff] [blame^]	642	while (*p != NULL) {
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	643	if (((unsigned long)*p & PAGE_MASK) == base_kva)
				644	p = (p)->next;
				645	else
				646	p = &(*p)->next;
Ben Hutchings	b347564	2008-05-16 21:15:49 +0100	[diff] [blame^]	647	}
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	648
				649	pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
				650	}
				651
				652	static struct efx_tso_header *
				653	efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
				654	{
				655	struct efx_tso_header *tsoh;
				656
				657	tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC \| GFP_DMA);
				658	if (unlikely(!tsoh))
				659	return NULL;
				660
				661	tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
				662	TSOH_BUFFER(tsoh), header_len,
				663	PCI_DMA_TODEVICE);
				664	if (unlikely(pci_dma_mapping_error(tsoh->dma_addr))) {
				665	kfree(tsoh);
				666	return NULL;
				667	}
				668
				669	tsoh->unmap_len = header_len;
				670	return tsoh;
				671	}
				672
				673	static void
				674	efx_tsoh_heap_free(struct efx_tx_queue tx_queue, struct efx_tso_header tsoh)
				675	{
				676	pci_unmap_single(tx_queue->efx->pci_dev,
				677	tsoh->dma_addr, tsoh->unmap_len,
				678	PCI_DMA_TODEVICE);
				679	kfree(tsoh);
				680	}
				681
				682	/**
				683	* efx_tx_queue_insert - push descriptors onto the TX queue
				684	* @tx_queue: Efx TX queue
				685	* @dma_addr: DMA address of fragment
				686	* @len: Length of fragment
				687	* @skb: Only non-null for end of last segment
				688	* @end_of_packet: True if last fragment in a packet
				689	* @unmap_addr: DMA address of fragment for unmapping
				690	* @unmap_len: Only set this in last segment of a fragment
				691	*
				692	* Push descriptors onto the TX queue. Return 0 on success or 1 if
				693	* @tx_queue full.
				694	*/
				695	static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
				696	dma_addr_t dma_addr, unsigned len,
				697	const struct sk_buff *skb, int end_of_packet,
				698	dma_addr_t unmap_addr, unsigned unmap_len)
				699	{
				700	struct efx_tx_buffer *buffer;
				701	struct efx_nic *efx = tx_queue->efx;
				702	unsigned dma_len, fill_level, insert_ptr, misalign;
				703	int q_space;
				704
				705	EFX_BUG_ON_PARANOID(len <= 0);
				706
				707	fill_level = tx_queue->insert_count - tx_queue->old_read_count;
				708	/* -1 as there is no way to represent all descriptors used */
				709	q_space = efx->type->txd_ring_mask - 1 - fill_level;
				710
				711	while (1) {
				712	if (unlikely(q_space-- <= 0)) {
				713	/* It might be that completions have happened
				714	* since the xmit path last checked. Update
				715	* the xmit path's copy of read_count.
				716	*/
				717	++tx_queue->stopped;
				718	/* This memory barrier protects the change of
				719	* stopped from the access of read_count. */
				720	smp_mb();
				721	tx_queue->old_read_count =
				722	(volatile unsigned )&tx_queue->read_count;
				723	fill_level = (tx_queue->insert_count
				724	- tx_queue->old_read_count);
				725	q_space = efx->type->txd_ring_mask - 1 - fill_level;
				726	if (unlikely(q_space-- <= 0))
				727	return 1;
				728	smp_mb();
				729	--tx_queue->stopped;
				730	}
				731
				732	insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
				733	buffer = &tx_queue->buffer[insert_ptr];
				734	++tx_queue->insert_count;
				735
				736	EFX_BUG_ON_PARANOID(tx_queue->insert_count -
				737	tx_queue->read_count >
				738	efx->type->txd_ring_mask);
				739
				740	efx_tsoh_free(tx_queue, buffer);
				741	EFX_BUG_ON_PARANOID(buffer->len);
				742	EFX_BUG_ON_PARANOID(buffer->unmap_len);
				743	EFX_BUG_ON_PARANOID(buffer->skb);
				744	EFX_BUG_ON_PARANOID(buffer->continuation != 1);
				745	EFX_BUG_ON_PARANOID(buffer->tsoh);
				746
				747	buffer->dma_addr = dma_addr;
				748
				749	/* Ensure we do not cross a boundary unsupported by H/W */
				750	dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;
				751
				752	misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
				753	if (misalign && dma_len + misalign > 512)
				754	dma_len = 512 - misalign;
				755
				756	/* If there is enough space to send then do so */
				757	if (dma_len >= len)
				758	break;
				759
				760	buffer->len = dma_len; /* Don't set the other members */
				761	dma_addr += dma_len;
				762	len -= dma_len;
				763	}
				764
				765	EFX_BUG_ON_PARANOID(!len);
				766	buffer->len = len;
				767	buffer->skb = skb;
				768	buffer->continuation = !end_of_packet;
				769	buffer->unmap_addr = unmap_addr;
				770	buffer->unmap_len = unmap_len;
				771	return 0;
				772	}
				773
				774
				775	/*
				776	* Put a TSO header into the TX queue.
				777	*
				778	* This is special-cased because we know that it is small enough to fit in
				779	* a single fragment, and we know it doesn't cross a page boundary. It
				780	* also allows us to not worry about end-of-packet etc.
				781	*/
				782	static inline void efx_tso_put_header(struct efx_tx_queue *tx_queue,
				783	struct efx_tso_header *tsoh, unsigned len)
				784	{
				785	struct efx_tx_buffer *buffer;
				786
				787	buffer = &tx_queue->buffer[tx_queue->insert_count &
				788	tx_queue->efx->type->txd_ring_mask];
				789	efx_tsoh_free(tx_queue, buffer);
				790	EFX_BUG_ON_PARANOID(buffer->len);
				791	EFX_BUG_ON_PARANOID(buffer->unmap_len);
				792	EFX_BUG_ON_PARANOID(buffer->skb);
				793	EFX_BUG_ON_PARANOID(buffer->continuation != 1);
				794	EFX_BUG_ON_PARANOID(buffer->tsoh);
				795	buffer->len = len;
				796	buffer->dma_addr = tsoh->dma_addr;
				797	buffer->tsoh = tsoh;
				798
				799	++tx_queue->insert_count;
				800	}
				801
				802
				803	/* Remove descriptors put into a tx_queue. */
				804	static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
				805	{
				806	struct efx_tx_buffer *buffer;
				807
				808	/* Work backwards until we hit the original insert pointer value */
				809	while (tx_queue->insert_count != tx_queue->write_count) {
				810	--tx_queue->insert_count;
				811	buffer = &tx_queue->buffer[tx_queue->insert_count &
				812	tx_queue->efx->type->txd_ring_mask];
				813	efx_tsoh_free(tx_queue, buffer);
				814	EFX_BUG_ON_PARANOID(buffer->skb);
				815	buffer->len = 0;
				816	buffer->continuation = 1;
				817	if (buffer->unmap_len) {
				818	pci_unmap_page(tx_queue->efx->pci_dev,
				819	buffer->unmap_addr,
				820	buffer->unmap_len, PCI_DMA_TODEVICE);
				821	buffer->unmap_len = 0;
				822	}
				823	}
				824	}
				825
				826
				827	/* Parse the SKB header and initialise state. */
				828	static inline void tso_start(struct tso_state st, const struct sk_buff skb)
				829	{
				830	/* All ethernet/IP/TCP headers combined size is TCP header size
				831	* plus offset of TCP header relative to start of packet.
				832	*/
				833	st->p.header_length = ((tcp_hdr(skb)->doff << 2u)
				834	+ PTR_DIFF(tcp_hdr(skb), skb->data));
				835	st->p.full_packet_size = (st->p.header_length
				836	+ skb_shinfo(skb)->gso_size);
				837
				838	st->p.ipv4_id = ntohs(ip_hdr(skb)->id);
				839	st->seqnum = ntohl(tcp_hdr(skb)->seq);
				840
				841	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
				842	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
				843	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
				844
				845	st->packet_space = st->p.full_packet_size;
				846	st->remaining_len = skb->len - st->p.header_length;
				847	}
				848
				849
				850	/**
				851	* tso_get_fragment - record fragment details and map for DMA
				852	* @st: TSO state
				853	* @efx: Efx NIC
				854	* @data: Pointer to fragment data
				855	* @len: Length of fragment
				856	*
				857	* Record fragment details and map for DMA. Return 0 on success, or
				858	* -%ENOMEM if DMA mapping fails.
				859	*/
				860	static inline int tso_get_fragment(struct tso_state st, struct efx_nic efx,
				861	int len, struct page *page, int page_off)
				862	{
				863
				864	st->ifc.unmap_addr = pci_map_page(efx->pci_dev, page, page_off,
				865	len, PCI_DMA_TODEVICE);
				866	if (likely(!pci_dma_mapping_error(st->ifc.unmap_addr))) {
				867	st->ifc.unmap_len = len;
				868	st->ifc.len = len;
				869	st->ifc.dma_addr = st->ifc.unmap_addr;
				870	st->ifc.page = page;
				871	st->ifc.page_off = page_off;
				872	return 0;
				873	}
				874	return -ENOMEM;
				875	}
				876
				877
				878	/**
				879	* tso_fill_packet_with_fragment - form descriptors for the current fragment
				880	* @tx_queue: Efx TX queue
				881	* @skb: Socket buffer
				882	* @st: TSO state
				883	*
				884	* Form descriptors for the current fragment, until we reach the end
				885	* of fragment or end-of-packet. Return 0 on success, 1 if not enough
				886	* space in @tx_queue.
				887	*/
				888	static inline int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
				889	const struct sk_buff *skb,
				890	struct tso_state *st)
				891	{
				892
				893	int n, end_of_packet, rc;
				894
				895	if (st->ifc.len == 0)
				896	return 0;
				897	if (st->packet_space == 0)
				898	return 0;
				899
				900	EFX_BUG_ON_PARANOID(st->ifc.len <= 0);
				901	EFX_BUG_ON_PARANOID(st->packet_space <= 0);
				902
				903	n = min(st->ifc.len, st->packet_space);
				904
				905	st->packet_space -= n;
				906	st->remaining_len -= n;
				907	st->ifc.len -= n;
				908	st->ifc.page_off += n;
				909	end_of_packet = st->remaining_len == 0 \|\| st->packet_space == 0;
				910
				911	rc = efx_tx_queue_insert(tx_queue, st->ifc.dma_addr, n,
				912	st->remaining_len ? NULL : skb,
				913	end_of_packet, st->ifc.unmap_addr,
				914	st->ifc.len ? 0 : st->ifc.unmap_len);
				915
				916	st->ifc.dma_addr += n;
				917
				918	return rc;
				919	}
				920
				921
				922	/**
				923	* tso_start_new_packet - generate a new header and prepare for the new packet
				924	* @tx_queue: Efx TX queue
				925	* @skb: Socket buffer
				926	* @st: TSO state
				927	*
				928	* Generate a new header and prepare for the new packet. Return 0 on
				929	* success, or -1 if failed to alloc header.
				930	*/
				931	static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue,
				932	const struct sk_buff *skb,
				933	struct tso_state *st)
				934	{
				935	struct efx_tso_header *tsoh;
				936	struct iphdr *tsoh_iph;
				937	struct tcphdr *tsoh_th;
				938	unsigned ip_length;
				939	u8 *header;
				940
				941	/* Allocate a DMA-mapped header buffer. */
				942	if (likely(TSOH_SIZE(st->p.header_length) <= TSOH_STD_SIZE)) {
Ben Hutchings	b347564	2008-05-16 21:15:49 +0100	[diff] [blame^]	943	if (tx_queue->tso_headers_free == NULL) {
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	944	if (efx_tsoh_block_alloc(tx_queue))
				945	return -1;
Ben Hutchings	b347564	2008-05-16 21:15:49 +0100	[diff] [blame^]	946	}
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	947	EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
				948	tsoh = tx_queue->tso_headers_free;
				949	tx_queue->tso_headers_free = tsoh->next;
				950	tsoh->unmap_len = 0;
				951	} else {
				952	tx_queue->tso_long_headers++;
				953	tsoh = efx_tsoh_heap_alloc(tx_queue, st->p.header_length);
				954	if (unlikely(!tsoh))
				955	return -1;
				956	}
				957
				958	header = TSOH_BUFFER(tsoh);
				959	tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
				960	tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));
				961
				962	/* Copy and update the headers. */
				963	memcpy(header, skb->data, st->p.header_length);
				964
				965	tsoh_th->seq = htonl(st->seqnum);
				966	st->seqnum += skb_shinfo(skb)->gso_size;
				967	if (st->remaining_len > skb_shinfo(skb)->gso_size) {
				968	/* This packet will not finish the TSO burst. */
				969	ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb);
				970	tsoh_th->fin = 0;
				971	tsoh_th->psh = 0;
				972	} else {
				973	/* This packet will be the last in the TSO burst. */
				974	ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
				975	+ st->remaining_len);
				976	tsoh_th->fin = tcp_hdr(skb)->fin;
				977	tsoh_th->psh = tcp_hdr(skb)->psh;
				978	}
				979	tsoh_iph->tot_len = htons(ip_length);
				980
				981	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
				982	tsoh_iph->id = htons(st->p.ipv4_id);
				983	st->p.ipv4_id++;
				984
				985	st->packet_space = skb_shinfo(skb)->gso_size;
				986	++tx_queue->tso_packets;
				987
				988	/* Form a descriptor for this header. */
				989	efx_tso_put_header(tx_queue, tsoh, st->p.header_length);
				990
				991	return 0;
				992	}
				993
				994
				995	/**
				996	* efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
				997	* @tx_queue: Efx TX queue
				998	* @skb: Socket buffer
				999	*
				1000	* Context: You must hold netif_tx_lock() to call this function.
				1001	*
				1002	* Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
				1003	* @skb was not enqueued. In all cases @skb is consumed. Return
				1004	* %NETDEV_TX_OK or %NETDEV_TX_BUSY.
				1005	*/
				1006	static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
				1007	const struct sk_buff *skb)
				1008	{
				1009	int frag_i, rc, rc2 = NETDEV_TX_OK;
				1010	struct tso_state state;
				1011	skb_frag_t *f;
				1012
				1013	/* Verify TSO is safe - these checks should never fail. */
				1014	efx_tso_check_safe(skb);
				1015
				1016	EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
				1017
				1018	tso_start(&state, skb);
				1019
				1020	/* Assume that skb header area contains exactly the headers, and
				1021	* all payload is in the frag list.
				1022	*/
				1023	if (skb_headlen(skb) == state.p.header_length) {
				1024	/* Grab the first payload fragment. */
				1025	EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
				1026	frag_i = 0;
				1027	f = &skb_shinfo(skb)->frags[frag_i];
				1028	rc = tso_get_fragment(&state, tx_queue->efx,
				1029	f->size, f->page, f->page_offset);
				1030	if (rc)
				1031	goto mem_err;
				1032	} else {
				1033	/* It may look like this code fragment assumes that the
				1034	* skb->data portion does not cross a page boundary, but
				1035	* that is not the case. It is guaranteed to be direct
				1036	* mapped memory, and therefore is physically contiguous,
				1037	* and so DMA will work fine. kmap_atomic() on this region
				1038	* will just return the direct mapping, so that will work
				1039	* too.
				1040	*/
				1041	int page_off = (unsigned long)skb->data & (PAGE_SIZE - 1);
				1042	int hl = state.p.header_length;
				1043	rc = tso_get_fragment(&state, tx_queue->efx,
				1044	skb_headlen(skb) - hl,
				1045	virt_to_page(skb->data), page_off + hl);
				1046	if (rc)
				1047	goto mem_err;
				1048	frag_i = -1;
				1049	}
				1050
				1051	if (tso_start_new_packet(tx_queue, skb, &state) < 0)
				1052	goto mem_err;
				1053
				1054	while (1) {
				1055	rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
				1056	if (unlikely(rc))
				1057	goto stop;
				1058
				1059	/* Move onto the next fragment? */
				1060	if (state.ifc.len == 0) {
				1061	if (++frag_i >= skb_shinfo(skb)->nr_frags)
				1062	/* End of payload reached. */
				1063	break;
				1064	f = &skb_shinfo(skb)->frags[frag_i];
				1065	rc = tso_get_fragment(&state, tx_queue->efx,
				1066	f->size, f->page, f->page_offset);
				1067	if (rc)
				1068	goto mem_err;
				1069	}
				1070
				1071	/* Start at new packet? */
				1072	if (state.packet_space == 0 &&
				1073	tso_start_new_packet(tx_queue, skb, &state) < 0)
				1074	goto mem_err;
				1075	}
				1076
				1077	/* Pass off to hardware */
				1078	falcon_push_buffers(tx_queue);
				1079
				1080	tx_queue->tso_bursts++;
				1081	return NETDEV_TX_OK;
				1082
				1083	mem_err:
				1084	EFX_ERR(tx_queue->efx, "Out of memory for TSO headers, or PCI mapping"
				1085	" error\n");
				1086	dev_kfree_skb_any((struct sk_buff *)skb);
				1087	goto unwind;
				1088
				1089	stop:
				1090	rc2 = NETDEV_TX_BUSY;
				1091
				1092	/* Stop the queue if it wasn't stopped before. */
				1093	if (tx_queue->stopped == 1)
				1094	efx_stop_queue(tx_queue->efx);
				1095
				1096	unwind:
				1097	efx_enqueue_unwind(tx_queue);
				1098	return rc2;
				1099	}
				1100
				1101
				1102	/*
				1103	* Free up all TSO datastructures associated with tx_queue. This
				1104	* routine should be called only once the tx_queue is both empty and
				1105	* will no longer be used.
				1106	*/
				1107	static void efx_fini_tso(struct efx_tx_queue *tx_queue)
				1108	{
				1109	unsigned i;
				1110
Ben Hutchings	b347564	2008-05-16 21:15:49 +0100	[diff] [blame^]	1111	if (tx_queue->buffer) {
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	1112	for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i)
				1113	efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
Ben Hutchings	b347564	2008-05-16 21:15:49 +0100	[diff] [blame^]	1114	}
Ben Hutchings	b9b39b6	2008-05-07 12:51:12 +0100	[diff] [blame]	1115
				1116	while (tx_queue->tso_headers_free != NULL)
				1117	efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
				1118	tx_queue->efx->pci_dev);
				1119	}