/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2008 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/ip.h>
#include <net/checksum.h>
#include "net_driver.h"
#include "rx.h"
#include "efx.h"
#include "falcon.h"
#include "selftest.h"
#include "workarounds.h"

/* Number of RX descriptors pushed at once. */
#define EFX_RX_BATCH 8

/* Size of buffer allocated for skb header area. */
#define EFX_SKB_HEADERS 64u

/*
 * rx_alloc_method - RX buffer allocation method
 *
 * This driver supports two methods for allocating and using RX buffers:
 * each RX buffer may be backed by an skb or by an order-n page.
 *
 * When LRO is in use, the second method has lower overhead, since we
 * don't have to allocate then free skbs on reassembled frames.
 *
 * Values:
 *   - RX_ALLOC_METHOD_AUTO = 0
 *   - RX_ALLOC_METHOD_SKB  = 1
 *   - RX_ALLOC_METHOD_PAGE = 2
 *
 * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count
 * controlled by the parameters below.
 *
 *   - Pushing and popping descriptors are separated by the rx_queue
 *     size, so the watermarks should be ~rxd_size.
 *   - The performance win from using page-based allocation for LRO is
 *     less than the performance hit of using page-based allocation for
 *     non-LRO, so the watermarks should reflect this.
 *
 * Each channel maintains a single variable, updated as packets are
 * received:
 *
 *   rx_alloc_level += (lro_performed ? RX_ALLOC_FACTOR_LRO :
 *                      RX_ALLOC_FACTOR_SKB)
 * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which
 * limits the hysteresis), and update the allocation strategy:
 *
 *   rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_LRO ?
 *                      RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB)
 */
static int rx_alloc_method = RX_ALLOC_METHOD_PAGE;

#define RX_ALLOC_LEVEL_LRO 0x2000
#define RX_ALLOC_LEVEL_MAX 0x3000
#define RX_ALLOC_FACTOR_LRO 1
#define RX_ALLOC_FACTOR_SKB (-2)
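/* With the factors above, each packet that is delivered as an skb (or
 * fails the LRO header checks) pulls rx_alloc_level down twice as fast as
 * an LRO-eligible packet pushes it up, so roughly two LRO hits are needed
 * to offset every miss.  Page-based allocation is chosen only while the
 * level stays above RX_ALLOC_LEVEL_LRO (0x2000 of a maximum of 0x3000).
 */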

/* This is the percentage fill level below which new RX descriptors
 * will be added to the RX descriptor ring.
 */
static unsigned int rx_refill_threshold = 90;

/* This is the percentage fill level to which an RX queue will be refilled
 * when the "RX refill threshold" is reached.
 */
static unsigned int rx_refill_limit = 95;
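/* As an illustration only: assuming a 4096-entry descriptor ring (so a
 * usable fill of 4094 once EFX_RXD_HEAD_ROOM is reserved), the defaults
 * above trigger a refill when fewer than ~3684 descriptors are posted and
 * top the ring back up to ~3889; see efx_init_rx_queue() for the actual
 * calculation.
 */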

/*
 * RX maximum head room required.
 *
 * This must be at least 1 to prevent overflow and at least 2 to allow
 * pipelined receives.
 */
#define EFX_RXD_HEAD_ROOM 2

/* Macros for zero-order pages (potentially) containing multiple RX buffers */
#define RX_DATA_OFFSET(_data)				\
	(((unsigned long) (_data)) & (PAGE_SIZE-1))
#define RX_BUF_OFFSET(_rx_buf)				\
	RX_DATA_OFFSET((_rx_buf)->data)

#define RX_PAGE_SIZE(_efx)				\
	(PAGE_SIZE * (1u << (_efx)->rx_buffer_order))


/**************************************************************************
 *
 * Linux generic LRO handling
 *
 **************************************************************************
 */

static int efx_lro_get_skb_hdr(struct sk_buff *skb, void **ip_hdr,
			       void **tcpudp_hdr, u64 *hdr_flags, void *priv)
{
	struct efx_channel *channel = (struct efx_channel *)priv;
	struct iphdr *iph;
	struct tcphdr *th;

	iph = (struct iphdr *)skb->data;
	if (skb->protocol != htons(ETH_P_IP) || iph->protocol != IPPROTO_TCP)
		goto fail;

	th = (struct tcphdr *)(skb->data + iph->ihl * 4);

	*tcpudp_hdr = th;
	*ip_hdr = iph;
	*hdr_flags = LRO_IPV4 | LRO_TCP;

	channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO;
	return 0;
fail:
	channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
	return -1;
}

static int efx_get_frag_hdr(struct skb_frag_struct *frag, void **mac_hdr,
			    void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags,
			    void *priv)
{
	struct efx_channel *channel = (struct efx_channel *)priv;
	struct ethhdr *eh;
	struct iphdr *iph;

	/* We support EtherII and VLAN encapsulated IPv4 */
	eh = (struct ethhdr *)(page_address(frag->page) + frag->page_offset);
	*mac_hdr = eh;

	if (eh->h_proto == htons(ETH_P_IP)) {
		iph = (struct iphdr *)(eh + 1);
	} else {
		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)eh;
		if (veh->h_vlan_encapsulated_proto != htons(ETH_P_IP))
			goto fail;

		iph = (struct iphdr *)(veh + 1);
	}
	*ip_hdr = iph;

	/* We can only do LRO over TCP */
	if (iph->protocol != IPPROTO_TCP)
		goto fail;

	*hdr_flags = LRO_IPV4 | LRO_TCP;
	*tcpudp_hdr = (struct tcphdr *)((u8 *) iph + iph->ihl * 4);

	channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO;
	return 0;
fail:
	channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
	return -1;
}

int efx_lro_init(struct net_lro_mgr *lro_mgr, struct efx_nic *efx)
{
	size_t s = sizeof(struct net_lro_desc) * EFX_MAX_LRO_DESCRIPTORS;
	struct net_lro_desc *lro_arr;

	/* Allocate the LRO descriptors structure */
	lro_arr = kzalloc(s, GFP_KERNEL);
	if (lro_arr == NULL)
		return -ENOMEM;

	lro_mgr->lro_arr = lro_arr;
	lro_mgr->max_desc = EFX_MAX_LRO_DESCRIPTORS;
	lro_mgr->max_aggr = EFX_MAX_LRO_AGGR;
	lro_mgr->frag_align_pad = EFX_PAGE_SKB_ALIGN;

	lro_mgr->get_skb_header = efx_lro_get_skb_hdr;
	lro_mgr->get_frag_header = efx_get_frag_hdr;
	lro_mgr->dev = efx->net_dev;

	lro_mgr->features = LRO_F_NAPI;

	/* We can pass packets up with the checksum intact */
	lro_mgr->ip_summed = CHECKSUM_UNNECESSARY;

	lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY;

	return 0;
}

void efx_lro_fini(struct net_lro_mgr *lro_mgr)
{
	kfree(lro_mgr->lro_arr);
	lro_mgr->lro_arr = NULL;
}

/**
 * efx_init_rx_buffer_skb - create new RX buffer using skb-based allocation
 *
 * @rx_queue: Efx RX queue
 * @rx_buf: RX buffer structure to populate
 *
 * This allocates memory for a new receive buffer, maps it for DMA,
 * and populates a struct efx_rx_buffer with the relevant
 * information.  Return a negative error code or 0 on success.
 */
static inline int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue,
					 struct efx_rx_buffer *rx_buf)
{
	struct efx_nic *efx = rx_queue->efx;
	struct net_device *net_dev = efx->net_dev;
	int skb_len = efx->rx_buffer_len;

	rx_buf->skb = netdev_alloc_skb(net_dev, skb_len);
	if (unlikely(!rx_buf->skb))
		return -ENOMEM;

	/* Adjust the SKB for padding and checksum */
	skb_reserve(rx_buf->skb, NET_IP_ALIGN);
	rx_buf->len = skb_len - NET_IP_ALIGN;
	rx_buf->data = (char *)rx_buf->skb->data;
	rx_buf->skb->ip_summed = CHECKSUM_UNNECESSARY;

	rx_buf->dma_addr = pci_map_single(efx->pci_dev,
					  rx_buf->data, rx_buf->len,
					  PCI_DMA_FROMDEVICE);

	if (unlikely(pci_dma_mapping_error(rx_buf->dma_addr))) {
		dev_kfree_skb_any(rx_buf->skb);
		rx_buf->skb = NULL;
		return -EIO;
	}

	return 0;
}

/**
 * efx_init_rx_buffer_page - create new RX buffer using page-based allocation
 *
 * @rx_queue: Efx RX queue
 * @rx_buf: RX buffer structure to populate
 *
 * This allocates memory for a new receive buffer, maps it for DMA,
 * and populates a struct efx_rx_buffer with the relevant
 * information.  Return a negative error code or 0 on success.
 */
static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue,
					  struct efx_rx_buffer *rx_buf)
{
	struct efx_nic *efx = rx_queue->efx;
	int bytes, space, offset;

	bytes = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN;

	/* If there is space left in the previously allocated page,
	 * then use it. Otherwise allocate a new one */
	rx_buf->page = rx_queue->buf_page;
	if (rx_buf->page == NULL) {
		dma_addr_t dma_addr;

		rx_buf->page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
					   efx->rx_buffer_order);
		if (unlikely(rx_buf->page == NULL))
			return -ENOMEM;

		dma_addr = pci_map_page(efx->pci_dev, rx_buf->page,
					0, RX_PAGE_SIZE(efx),
					PCI_DMA_FROMDEVICE);

		if (unlikely(pci_dma_mapping_error(dma_addr))) {
			__free_pages(rx_buf->page, efx->rx_buffer_order);
			rx_buf->page = NULL;
			return -EIO;
		}

		rx_queue->buf_page = rx_buf->page;
		rx_queue->buf_dma_addr = dma_addr;
		rx_queue->buf_data = ((char *) page_address(rx_buf->page) +
				      EFX_PAGE_IP_ALIGN);
	}

	offset = RX_DATA_OFFSET(rx_queue->buf_data);
	rx_buf->len = bytes;
	rx_buf->dma_addr = rx_queue->buf_dma_addr + offset;
	rx_buf->data = rx_queue->buf_data;

	/* Try to pack multiple buffers per page */
	if (efx->rx_buffer_order == 0) {
		/* The next buffer starts on the next 512 byte boundary */
		rx_queue->buf_data += ((bytes + 0x1ff) & ~0x1ff);
		offset += ((bytes + 0x1ff) & ~0x1ff);
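		/* For instance, if rx_buffer_len is around 1.6KiB, each
		 * buffer occupies a 2KiB slot after rounding, so a 4KiB
		 * zero-order page holds two RX buffers. */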

		space = RX_PAGE_SIZE(efx) - offset;
		if (space >= bytes) {
			/* Refs dropped on kernel releasing each skb */
			get_page(rx_queue->buf_page);
			goto out;
		}
	}

	/* This is the final RX buffer for this page, so mark it for
	 * unmapping */
	rx_queue->buf_page = NULL;
	rx_buf->unmap_addr = rx_queue->buf_dma_addr;

 out:
	return 0;
}

/* This allocates memory for a new receive buffer, maps it for DMA,
 * and populates a struct efx_rx_buffer with the relevant
 * information.
 */
static inline int efx_init_rx_buffer(struct efx_rx_queue *rx_queue,
				     struct efx_rx_buffer *new_rx_buf)
{
	int rc = 0;

	if (rx_queue->channel->rx_alloc_push_pages) {
		new_rx_buf->skb = NULL;
		rc = efx_init_rx_buffer_page(rx_queue, new_rx_buf);
		rx_queue->alloc_page_count++;
	} else {
		new_rx_buf->page = NULL;
		rc = efx_init_rx_buffer_skb(rx_queue, new_rx_buf);
		rx_queue->alloc_skb_count++;
	}

	if (unlikely(rc < 0))
		EFX_LOG_RL(rx_queue->efx, "%s RXQ[%d] =%d\n", __func__,
			   rx_queue->queue, rc);
	return rc;
}

static inline void efx_unmap_rx_buffer(struct efx_nic *efx,
				       struct efx_rx_buffer *rx_buf)
{
	if (rx_buf->page) {
		EFX_BUG_ON_PARANOID(rx_buf->skb);
		if (rx_buf->unmap_addr) {
			pci_unmap_page(efx->pci_dev, rx_buf->unmap_addr,
				       RX_PAGE_SIZE(efx), PCI_DMA_FROMDEVICE);
			rx_buf->unmap_addr = 0;
		}
	} else if (likely(rx_buf->skb)) {
		pci_unmap_single(efx->pci_dev, rx_buf->dma_addr,
				 rx_buf->len, PCI_DMA_FROMDEVICE);
	}
}

static inline void efx_free_rx_buffer(struct efx_nic *efx,
				      struct efx_rx_buffer *rx_buf)
{
	if (rx_buf->page) {
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		rx_buf->page = NULL;
	} else if (likely(rx_buf->skb)) {
		dev_kfree_skb_any(rx_buf->skb);
		rx_buf->skb = NULL;
	}
}

static inline void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
				      struct efx_rx_buffer *rx_buf)
{
	efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
	efx_free_rx_buffer(rx_queue->efx, rx_buf);
}

/**
 * __efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue: RX descriptor queue
 * @retry: Recheck the fill level
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->@fast_fill_limit.  If there is insufficient atomic
 * memory to do so, the caller should retry.
 */
static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue,
					  int retry)
{
	struct efx_rx_buffer *rx_buf;
	unsigned fill_level, index;
	int i, space, rc = 0;

	/* Calculate current fill level.  Do this outside the lock,
	 * because most of the time we'll end up not wanting to do the
	 * fill anyway.
	 */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EFX_BUG_ON_PARANOID(fill_level >
			    rx_queue->efx->type->rxd_ring_mask + 1);

	/* Don't fill if we don't need to */
	if (fill_level >= rx_queue->fast_fill_trigger)
		return 0;

	/* Record minimum fill level */
	if (unlikely(fill_level < rx_queue->min_fill)) {
		if (fill_level)
			rx_queue->min_fill = fill_level;
	}

	/* Acquire RX add lock.  If this lock is contended, then a fast
	 * fill must already be in progress (e.g. in the refill
	 * tasklet), so we don't need to do anything
	 */
	if (!spin_trylock_bh(&rx_queue->add_lock))
		return -1;

 retry:
	/* Recalculate current fill level now that we have the lock */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EFX_BUG_ON_PARANOID(fill_level >
			    rx_queue->efx->type->rxd_ring_mask + 1);
	space = rx_queue->fast_fill_limit - fill_level;
	if (space < EFX_RX_BATCH)
		goto out_unlock;

	EFX_TRACE(rx_queue->efx, "RX queue %d fast-filling descriptor ring from"
		  " level %d to level %d using %s allocation\n",
		  rx_queue->queue, fill_level, rx_queue->fast_fill_limit,
		  rx_queue->channel->rx_alloc_push_pages ? "page" : "skb");

	do {
		for (i = 0; i < EFX_RX_BATCH; ++i) {
			index = (rx_queue->added_count &
				 rx_queue->efx->type->rxd_ring_mask);
			rx_buf = efx_rx_buffer(rx_queue, index);
			rc = efx_init_rx_buffer(rx_queue, rx_buf);
			if (unlikely(rc))
				goto out;
			++rx_queue->added_count;
		}
	} while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH);

	EFX_TRACE(rx_queue->efx, "RX queue %d fast-filled descriptor ring "
		  "to level %d\n", rx_queue->queue,
		  rx_queue->added_count - rx_queue->removed_count);

 out:
	/* Send write pointer to card. */
	falcon_notify_rx_desc(rx_queue);

	/* If the fast fill is running from inside the refill tasklet, then
	 * for SMP systems it may be running on a different CPU to
	 * RX event processing, which means that the fill level may now be
	 * out of date. */
	if (unlikely(retry && (rc == 0)))
		goto retry;

 out_unlock:
	spin_unlock_bh(&rx_queue->add_lock);

	return rc;
}

/**
 * efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue: RX descriptor queue
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->@fast_fill_limit.  If there is insufficient memory to do so,
 * it will schedule a work item to immediately continue the fast fill
 */
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
{
	int rc;

	rc = __efx_fast_push_rx_descriptors(rx_queue, 0);
	if (unlikely(rc)) {
		/* Schedule the work item to run immediately.  The hope is
		 * that work is immediately pending to free some memory
		 * (e.g. an RX event or TX completion)
		 */
		efx_schedule_slow_fill(rx_queue, 0);
	}
}

void efx_rx_work(struct work_struct *data)
{
	struct efx_rx_queue *rx_queue;
	int rc;

	rx_queue = container_of(data, struct efx_rx_queue, work.work);

	if (unlikely(!rx_queue->channel->enabled))
		return;

	EFX_TRACE(rx_queue->efx, "RX queue %d worker thread executing on CPU "
		  "%d\n", rx_queue->queue, raw_smp_processor_id());

	++rx_queue->slow_fill_count;
	/* Push new RX descriptors, allowing at least 1 jiffy for
	 * the kernel to free some more memory. */
	rc = __efx_fast_push_rx_descriptors(rx_queue, 1);
	if (rc)
		efx_schedule_slow_fill(rx_queue, 1);
}

static inline void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
					    struct efx_rx_buffer *rx_buf,
					    int len, int *discard,
					    int *leak_packet)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;

	if (likely(len <= max_len))
		return;

	/* The packet must be discarded, but this is only a fatal error
	 * if the caller indicated it was
	 */
	*discard = 1;

	if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) {
		EFX_ERR_RL(efx, " RX queue %d seriously overlength "
			   "RX event (0x%x > 0x%x+0x%x). Leaking\n",
			   rx_queue->queue, len, max_len,
			   efx->type->rx_buffer_padding);
		/* If this buffer was skb-allocated, then the meta
		 * data at the end of the skb will be trashed. So
		 * we have no choice but to leak the fragment.
		 */
		*leak_packet = (rx_buf->skb != NULL);
		efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
	} else {
		EFX_ERR_RL(efx, " RX queue %d overlength RX event "
			   "(0x%x > 0x%x)\n", rx_queue->queue, len, max_len);
	}

	rx_queue->channel->n_rx_overlength++;
}

/* Pass a received packet up through the generic LRO stack
 *
 * Handles driverlink veto, and passes the fragment up via
 * the appropriate LRO method
 */
static inline void efx_rx_packet_lro(struct efx_channel *channel,
				     struct efx_rx_buffer *rx_buf)
{
	struct net_lro_mgr *lro_mgr = &channel->lro_mgr;
	void *priv = channel;

	/* Pass the skb/page into the LRO engine */
	if (rx_buf->page) {
		struct skb_frag_struct frags;

		frags.page = rx_buf->page;
		frags.page_offset = RX_BUF_OFFSET(rx_buf);
		frags.size = rx_buf->len;

		lro_receive_frags(lro_mgr, &frags, rx_buf->len,
				  rx_buf->len, priv, 0);

		EFX_BUG_ON_PARANOID(rx_buf->skb);
		rx_buf->page = NULL;
	} else {
		EFX_BUG_ON_PARANOID(!rx_buf->skb);

		lro_receive_skb(lro_mgr, rx_buf->skb, priv);
		rx_buf->skb = NULL;
	}
}

/* Allocate and construct an SKB around a struct page. */
static inline struct sk_buff *efx_rx_mk_skb(struct efx_rx_buffer *rx_buf,
					    struct efx_nic *efx,
					    int hdr_len)
{
	struct sk_buff *skb;

	/* Allocate an SKB to store the headers */
	skb = netdev_alloc_skb(efx->net_dev, hdr_len + EFX_PAGE_SKB_ALIGN);
	if (unlikely(skb == NULL)) {
		EFX_ERR_RL(efx, "RX out of memory for skb\n");
		return NULL;
	}

	EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags);
	EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len);

	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb_reserve(skb, EFX_PAGE_SKB_ALIGN);

	skb->len = rx_buf->len;
	skb->truesize = rx_buf->len + sizeof(struct sk_buff);
	memcpy(skb->data, rx_buf->data, hdr_len);
	skb->tail += hdr_len;

	/* Append the remaining page onto the frag list */
	if (unlikely(rx_buf->len > hdr_len)) {
		struct skb_frag_struct *frag = skb_shinfo(skb)->frags;
		frag->page = rx_buf->page;
		frag->page_offset = RX_BUF_OFFSET(rx_buf) + hdr_len;
		frag->size = skb->len - hdr_len;
		skb_shinfo(skb)->nr_frags = 1;
		skb->data_len = frag->size;
	} else {
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		skb->data_len = 0;
	}

	/* Ownership has transferred from the rx_buf to skb */
	rx_buf->page = NULL;

	/* Move past the ethernet header */
	skb->protocol = eth_type_trans(skb, efx->net_dev);

	return skb;
}

void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
		   unsigned int len, int checksummed, int discard)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_buffer *rx_buf;
	int leak_packet = 0;

	rx_buf = efx_rx_buffer(rx_queue, index);
	EFX_BUG_ON_PARANOID(!rx_buf->data);
	EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page);
	EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page));

	/* This allows the refill path to post another buffer.
	 * EFX_RXD_HEAD_ROOM ensures that the slot we are using
	 * isn't overwritten yet.
	 */
	rx_queue->removed_count++;

	/* Validate the length encoded in the event vs the descriptor pushed */
	efx_rx_packet__check_len(rx_queue, rx_buf, len,
				 &discard, &leak_packet);

	EFX_TRACE(efx, "RX queue %d received id %x at %llx+%x %s%s\n",
		  rx_queue->queue, index,
		  (unsigned long long)rx_buf->dma_addr, len,
		  (checksummed ? " [SUMMED]" : ""),
		  (discard ? " [DISCARD]" : ""));

	/* Discard packet, if instructed to do so */
	if (unlikely(discard)) {
		if (unlikely(leak_packet))
			rx_queue->channel->n_skbuff_leaks++;
		else
			/* We haven't called efx_unmap_rx_buffer yet,
			 * so fini the entire rx_buffer here */
			efx_fini_rx_buffer(rx_queue, rx_buf);
		return;
	}

	/* Release card resources - assumes all RX buffers consumed in-order
	 * per RX queue
	 */
	efx_unmap_rx_buffer(efx, rx_buf);

	/* Prefetch nice and early so data will (hopefully) be in cache by
	 * the time we look at it.
	 */
	prefetch(rx_buf->data);

	/* Pipeline receives so that we give time for packet headers to be
	 * prefetched into cache.
	 */
	rx_buf->len = len;
	if (rx_queue->channel->rx_pkt)
		__efx_rx_packet(rx_queue->channel,
				rx_queue->channel->rx_pkt,
				rx_queue->channel->rx_pkt_csummed);
	rx_queue->channel->rx_pkt = rx_buf;
	rx_queue->channel->rx_pkt_csummed = checksummed;
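	/* The buffer deferred here is handled either by the next call to
	 * efx_rx_packet(), or - for the last packet of a NAPI poll - it is
	 * expected to be flushed by the channel's event-processing code. */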
}

/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel,
		     struct efx_rx_buffer *rx_buf, int checksummed)
{
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;
	int lro = efx->net_dev->features & NETIF_F_LRO;

	/* If we're in loopback test, then pass the packet directly to the
	 * loopback layer, and free the rx_buf here
	 */
	if (unlikely(efx->loopback_selftest)) {
		efx_loopback_rx_packet(efx, rx_buf->data, rx_buf->len);
		efx_free_rx_buffer(efx, rx_buf);
		goto done;
	}

	if (rx_buf->skb) {
		prefetch(skb_shinfo(rx_buf->skb));

		skb_put(rx_buf->skb, rx_buf->len);

		/* Move past the ethernet header.  rx_buf->data still points
		 * at the ethernet header */
		rx_buf->skb->protocol = eth_type_trans(rx_buf->skb,
						       efx->net_dev);
	}

	/* Both our generic-LRO and SFC-SSR support skb and page based
	 * allocation, but neither support switching from one to the
	 * other on the fly.  If we spot that the allocation mode has
	 * changed, then flush the LRO state.
	 */
	if (unlikely(channel->rx_alloc_pop_pages != (rx_buf->page != NULL))) {
		efx_flush_lro(channel);
		channel->rx_alloc_pop_pages = (rx_buf->page != NULL);
	}
	if (likely(checksummed && lro)) {
		efx_rx_packet_lro(channel, rx_buf);
		goto done;
	}

	/* Form an skb if required */
	if (rx_buf->page) {
		int hdr_len = min(rx_buf->len, EFX_SKB_HEADERS);
		skb = efx_rx_mk_skb(rx_buf, efx, hdr_len);
		if (unlikely(skb == NULL)) {
			efx_free_rx_buffer(efx, rx_buf);
			goto done;
		}
	} else {
		/* We now own the SKB */
		skb = rx_buf->skb;
		rx_buf->skb = NULL;
	}

	EFX_BUG_ON_PARANOID(rx_buf->page);
	EFX_BUG_ON_PARANOID(rx_buf->skb);
	EFX_BUG_ON_PARANOID(!skb);

	/* Set the SKB flags */
	if (unlikely(!checksummed || !efx->rx_checksum_enabled))
		skb->ip_summed = CHECKSUM_NONE;

	/* Pass the packet up */
	netif_receive_skb(skb);

	/* Update allocation strategy method */
	channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;

 done:
	efx->net_dev->last_rx = jiffies;
}

void efx_rx_strategy(struct efx_channel *channel)
{
	enum efx_rx_alloc_method method = rx_alloc_method;

	/* Only makes sense to use page based allocation if LRO is enabled */
	if (!(channel->efx->net_dev->features & NETIF_F_LRO)) {
		method = RX_ALLOC_METHOD_SKB;
	} else if (method == RX_ALLOC_METHOD_AUTO) {
		/* Constrain the rx_alloc_level */
		if (channel->rx_alloc_level < 0)
			channel->rx_alloc_level = 0;
		else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX)
			channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX;

		/* Decide on the allocation method */
		method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_LRO) ?
			  RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB);
	}

	/* Push the option */
	channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE);
}

int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int rxq_size;
	int rc;

	EFX_LOG(efx, "creating RX queue %d\n", rx_queue->queue);

	/* Allocate RX buffers */
	rxq_size = (efx->type->rxd_ring_mask + 1) * sizeof(*rx_queue->buffer);
	rx_queue->buffer = kzalloc(rxq_size, GFP_KERNEL);
	if (!rx_queue->buffer) {
		rc = -ENOMEM;
		goto fail1;
	}

	rc = falcon_probe_rx(rx_queue);
	if (rc)
		goto fail2;

	return 0;

 fail2:
	kfree(rx_queue->buffer);
	rx_queue->buffer = NULL;
 fail1:
	rx_queue->used = 0;

	return rc;
}

int efx_init_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int max_fill, trigger, limit;

	EFX_LOG(rx_queue->efx, "initialising RX queue %d\n", rx_queue->queue);

	/* Initialise ptr fields */
	rx_queue->added_count = 0;
	rx_queue->notified_count = 0;
	rx_queue->removed_count = 0;
	rx_queue->min_fill = -1U;
	rx_queue->min_overfill = -1U;

	/* Initialise limit fields */
	max_fill = efx->type->rxd_ring_mask + 1 - EFX_RXD_HEAD_ROOM;
	trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
	limit = max_fill * min(rx_refill_limit, 100U) / 100U;

	rx_queue->max_fill = max_fill;
	rx_queue->fast_fill_trigger = trigger;
	rx_queue->fast_fill_limit = limit;

	/* Set up RX descriptor ring */
	return falcon_init_rx(rx_queue);
}

void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
{
	int i;
	struct efx_rx_buffer *rx_buf;

	EFX_LOG(rx_queue->efx, "shutting down RX queue %d\n", rx_queue->queue);

	falcon_fini_rx(rx_queue);

	/* Release RX buffers.  NB: start at index 0, not the current HW ptr */
	if (rx_queue->buffer) {
		for (i = 0; i <= rx_queue->efx->type->rxd_ring_mask; i++) {
			rx_buf = efx_rx_buffer(rx_queue, i);
			efx_fini_rx_buffer(rx_queue, rx_buf);
		}
	}

	/* For a page that is part-way through splitting into RX buffers */
	if (rx_queue->buf_page != NULL) {
		pci_unmap_page(rx_queue->efx->pci_dev, rx_queue->buf_dma_addr,
			       RX_PAGE_SIZE(rx_queue->efx), PCI_DMA_FROMDEVICE);
		__free_pages(rx_queue->buf_page,
			     rx_queue->efx->rx_buffer_order);
		rx_queue->buf_page = NULL;
	}
}

void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
{
	EFX_LOG(rx_queue->efx, "destroying RX queue %d\n", rx_queue->queue);

	falcon_remove_rx(rx_queue);

	kfree(rx_queue->buffer);
	rx_queue->buffer = NULL;
	rx_queue->used = 0;
}

void efx_flush_lro(struct efx_channel *channel)
{
	lro_flush_all(&channel->lro_mgr);
}


module_param(rx_alloc_method, int, 0644);
MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers");

module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
		 "RX descriptor ring fast/slow fill threshold (%)");