Blame - drivers/net/cxgb4/sge.c - kernel/msm

blob: d1f8f225e45a7799dd6e028a5980c39357907e5d [file] [log] [blame]

Dimitris Michailidis	fd3a479	2010-04-01 15:28:24 +0000	[diff] [blame]	1	/*
				2	* This file is part of the Chelsio T4 Ethernet driver for Linux.
				3	*
				4	* Copyright (c) 2003-2010 Chelsio Communications, Inc. All rights reserved.
				5	*
				6	* This software is available to you under a choice of one of two
				7	* licenses. You may choose to be licensed under the terms of the GNU
				8	* General Public License (GPL) Version 2, available from the file
				9	* COPYING in the main directory of this source tree, or the
				10	* OpenIB.org BSD license below:
				11	*
				12	* Redistribution and use in source and binary forms, with or
				13	* without modification, are permitted provided that the following
				14	* conditions are met:
				15	*
				16	* - Redistributions of source code must retain the above
				17	* copyright notice, this list of conditions and the following
				18	* disclaimer.
				19	*
				20	* - Redistributions in binary form must reproduce the above
				21	* copyright notice, this list of conditions and the following
				22	* disclaimer in the documentation and/or other materials
				23	* provided with the distribution.
				24	*
				25	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
				26	* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
				27	* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
				28	* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
				29	* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
				30	* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
				31	* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
				32	* SOFTWARE.
				33	*/
				34
				35	#include <linux/skbuff.h>
				36	#include <linux/netdevice.h>
				37	#include <linux/etherdevice.h>
				38	#include <linux/if_vlan.h>
				39	#include <linux/ip.h>
				40	#include <linux/dma-mapping.h>
				41	#include <linux/jiffies.h>
				42	#include <net/ipv6.h>
				43	#include <net/tcp.h>
				44	#include "cxgb4.h"
				45	#include "t4_regs.h"
				46	#include "t4_msg.h"
				47	#include "t4fw_api.h"
				48
				/*
				 * Rx buffer size.  We use largish buffers if possible but settle for single
				 * pages under memory shortage.  FL_PG_ORDER is the page allocation order of
				 * the large buffers: order 0 when pages are already >= 64KB, otherwise
				 * whatever order yields a 64KB (1 << 16) buffer.
				 */
				#if PAGE_SHIFT >= 16
				# define FL_PG_ORDER 0
				#else
				# define FL_PG_ORDER (16 - PAGE_SHIFT)
				#endif

				/* RX_PULL_LEN should be <= RX_COPY_THRES */
				#define RX_COPY_THRES    256
				#define RX_PULL_LEN      128

				/*
				 * Main body length for sk_buffs used for Rx Ethernet packets with fragments.
				 * Should be >= RX_PULL_LEN but possibly bigger to give pskb_may_pull some room.
				 */
				#define RX_PKT_SKB_LEN   512

				/* Ethernet header padding prepended to RX_PKTs */
				#define RX_PKT_PAD 2

				/*
				 * Max number of Tx descriptors we clean up at a time.  Should be modest as
				 * freeing skbs isn't cheap and it happens while holding locks.  We just need
				 * to free packets faster than they arrive, we eventually catch up and keep
				 * the amortized cost reasonable.  Must be >= 2 * TXQ_STOP_THRES.
				 */
				#define MAX_TX_RECLAIM 16

				/*
				 * Max number of Rx buffers we replenish at a time.  Again keep this modest,
				 * allocating buffers isn't cheap either.  Unsigned so that it can be
				 * compared with unsigned queue counts without sign-mismatch.
				 */
				#define MAX_RX_REFILL 16U

				/*
				 * Period of the Rx queue check timer.  This timer is infrequent as it has
				 * something to do only when the system experiences severe memory shortage.
				 */
				#define RX_QCHECK_PERIOD (HZ / 2)

				/*
				 * Period of the Tx queue check timer.
				 */
				#define TX_QCHECK_PERIOD (HZ / 2)

				/*
				 * Max number of Tx descriptors to be reclaimed by the Tx timer.
				 */
				#define MAX_TIMER_TX_RECLAIM 100

				/*
				 * Timer index used when backing off due to memory shortage.
				 */
				#define NOMEM_TMR_IDX (SGE_NTIMERS - 1)

				/*
				 * An FL with <= FL_STARVE_THRES buffers is starving and a periodic timer will
				 * attempt to refill it.
				 */
				#define FL_STARVE_THRES 4

				/*
				 * Suspend an Ethernet Tx queue with fewer available descriptors than this.
				 * This is the same as calc_tx_descs() for a TSO packet with
				 * nr_frags == MAX_SKB_FRAGS.
				 */
				#define ETHTXQ_STOP_THRES \
					(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))

				/*
				 * Suspension threshold for non-Ethernet Tx queues.  We require enough room
				 * for a full sized WR.
				 */
				#define TXQ_STOP_THRES (SGE_MAX_WR_LEN / sizeof(struct tx_desc))

				/*
				 * Max Tx descriptor space we allow for an Ethernet packet to be inlined
				 * into a WR.
				 */
				#define MAX_IMM_TX_PKT_LEN 128

				/*
				 * Max size of a WR sent through a control Tx queue.
				 */
				#define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
				137
				138	enum {
				139	/* packet alignment in FL buffers */
				140	FL_ALIGN = L1_CACHE_BYTES < 32 ? 32 : L1_CACHE_BYTES,
				141	/* egress status entry size */
				142	STAT_LEN = L1_CACHE_BYTES > 64 ? 128 : 64
				143	};
				144
				/* SW state per Tx descriptor */
				struct tx_sw_desc {
					struct sk_buff *skb;	/* packet held by this descriptor, or NULL */
					struct ulptx_sgl *sgl;	/* scatter/gather list recorded for @skb */
				};
				149
				150	struct rx_sw_desc { /* SW state per Rx descriptor */
				151	struct page *page;
				152	dma_addr_t dma_addr;
				153	};
				154
				/*
				 * The low bits of rx_sw_desc.dma_addr have special meaning.
				 */
				enum {
					RX_LARGE_BUF    = 1 << 0, /* buffer is larger than PAGE_SIZE */
					RX_UNMAPPED_BUF = 1 << 1, /* buffer is not mapped */
				};
				162
				163	static inline dma_addr_t get_buf_addr(const struct rx_sw_desc *d)
				164	{
				165	return d->dma_addr & ~(dma_addr_t)(RX_LARGE_BUF \| RX_UNMAPPED_BUF);
				166	}
				167
				168	static inline bool is_buf_mapped(const struct rx_sw_desc *d)
				169	{
				170	return !(d->dma_addr & RX_UNMAPPED_BUF);
				171	}
				172
				173	/**
				174	* txq_avail - return the number of available slots in a Tx queue
				175	* @q: the Tx queue
				176	*
				177	* Returns the number of descriptors in a Tx queue available to write new
				178	* packets.
				179	*/
				180	static inline unsigned int txq_avail(const struct sge_txq *q)
				181	{
				182	return q->size - 1 - q->in_use;
				183	}
				184
				185	/**
				186	* fl_cap - return the capacity of a free-buffer list
				187	* @fl: the FL
				188	*
				189	* Returns the capacity of a free-buffer list. The capacity is less than
				190	* the size because one descriptor needs to be left unpopulated, otherwise
				191	* HW will think the FL is empty.
				192	*/
				193	static inline unsigned int fl_cap(const struct sge_fl *fl)
				194	{
				195	return fl->size - 8; /* 1 descriptor = 8 buffers */
				196	}
				197
				198	static inline bool fl_starving(const struct sge_fl *fl)
				199	{
				200	return fl->avail - fl->pend_cred <= FL_STARVE_THRES;
				201	}
				202
				203	static int map_skb(struct device dev, const struct sk_buff skb,
				204	dma_addr_t *addr)
				205	{
				206	const skb_frag_t fp, end;
				207	const struct skb_shared_info *si;
				208
				209	*addr = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);
				210	if (dma_mapping_error(dev, *addr))
				211	goto out_err;
				212
				213	si = skb_shinfo(skb);
				214	end = &si->frags[si->nr_frags];
				215
				216	for (fp = si->frags; fp < end; fp++) {
				217	*++addr = dma_map_page(dev, fp->page, fp->page_offset, fp->size,
				218	DMA_TO_DEVICE);
				219	if (dma_mapping_error(dev, *addr))
				220	goto unwind;
				221	}
				222	return 0;
				223
				224	unwind:
				225	while (fp-- > si->frags)
				226	dma_unmap_page(dev, *--addr, fp->size, DMA_TO_DEVICE);
				227
				228	dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE);
				229	out_err:
				230	return -ENOMEM;
				231	}
				232
				#ifdef CONFIG_NEED_DMA_MAP_STATE
				/*
				 * Undo the DMA mappings of a packet.
				 *
				 * @dev:  the device the buffers were mapped for
				 * @skb:  the packet
				 * @addr: bus addresses of the packet's buffers: linear data first, then
				 *        one entry per page fragment
				 */
				static void unmap_skb(struct device *dev, const struct sk_buff *skb,
						      const dma_addr_t *addr)
				{
					const skb_frag_t *fp, *end;
					const struct skb_shared_info *si;

					dma_unmap_single(dev, *addr++, skb_headlen(skb), DMA_TO_DEVICE);

					si = skb_shinfo(skb);
					end = &si->frags[si->nr_frags];
					for (fp = si->frags; fp < end; fp++)
						dma_unmap_page(dev, *addr++, fp->size, DMA_TO_DEVICE);
				}

				/**
				 *	deferred_unmap_destructor - unmap a packet when it is freed
				 *	@skb: the packet
				 *
				 *	This is the packet destructor used for Tx packets that need to remain
				 *	mapped until they are freed rather than until their Tx descriptors are
				 *	freed.  The list of bus addresses is read from the start of the
				 *	packet's buffer (skb->head).
				 */
				static void deferred_unmap_destructor(struct sk_buff *skb)
				{
					unmap_skb(skb->dev->dev.parent, skb, (dma_addr_t *)skb->head);
				}
				#endif
				261
				262	static void unmap_sgl(struct device dev, const struct sk_buff skb,
				263	const struct ulptx_sgl sgl, const struct sge_txq q)
				264	{
				265	const struct ulptx_sge_pair *p;
				266	unsigned int nfrags = skb_shinfo(skb)->nr_frags;
				267
				268	if (likely(skb_headlen(skb)))
				269	dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
				270	DMA_TO_DEVICE);
				271	else {
				272	dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
				273	DMA_TO_DEVICE);
				274	nfrags--;
				275	}
				276
				277	/*
				278	* the complexity below is because of the possibility of a wrap-around
				279	* in the middle of an SGL
				280	*/
				281	for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
				282	if (likely((u8 )(p + 1) <= (u8 )q->stat)) {
				283	unmap: dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
				284	ntohl(p->len[0]), DMA_TO_DEVICE);
				285	dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
				286	ntohl(p->len[1]), DMA_TO_DEVICE);
				287	p++;
				288	} else if ((u8 )p == (u8 )q->stat) {
				289	p = (const struct ulptx_sge_pair *)q->desc;
				290	goto unmap;
				291	} else if ((u8 )p + 8 == (u8 )q->stat) {
				292	const __be64 addr = (const __be64 )q->desc;
				293
				294	dma_unmap_page(dev, be64_to_cpu(addr[0]),
				295	ntohl(p->len[0]), DMA_TO_DEVICE);
				296	dma_unmap_page(dev, be64_to_cpu(addr[1]),
				297	ntohl(p->len[1]), DMA_TO_DEVICE);
				298	p = (const struct ulptx_sge_pair *)&addr[2];
				299	} else {
				300	const __be64 addr = (const __be64 )q->desc;
				301
				302	dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
				303	ntohl(p->len[0]), DMA_TO_DEVICE);
				304	dma_unmap_page(dev, be64_to_cpu(addr[0]),
				305	ntohl(p->len[1]), DMA_TO_DEVICE);
				306	p = (const struct ulptx_sge_pair *)&addr[1];
				307	}
				308	}
				309	if (nfrags) {
				310	__be64 addr;
				311
				312	if ((u8 )p == (u8 )q->stat)
				313	p = (const struct ulptx_sge_pair *)q->desc;
				314	addr = (u8 )p + 16 <= (u8 )q->stat ? p->addr[0] :
				315	(const __be64 )q->desc;
				316	dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]),
				317	DMA_TO_DEVICE);
				318	}
				319	}
				320
				321	/**
				322	* free_tx_desc - reclaims Tx descriptors and their buffers
				323	* @adapter: the adapter
				324	* @q: the Tx queue to reclaim descriptors from
				325	* @n: the number of descriptors to reclaim
				326	* @unmap: whether the buffers should be unmapped for DMA
				327	*
				328	* Reclaims Tx descriptors from an SGE Tx queue and frees the associated
				329	* Tx buffers. Called with the Tx queue lock held.
				330	*/
				331	static void free_tx_desc(struct adapter adap, struct sge_txq q,
				332	unsigned int n, bool unmap)
				333	{
				334	struct tx_sw_desc *d;
				335	unsigned int cidx = q->cidx;
				336	struct device *dev = adap->pdev_dev;
				337
				338	d = &q->sdesc[cidx];
				339	while (n--) {
				340	if (d->skb) { /* an SGL is present */
				341	if (unmap)
				342	unmap_sgl(dev, d->skb, d->sgl, q);
				343	kfree_skb(d->skb);
				344	d->skb = NULL;
				345	}
				346	++d;
				347	if (++cidx == q->size) {
				348	cidx = 0;
				349	d = q->sdesc;
				350	}
				351	}
				352	q->cidx = cidx;
				353	}
				354
				355	/*
				356	* Return the number of reclaimable descriptors in a Tx queue.
				357	*/
				358	static inline int reclaimable(const struct sge_txq *q)
				359	{
				360	int hw_cidx = ntohs(q->stat->cidx);
				361	hw_cidx -= q->cidx;
				362	return hw_cidx < 0 ? hw_cidx + q->size : hw_cidx;
				363	}
				364
				365	/**
				366	* reclaim_completed_tx - reclaims completed Tx descriptors
				367	* @adap: the adapter
				368	* @q: the Tx queue to reclaim completed descriptors from
				369	* @unmap: whether the buffers should be unmapped for DMA
				370	*
				371	* Reclaims Tx descriptors that the SGE has indicated it has processed,
				372	* and frees the associated buffers if possible. Called with the Tx
				373	* queue locked.
				374	*/
				375	static inline void reclaim_completed_tx(struct adapter adap, struct sge_txq q,
				376	bool unmap)
				377	{
				378	int avail = reclaimable(q);
				379
				380	if (avail) {
				381	/*
				382	* Limit the amount of clean up work we do at a time to keep
				383	* the Tx lock hold time O(1).
				384	*/
				385	if (avail > MAX_TX_RECLAIM)
				386	avail = MAX_TX_RECLAIM;
				387
				388	free_tx_desc(adap, q, avail, unmap);
				389	q->in_use -= avail;
				390	}
				391	}
				392
				393	static inline int get_buf_size(const struct rx_sw_desc *d)
				394	{
				395	#if FL_PG_ORDER > 0
				396	return (d->dma_addr & RX_LARGE_BUF) ? (PAGE_SIZE << FL_PG_ORDER) :
				397	PAGE_SIZE;
				398	#else
				399	return PAGE_SIZE;
				400	#endif
				401	}
				402
				403	/**
				404	* free_rx_bufs - free the Rx buffers on an SGE free list
				405	* @adap: the adapter
				406	* @q: the SGE free list to free buffers from
				407	* @n: how many buffers to free
				408	*
				409	* Release the next @n buffers on an SGE free-buffer Rx queue. The
				410	* buffers must be made inaccessible to HW before calling this function.
				411	*/
				412	static void free_rx_bufs(struct adapter adap, struct sge_fl q, int n)
				413	{
				414	while (n--) {
				415	struct rx_sw_desc *d = &q->sdesc[q->cidx];
				416
				417	if (is_buf_mapped(d))
				418	dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
				419	get_buf_size(d), PCI_DMA_FROMDEVICE);
				420	put_page(d->page);
				421	d->page = NULL;
				422	if (++q->cidx == q->size)
				423	q->cidx = 0;
				424	q->avail--;
				425	}
				426	}
				427
				428	/**
				429	* unmap_rx_buf - unmap the current Rx buffer on an SGE free list
				430	* @adap: the adapter
				431	* @q: the SGE free list
				432	*
				433	* Unmap the current buffer on an SGE free-buffer Rx queue. The
				434	* buffer must be made inaccessible to HW before calling this function.
				435	*
				436	* This is similar to @free_rx_bufs above but does not free the buffer.
				437	* Do note that the FL still loses any further access to the buffer.
				438	*/
				439	static void unmap_rx_buf(struct adapter adap, struct sge_fl q)
				440	{
				441	struct rx_sw_desc *d = &q->sdesc[q->cidx];
				442
				443	if (is_buf_mapped(d))
				444	dma_unmap_page(adap->pdev_dev, get_buf_addr(d),
				445	get_buf_size(d), PCI_DMA_FROMDEVICE);
				446	d->page = NULL;
				447	if (++q->cidx == q->size)
				448	q->cidx = 0;
				449	q->avail--;
				450	}
				451
				452	static inline void ring_fl_db(struct adapter adap, struct sge_fl q)
				453	{
				454	if (q->pend_cred >= 8) {
				455	wmb();
				456	t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO \|
				457	QID(q->cntxt_id) \| PIDX(q->pend_cred / 8));
				458	q->pend_cred &= 7;
				459	}
				460	}
				461
				462	static inline void set_rx_sw_desc(struct rx_sw_desc sd, struct page pg,
				463	dma_addr_t mapping)
				464	{
				465	sd->page = pg;
				466	sd->dma_addr = mapping; /* includes size low bits */
				467	}
				468
				469	/**
				470	* refill_fl - refill an SGE Rx buffer ring
				471	* @adap: the adapter
				472	* @q: the ring to refill
				473	* @n: the number of new buffers to allocate
				474	* @gfp: the gfp flags for the allocations
				475	*
				476	* (Re)populate an SGE free-buffer queue with up to @n new packet buffers,
				477	* allocated with the supplied gfp flags. The caller must assure that
				478	* @n does not exceed the queue's capacity. If afterwards the queue is
				479	* found critically low mark it as starving in the bitmap of starving FLs.
				480	*
				481	* Returns the number of buffers allocated.
				482	*/
				483	static unsigned int refill_fl(struct adapter adap, struct sge_fl q, int n,
				484	gfp_t gfp)
				485	{
				486	struct page *pg;
				487	dma_addr_t mapping;
				488	unsigned int cred = q->avail;
				489	__be64 *d = &q->desc[q->pidx];
				490	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
				491
				492	gfp \|= __GFP_NOWARN; /* failures are expected */
				493
				494	#if FL_PG_ORDER > 0
				495	/*
				496	* Prefer large buffers
				497	*/
				498	while (n) {
				499	pg = alloc_pages(gfp \| __GFP_COMP, FL_PG_ORDER);
				500	if (unlikely(!pg)) {
				501	q->large_alloc_failed++;
				502	break; /* fall back to single pages */
				503	}
				504
				505	mapping = dma_map_page(adap->pdev_dev, pg, 0,
				506	PAGE_SIZE << FL_PG_ORDER,
				507	PCI_DMA_FROMDEVICE);
				508	if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
				509	__free_pages(pg, FL_PG_ORDER);
				510	goto out; /* do not try small pages for this error */
				511	}
				512	mapping \|= RX_LARGE_BUF;
				513	*d++ = cpu_to_be64(mapping);
				514
				515	set_rx_sw_desc(sd, pg, mapping);
				516	sd++;
				517
				518	q->avail++;
				519	if (++q->pidx == q->size) {
				520	q->pidx = 0;
				521	sd = q->sdesc;
				522	d = q->desc;
				523	}
				524	n--;
				525	}
				526	#endif
				527
				528	while (n--) {
				529	pg = __netdev_alloc_page(adap->port[0], gfp);
				530	if (unlikely(!pg)) {
				531	q->alloc_failed++;
				532	break;
				533	}
				534
				535	mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
				536	PCI_DMA_FROMDEVICE);
				537	if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
				538	netdev_free_page(adap->port[0], pg);
				539	goto out;
				540	}
				541	*d++ = cpu_to_be64(mapping);
				542
				543	set_rx_sw_desc(sd, pg, mapping);
				544	sd++;
				545
				546	q->avail++;
				547	if (++q->pidx == q->size) {
				548	q->pidx = 0;
				549	sd = q->sdesc;
				550	d = q->desc;
				551	}
				552	}
				553
				554	out: cred = q->avail - cred;
				555	q->pend_cred += cred;
				556	ring_fl_db(adap, q);
				557
				558	if (unlikely(fl_starving(q))) {
				559	smp_wmb();
				560	set_bit(q->cntxt_id, adap->sge.starving_fl);
				561	}
				562
				563	return cred;
				564	}
				565
				566	static inline void __refill_fl(struct adapter adap, struct sge_fl fl)
				567	{
				568	refill_fl(adap, fl, min(MAX_RX_REFILL, fl_cap(fl) - fl->avail),
				569	GFP_ATOMIC);
				570	}
				571
				572	/**
				573	* alloc_ring - allocate resources for an SGE descriptor ring
				574	* @dev: the PCI device's core device
				575	* @nelem: the number of descriptors
				576	* @elem_size: the size of each descriptor
				577	* @sw_size: the size of the SW state associated with each ring element
				578	* @phys: the physical address of the allocated ring
				579	* @metadata: address of the array holding the SW state for the ring
				580	* @stat_size: extra space in HW ring for status information
				581	*
				582	* Allocates resources for an SGE descriptor ring, such as Tx queues,
				583	* free buffer lists, or response queues. Each SGE ring requires
				584	* space for its HW descriptors plus, optionally, space for the SW state
				585	* associated with each HW entry (the metadata). The function returns
				586	* three values: the virtual address for the HW ring (the return value
				587	* of the function), the bus address of the HW ring, and the address
				588	* of the SW ring.
				589	*/
				590	static void alloc_ring(struct device dev, size_t nelem, size_t elem_size,
				591	size_t sw_size, dma_addr_t phys, void metadata,
				592	size_t stat_size)
				593	{
				594	size_t len = nelem * elem_size + stat_size;
				595	void *s = NULL;
				596	void *p = dma_alloc_coherent(dev, len, phys, GFP_KERNEL);
				597
				598	if (!p)
				599	return NULL;
				600	if (sw_size) {
				601	s = kcalloc(nelem, sw_size, GFP_KERNEL);
				602
				603	if (!s) {
				604	dma_free_coherent(dev, len, p, *phys);
				605	return NULL;
				606	}
				607	}
				608	if (metadata)
				609	(void *)metadata = s;
				610	memset(p, 0, len);
				611	return p;
				612	}
				613
				/**
				 *	sgl_len - calculates the size of an SGL of the given capacity
				 *	@n: the number of SGL entries
				 *
				 *	Calculates the number of flits needed for a scatter/gather list that
				 *	can hold the given number of entries.  The first entry always costs
				 *	2 flits; the remaining @n - 1 entries cost 3 flits per pair, and 2
				 *	flits for an unpaired last entry.
				 */
				static inline unsigned int sgl_len(unsigned int n)
				{
					unsigned int rest = n - 1;	/* entries beyond the first */

					return (3 * rest) / 2 + (rest & 1) + 2;
				}
				626
				627	/**
				628	* flits_to_desc - returns the num of Tx descriptors for the given flits
				629	* @n: the number of flits
				630	*
				631	* Returns the number of Tx descriptors needed for the supplied number
				632	* of flits.
				633	*/
				634	static inline unsigned int flits_to_desc(unsigned int n)
				635	{
				636	BUG_ON(n > SGE_MAX_WR_LEN / 8);
				637	return DIV_ROUND_UP(n, 8);
				638	}
				639
				640	/**
				641	* is_eth_imm - can an Ethernet packet be sent as immediate data?
				642	* @skb: the packet
				643	*
				644	* Returns whether an Ethernet packet is small enough to fit as
				645	* immediate data.
				646	*/
				647	static inline int is_eth_imm(const struct sk_buff *skb)
				648	{
				649	return skb->len <= MAX_IMM_TX_PKT_LEN - sizeof(struct cpl_tx_pkt);
				650	}
				651
				652	/**
				653	* calc_tx_flits - calculate the number of flits for a packet Tx WR
				654	* @skb: the packet
				655	*
				656	* Returns the number of flits needed for a Tx WR for the given Ethernet
				657	* packet, including the needed WR and CPL headers.
				658	*/
				659	static inline unsigned int calc_tx_flits(const struct sk_buff *skb)
				660	{
				661	unsigned int flits;
				662
				663	if (is_eth_imm(skb))
				664	return DIV_ROUND_UP(skb->len + sizeof(struct cpl_tx_pkt), 8);
				665
				666	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 4;
				667	if (skb_shinfo(skb)->gso_size)
				668	flits += 2;
				669	return flits;
				670	}
				671
				/**
				 *	calc_tx_descs - calculate the number of Tx descriptors for a packet
				 *	@skb: the packet
				 *
				 *	Returns the number of Tx descriptors needed for the given Ethernet
				 *	packet, including the needed WR and CPL headers.
				 */
				static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
				{
					return flits_to_desc(calc_tx_flits(skb));
				}
				683
				684	/**
				685	* write_sgl - populate a scatter/gather list for a packet
				686	* @skb: the packet
				687	* @q: the Tx queue we are writing into
				688	* @sgl: starting location for writing the SGL
				689	* @end: points right after the end of the SGL
				690	* @start: start offset into skb main-body data to include in the SGL
				691	* @addr: the list of bus addresses for the SGL elements
				692	*
				693	* Generates a gather list for the buffers that make up a packet.
				694	* The caller must provide adequate space for the SGL that will be written.
				695	* The SGL includes all of the packet's page fragments and the data in its
				696	* main body except for the first @start bytes. @sgl must be 16-byte
				697	* aligned and within a Tx descriptor with available space. @end points
				698	* right after the end of the SGL but does not account for any potential
				699	* wrap around, i.e., @end > @sgl.
				700	*/
				701	static void write_sgl(const struct sk_buff skb, struct sge_txq q,
				702	struct ulptx_sgl sgl, u64 end, unsigned int start,
				703	const dma_addr_t *addr)
				704	{
				705	unsigned int i, len;
				706	struct ulptx_sge_pair *to;
				707	const struct skb_shared_info *si = skb_shinfo(skb);
				708	unsigned int nfrags = si->nr_frags;
				709	struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1];
				710
				711	len = skb_headlen(skb) - start;
				712	if (likely(len)) {
				713	sgl->len0 = htonl(len);
				714	sgl->addr0 = cpu_to_be64(addr[0] + start);
				715	nfrags++;
				716	} else {
				717	sgl->len0 = htonl(si->frags[0].size);
				718	sgl->addr0 = cpu_to_be64(addr[1]);
				719	}
				720
				721	sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) \| ULPTX_NSGE(nfrags));
				722	if (likely(--nfrags == 0))
				723	return;
				724	/*
				725	* Most of the complexity below deals with the possibility we hit the
				726	* end of the queue in the middle of writing the SGL. For this case
				727	* only we create the SGL in a temporary buffer and then copy it.
				728	*/
				729	to = (u8 )end > (u8 )q->stat ? buf : sgl->sge;
				730
				731	for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) {
				732	to->len[0] = cpu_to_be32(si->frags[i].size);
				733	to->len[1] = cpu_to_be32(si->frags[++i].size);
				734	to->addr[0] = cpu_to_be64(addr[i]);
				735	to->addr[1] = cpu_to_be64(addr[++i]);
				736	}
				737	if (nfrags) {
				738	to->len[0] = cpu_to_be32(si->frags[i].size);
				739	to->len[1] = cpu_to_be32(0);
				740	to->addr[0] = cpu_to_be64(addr[i + 1]);
				741	}
				742	if (unlikely((u8 )end > (u8 )q->stat)) {
				743	unsigned int part0 = (u8 )q->stat - (u8 )sgl->sge, part1;
				744
				745	if (likely(part0))
				746	memcpy(sgl->sge, buf, part0);
				747	part1 = (u8 )end - (u8 )q->stat;
				748	memcpy(q->desc, (u8 *)buf + part0, part1);
				749	end = (void *)q->desc + part1;
				750	}
				751	if ((uintptr_t)end & 8) /* 0-pad to multiple of 16 */
				752	(u64 )end = 0;
				753	}
				754
				755	/**
				756	* ring_tx_db - check and potentially ring a Tx queue's doorbell
				757	* @adap: the adapter
				758	* @q: the Tx queue
				759	* @n: number of new descriptors to give to HW
				760	*
				761	* Ring the doorbel for a Tx queue.
				762	*/
				763	static inline void ring_tx_db(struct adapter adap, struct sge_txq q, int n)
				764	{
				765	wmb(); /* write descriptors before telling HW */
				766	t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL),
				767	QID(q->cntxt_id) \| PIDX(n));
				768	}
				769
				770	/**
				771	* inline_tx_skb - inline a packet's data into Tx descriptors
				772	* @skb: the packet
				773	* @q: the Tx queue where the packet will be inlined
				774	* @pos: starting position in the Tx queue where to inline the packet
				775	*
				776	* Inline a packet's contents directly into Tx descriptors, starting at
				777	* the given position within the Tx DMA ring.
				778	* Most of the complexity of this operation is dealing with wrap arounds
				779	* in the middle of the packet we want to inline.
				780	*/
				781	static void inline_tx_skb(const struct sk_buff skb, const struct sge_txq q,
				782	void *pos)
				783	{
				784	u64 *p;
				785	int left = (void *)q->stat - pos;
				786
				787	if (likely(skb->len <= left)) {
				788	if (likely(!skb->data_len))
				789	skb_copy_from_linear_data(skb, pos, skb->len);
				790	else
				791	skb_copy_bits(skb, 0, pos, skb->len);
				792	pos += skb->len;
				793	} else {
				794	skb_copy_bits(skb, 0, pos, left);
				795	skb_copy_bits(skb, left, q->desc, skb->len - left);
				796	pos = (void *)q->desc + (skb->len - left);
				797	}
				798
				799	/* 0-pad to multiple of 16 */
				800	p = PTR_ALIGN(pos, 8);
				801	if ((uintptr_t)p & 8)
				802	*p = 0;
				803	}
				804
				805	/*
				806	* Figure out what HW csum a packet wants and return the appropriate control
				807	* bits.
				808	*/
				809	static u64 hwcsum(const struct sk_buff *skb)
				810	{
				811	int csum_type;
				812	const struct iphdr *iph = ip_hdr(skb);
				813
				814	if (iph->version == 4) {
				815	if (iph->protocol == IPPROTO_TCP)
				816	csum_type = TX_CSUM_TCPIP;
				817	else if (iph->protocol == IPPROTO_UDP)
				818	csum_type = TX_CSUM_UDPIP;
				819	else {
				820	nocsum: /*
				821	* unknown protocol, disable HW csum
				822	* and hope a bad packet is detected
				823	*/
				824	return TXPKT_L4CSUM_DIS;
				825	}
				826	} else {
				827	/*
				828	* this doesn't work with extension headers
				829	*/
				830	const struct ipv6hdr ip6h = (const struct ipv6hdr )iph;
				831
				832	if (ip6h->nexthdr == IPPROTO_TCP)
				833	csum_type = TX_CSUM_TCPIP6;
				834	else if (ip6h->nexthdr == IPPROTO_UDP)
				835	csum_type = TX_CSUM_UDPIP6;
				836	else
				837	goto nocsum;
				838	}
				839
				840	if (likely(csum_type >= TX_CSUM_TCPIP))
				841	return TXPKT_CSUM_TYPE(csum_type) \|
				842	TXPKT_IPHDR_LEN(skb_network_header_len(skb)) \|
				843	TXPKT_ETHHDR_LEN(skb_network_offset(skb) - ETH_HLEN);
				844	else {
				845	int start = skb_transport_offset(skb);
				846
				847	return TXPKT_CSUM_TYPE(csum_type) \| TXPKT_CSUM_START(start) \|
				848	TXPKT_CSUM_LOC(start + skb->csum_offset);
				849	}
				850	}
				851
				852	static void eth_txq_stop(struct sge_eth_txq *q)
				853	{
				854	netif_tx_stop_queue(q->txq);
				855	q->q.stops++;
				856	}
				857
				858	static inline void txq_advance(struct sge_txq *q, unsigned int n)
				859	{
				860	q->in_use += n;
				861	q->pidx += n;
				862	if (q->pidx >= q->size)
				863	q->pidx -= q->size;
				864	}
				865
				866	/**
				867	* t4_eth_xmit - add a packet to an Ethernet Tx queue
				868	* @skb: the packet
				869	* @dev: the egress net device
				870	*
				871	* Add a packet to an SGE Ethernet Tx queue. Runs with softirqs disabled.
				872	*/
				873	netdev_tx_t t4_eth_xmit(struct sk_buff skb, struct net_device dev)
				874	{
				875	u32 wr_mid;
				876	u64 cntrl, *end;
				877	int qidx, credits;
				878	unsigned int flits, ndesc;
				879	struct adapter *adap;
				880	struct sge_eth_txq *q;
				881	const struct port_info *pi;
				882	struct fw_eth_tx_pkt_wr *wr;
				883	struct cpl_tx_pkt_core *cpl;
				884	const struct skb_shared_info *ssi;
				885	dma_addr_t addr[MAX_SKB_FRAGS + 1];
				886
				887	/*
				888	* The chip min packet length is 10 octets but play safe and reject
				889	* anything shorter than an Ethernet header.
				890	*/
				891	if (unlikely(skb->len < ETH_HLEN)) {
				892	out_free: dev_kfree_skb(skb);
				893	return NETDEV_TX_OK;
				894	}
				895
				896	pi = netdev_priv(dev);
				897	adap = pi->adapter;
				898	qidx = skb_get_queue_mapping(skb);
				899	q = &adap->sge.ethtxq[qidx + pi->first_qset];
				900
				901	reclaim_completed_tx(adap, &q->q, true);
				902
				903	flits = calc_tx_flits(skb);
				904	ndesc = flits_to_desc(flits);
				905	credits = txq_avail(&q->q) - ndesc;
				906
				907	if (unlikely(credits < 0)) {
				908	eth_txq_stop(q);
				909	dev_err(adap->pdev_dev,
				910	"%s: Tx ring %u full while queue awake!\n",
				911	dev->name, qidx);
				912	return NETDEV_TX_BUSY;
				913	}
				914
				915	if (!is_eth_imm(skb) &&
				916	unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
				917	q->mapping_err++;
				918	goto out_free;
				919	}
				920
				921	wr_mid = FW_WR_LEN16(DIV_ROUND_UP(flits, 2));
				922	if (unlikely(credits < ETHTXQ_STOP_THRES)) {
				923	eth_txq_stop(q);
				924	wr_mid \|= FW_WR_EQUEQ \| FW_WR_EQUIQ;
				925	}
				926
				927	wr = (void *)&q->q.desc[q->q.pidx];
				928	wr->equiq_to_len16 = htonl(wr_mid);
				929	wr->r3 = cpu_to_be64(0);
				930	end = (u64 *)wr + flits;
				931
				932	ssi = skb_shinfo(skb);
				933	if (ssi->gso_size) {
				934	struct cpl_tx_pkt_lso lso = (void )wr;
				935	bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
				936	int l3hdr_len = skb_network_header_len(skb);
				937	int eth_xtra_len = skb_network_offset(skb) - ETH_HLEN;
				938
				939	wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) \|
				940	FW_WR_IMMDLEN(sizeof(*lso)));
				941	lso->lso_ctrl = htonl(LSO_OPCODE(CPL_TX_PKT_LSO) \|
				942	LSO_FIRST_SLICE \| LSO_LAST_SLICE \|
				943	LSO_IPV6(v6) \|
				944	LSO_ETHHDR_LEN(eth_xtra_len / 4) \|
				945	LSO_IPHDR_LEN(l3hdr_len / 4) \|
				946	LSO_TCPHDR_LEN(tcp_hdr(skb)->doff));
				947	lso->ipid_ofst = htons(0);
				948	lso->mss = htons(ssi->gso_size);
				949	lso->seqno_offset = htonl(0);
				950	lso->len = htonl(skb->len);
				951	cpl = (void *)(lso + 1);
				952	cntrl = TXPKT_CSUM_TYPE(v6 ? TX_CSUM_TCPIP6 : TX_CSUM_TCPIP) \|
				953	TXPKT_IPHDR_LEN(l3hdr_len) \|
				954	TXPKT_ETHHDR_LEN(eth_xtra_len);
				955	q->tso++;
				956	q->tx_cso += ssi->gso_segs;
				957	} else {
				958	int len;
				959
				960	len = is_eth_imm(skb) ? skb->len + sizeof(cpl) : sizeof(cpl);
				961	wr->op_immdlen = htonl(FW_WR_OP(FW_ETH_TX_PKT_WR) \|
				962	FW_WR_IMMDLEN(len));
				963	cpl = (void *)(wr + 1);
				964	if (skb->ip_summed == CHECKSUM_PARTIAL) {
				965	cntrl = hwcsum(skb) \| TXPKT_IPCSUM_DIS;
				966	q->tx_cso++;
				967	} else
				968	cntrl = TXPKT_L4CSUM_DIS \| TXPKT_IPCSUM_DIS;
				969	}
				970
				971	if (vlan_tx_tag_present(skb)) {
				972	q->vlan_ins++;
				973	cntrl \|= TXPKT_VLAN_VLD \| TXPKT_VLAN(vlan_tx_tag_get(skb));
				974	}
				975
				976	cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) \|
				977	TXPKT_INTF(pi->tx_chan) \| TXPKT_PF(0));
				978	cpl->pack = htons(0);
				979	cpl->len = htons(skb->len);
				980	cpl->ctrl1 = cpu_to_be64(cntrl);
				981
				982	if (is_eth_imm(skb)) {
				983	inline_tx_skb(skb, &q->q, cpl + 1);
				984	dev_kfree_skb(skb);
				985	} else {
				986	int last_desc;
				987
				988	write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
				989	addr);
				990	skb_orphan(skb);
				991
				992	last_desc = q->q.pidx + ndesc - 1;
				993	if (last_desc >= q->q.size)
				994	last_desc -= q->q.size;
				995	q->q.sdesc[last_desc].skb = skb;
				996	q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)(cpl + 1);
				997	}
				998
				999	txq_advance(&q->q, ndesc);
				1000
				1001	ring_tx_db(adap, &q->q, ndesc);
				1002	return NETDEV_TX_OK;
				1003	}
				1004
				1005	/**
				1006	* reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
				1007	* @q: the SGE control Tx queue
				1008	*
				1009	* This is a variant of reclaim_completed_tx() that is used for Tx queues
				1010	* that send only immediate data (presently just the control queues) and
				1011	* thus do not have any sk_buffs to release.
				1012	*/
				1013	static inline void reclaim_completed_tx_imm(struct sge_txq *q)
				1014	{
				1015	int hw_cidx = ntohs(q->stat->cidx);
				1016	int reclaim = hw_cidx - q->cidx;
				1017
				1018	if (reclaim < 0)
				1019	reclaim += q->size;
				1020
				1021	q->in_use -= reclaim;
				1022	q->cidx = hw_cidx;
				1023	}
				1024
				1025	/**
				1026	* is_imm - check whether a packet can be sent as immediate data
				1027	* @skb: the packet
				1028	*
				1029	* Returns true if a packet can be sent as a WR with immediate data.
				1030	*/
				1031	static inline int is_imm(const struct sk_buff *skb)
				1032	{
				1033	return skb->len <= MAX_CTRL_WR_LEN;
				1034	}
				1035
				1036	/**
				1037	* ctrlq_check_stop - check if a control queue is full and should stop
				1038	* @q: the queue
				1039	* @wr: most recent WR written to the queue
				1040	*
				1041	* Check if a control queue has become full and should be stopped.
				1042	* We clean up control queue descriptors very lazily, only when we are out.
				1043	* If the queue is still full after reclaiming any completed descriptors
				1044	* we suspend it and have the last WR wake it up.
				1045	*/
				1046	static void ctrlq_check_stop(struct sge_ctrl_txq q, struct fw_wr_hdr wr)
				1047	{
				1048	reclaim_completed_tx_imm(&q->q);
				1049	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
				1050	wr->lo \|= htonl(FW_WR_EQUEQ \| FW_WR_EQUIQ);
				1051	q->q.stops++;
				1052	q->full = 1;
				1053	}
				1054	}
				1055
				1056	/**
				1057	* ctrl_xmit - send a packet through an SGE control Tx queue
				1058	* @q: the control queue
				1059	* @skb: the packet
				1060	*
				1061	* Send a packet through an SGE control Tx queue. Packets sent through
				1062	* a control queue must fit entirely as immediate data.
				1063	*/
				1064	static int ctrl_xmit(struct sge_ctrl_txq q, struct sk_buff skb)
				1065	{
				1066	unsigned int ndesc;
				1067	struct fw_wr_hdr *wr;
				1068
				1069	if (unlikely(!is_imm(skb))) {
				1070	WARN_ON(1);
				1071	dev_kfree_skb(skb);
				1072	return NET_XMIT_DROP;
				1073	}
				1074
				1075	ndesc = DIV_ROUND_UP(skb->len, sizeof(struct tx_desc));
				1076	spin_lock(&q->sendq.lock);
				1077
				1078	if (unlikely(q->full)) {
				1079	skb->priority = ndesc; /* save for restart */
				1080	__skb_queue_tail(&q->sendq, skb);
				1081	spin_unlock(&q->sendq.lock);
				1082	return NET_XMIT_CN;
				1083	}
				1084
				1085	wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
				1086	inline_tx_skb(skb, &q->q, wr);
				1087
				1088	txq_advance(&q->q, ndesc);
				1089	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
				1090	ctrlq_check_stop(q, wr);
				1091
				1092	ring_tx_db(q->adap, &q->q, ndesc);
				1093	spin_unlock(&q->sendq.lock);
				1094
				1095	kfree_skb(skb);
				1096	return NET_XMIT_SUCCESS;
				1097	}
				1098
				1099	/**
				1100	* restart_ctrlq - restart a suspended control queue
				1101	* @data: the control queue to restart
				1102	*
				1103	* Resumes transmission on a suspended Tx control queue.
				1104	*/
				1105	static void restart_ctrlq(unsigned long data)
				1106	{
				1107	struct sk_buff *skb;
				1108	unsigned int written = 0;
				1109	struct sge_ctrl_txq q = (struct sge_ctrl_txq )data;
				1110
				1111	spin_lock(&q->sendq.lock);
				1112	reclaim_completed_tx_imm(&q->q);
				1113	BUG_ON(txq_avail(&q->q) < TXQ_STOP_THRES); /* q should be empty */
				1114
				1115	while ((skb = __skb_dequeue(&q->sendq)) != NULL) {
				1116	struct fw_wr_hdr *wr;
				1117	unsigned int ndesc = skb->priority; /* previously saved */
				1118
				1119	/*
				1120	* Write descriptors and free skbs outside the lock to limit
				1121	* wait times. q->full is still set so new skbs will be queued.
				1122	*/
				1123	spin_unlock(&q->sendq.lock);
				1124
				1125	wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
				1126	inline_tx_skb(skb, &q->q, wr);
				1127	kfree_skb(skb);
				1128
				1129	written += ndesc;
				1130	txq_advance(&q->q, ndesc);
				1131	if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
				1132	unsigned long old = q->q.stops;
				1133
				1134	ctrlq_check_stop(q, wr);
				1135	if (q->q.stops != old) { /* suspended anew */
				1136	spin_lock(&q->sendq.lock);
				1137	goto ringdb;
				1138	}
				1139	}
				1140	if (written > 16) {
				1141	ring_tx_db(q->adap, &q->q, written);
				1142	written = 0;
				1143	}
				1144	spin_lock(&q->sendq.lock);
				1145	}
				1146	q->full = 0;
				1147	ringdb: if (written)
				1148	ring_tx_db(q->adap, &q->q, written);
				1149	spin_unlock(&q->sendq.lock);
				1150	}
				1151
				1152	/**
				1153	* t4_mgmt_tx - send a management message
				1154	* @adap: the adapter
				1155	* @skb: the packet containing the management message
				1156	*
				1157	* Send a management message through control queue 0.
				1158	*/
				1159	int t4_mgmt_tx(struct adapter adap, struct sk_buff skb)
				1160	{
				1161	int ret;
				1162
				1163	local_bh_disable();
				1164	ret = ctrl_xmit(&adap->sge.ctrlq[0], skb);
				1165	local_bh_enable();
				1166	return ret;
				1167	}
				1168
				1169	/**
				1170	* is_ofld_imm - check whether a packet can be sent as immediate data
				1171	* @skb: the packet
				1172	*
				1173	* Returns true if a packet can be sent as an offload WR with immediate
				1174	* data. We currently use the same limit as for Ethernet packets.
				1175	*/
				1176	static inline int is_ofld_imm(const struct sk_buff *skb)
				1177	{
				1178	return skb->len <= MAX_IMM_TX_PKT_LEN;
				1179	}
				1180
				1181	/**
				1182	* calc_tx_flits_ofld - calculate # of flits for an offload packet
				1183	* @skb: the packet
				1184	*
				1185	* Returns the number of flits needed for the given offload packet.
				1186	* These packets are already fully constructed and no additional headers
				1187	* will be added.
				1188	*/
				1189	static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
				1190	{
				1191	unsigned int flits, cnt;
				1192
				1193	if (is_ofld_imm(skb))
				1194	return DIV_ROUND_UP(skb->len, 8);
				1195
				1196	flits = skb_transport_offset(skb) / 8U; /* headers */
				1197	cnt = skb_shinfo(skb)->nr_frags;
				1198	if (skb->tail != skb->transport_header)
				1199	cnt++;
				1200	return flits + sgl_len(cnt);
				1201	}
				1202
				1203	/**
				1204	* txq_stop_maperr - stop a Tx queue due to I/O MMU exhaustion
				1205	* @adap: the adapter
				1206	* @q: the queue to stop
				1207	*
				1208	* Mark a Tx queue stopped due to I/O MMU exhaustion and resulting
				1209	* inability to map packets. A periodic timer attempts to restart
				1210	* queues so marked.
				1211	*/
				1212	static void txq_stop_maperr(struct sge_ofld_txq *q)
				1213	{
				1214	q->mapping_err++;
				1215	q->q.stops++;
				1216	set_bit(q->q.cntxt_id, q->adap->sge.txq_maperr);
				1217	}
				1218
				1219	/**
				1220	* ofldtxq_stop - stop an offload Tx queue that has become full
				1221	* @q: the queue to stop
				1222	* @skb: the packet causing the queue to become full
				1223	*
				1224	* Stops an offload Tx queue that has become full and modifies the packet
				1225	* being written to request a wakeup.
				1226	*/
				1227	static void ofldtxq_stop(struct sge_ofld_txq q, struct sk_buff skb)
				1228	{
				1229	struct fw_wr_hdr wr = (struct fw_wr_hdr )skb->data;
				1230
				1231	wr->lo \|= htonl(FW_WR_EQUEQ \| FW_WR_EQUIQ);
				1232	q->q.stops++;
				1233	q->full = 1;
				1234	}
				1235
				1236	/**
				1237	* service_ofldq - restart a suspended offload queue
				1238	* @q: the offload queue
				1239	*
				1240	* Services an offload Tx queue by moving packets from its packet queue
				1241	* to the HW Tx ring. The function starts and ends with the queue locked.
				1242	*/
				1243	static void service_ofldq(struct sge_ofld_txq *q)
				1244	{
				1245	u64 *pos;
				1246	int credits;
				1247	struct sk_buff *skb;
				1248	unsigned int written = 0;
				1249	unsigned int flits, ndesc;
				1250
				1251	while ((skb = skb_peek(&q->sendq)) != NULL && !q->full) {
				1252	/*
				1253	* We drop the lock but leave skb on sendq, thus retaining
				1254	* exclusive access to the state of the queue.
				1255	*/
				1256	spin_unlock(&q->sendq.lock);
				1257
				1258	reclaim_completed_tx(q->adap, &q->q, false);
				1259
				1260	flits = skb->priority; /* previously saved */
				1261	ndesc = flits_to_desc(flits);
				1262	credits = txq_avail(&q->q) - ndesc;
				1263	BUG_ON(credits < 0);
				1264	if (unlikely(credits < TXQ_STOP_THRES))
				1265	ofldtxq_stop(q, skb);
				1266
				1267	pos = (u64 *)&q->q.desc[q->q.pidx];
				1268	if (is_ofld_imm(skb))
				1269	inline_tx_skb(skb, &q->q, pos);
				1270	else if (map_skb(q->adap->pdev_dev, skb,
				1271	(dma_addr_t *)skb->head)) {
				1272	txq_stop_maperr(q);
				1273	spin_lock(&q->sendq.lock);
				1274	break;
				1275	} else {
				1276	int last_desc, hdr_len = skb_transport_offset(skb);
				1277
				1278	memcpy(pos, skb->data, hdr_len);
				1279	write_sgl(skb, &q->q, (void *)pos + hdr_len,
				1280	pos + flits, hdr_len,
				1281	(dma_addr_t *)skb->head);
				1282	#ifdef CONFIG_NEED_DMA_MAP_STATE
				1283	skb->dev = q->adap->port[0];
				1284	skb->destructor = deferred_unmap_destructor;
				1285	#endif
				1286	last_desc = q->q.pidx + ndesc - 1;
				1287	if (last_desc >= q->q.size)
				1288	last_desc -= q->q.size;
				1289	q->q.sdesc[last_desc].skb = skb;
				1290	}
				1291
				1292	txq_advance(&q->q, ndesc);
				1293	written += ndesc;
				1294	if (unlikely(written > 32)) {
				1295	ring_tx_db(q->adap, &q->q, written);
				1296	written = 0;
				1297	}
				1298
				1299	spin_lock(&q->sendq.lock);
				1300	__skb_unlink(skb, &q->sendq);
				1301	if (is_ofld_imm(skb))
				1302	kfree_skb(skb);
				1303	}
				1304	if (likely(written))
				1305	ring_tx_db(q->adap, &q->q, written);
				1306	}
				1307
				1308	/**
				1309	* ofld_xmit - send a packet through an offload queue
				1310	* @q: the Tx offload queue
				1311	* @skb: the packet
				1312	*
				1313	* Send an offload packet through an SGE offload queue.
				1314	*/
				1315	static int ofld_xmit(struct sge_ofld_txq q, struct sk_buff skb)
				1316	{
				1317	skb->priority = calc_tx_flits_ofld(skb); /* save for restart */
				1318	spin_lock(&q->sendq.lock);
				1319	__skb_queue_tail(&q->sendq, skb);
				1320	if (q->sendq.qlen == 1)
				1321	service_ofldq(q);
				1322	spin_unlock(&q->sendq.lock);
				1323	return NET_XMIT_SUCCESS;
				1324	}
				1325
				1326	/**
				1327	* restart_ofldq - restart a suspended offload queue
				1328	* @data: the offload queue to restart
				1329	*
				1330	* Resumes transmission on a suspended Tx offload queue.
				1331	*/
				1332	static void restart_ofldq(unsigned long data)
				1333	{
				1334	struct sge_ofld_txq q = (struct sge_ofld_txq )data;
				1335
				1336	spin_lock(&q->sendq.lock);
				1337	q->full = 0; /* the queue actually is completely empty now */
				1338	service_ofldq(q);
				1339	spin_unlock(&q->sendq.lock);
				1340	}
				1341
				1342	/**
				1343	* skb_txq - return the Tx queue an offload packet should use
				1344	* @skb: the packet
				1345	*
				1346	* Returns the Tx queue an offload packet should use as indicated by bits
				1347	* 1-15 in the packet's queue_mapping.
				1348	*/
				1349	static inline unsigned int skb_txq(const struct sk_buff *skb)
				1350	{
				1351	return skb->queue_mapping >> 1;
				1352	}
				1353
				1354	/**
				1355	* is_ctrl_pkt - return whether an offload packet is a control packet
				1356	* @skb: the packet
				1357	*
				1358	* Returns whether an offload packet should use an OFLD or a CTRL
				1359	* Tx queue as indicated by bit 0 in the packet's queue_mapping.
				1360	*/
				1361	static inline unsigned int is_ctrl_pkt(const struct sk_buff *skb)
				1362	{
				1363	return skb->queue_mapping & 1;
				1364	}
				1365
				1366	static inline int ofld_send(struct adapter adap, struct sk_buff skb)
				1367	{
				1368	unsigned int idx = skb_txq(skb);
				1369
				1370	if (unlikely(is_ctrl_pkt(skb)))
				1371	return ctrl_xmit(&adap->sge.ctrlq[idx], skb);
				1372	return ofld_xmit(&adap->sge.ofldtxq[idx], skb);
				1373	}
				1374
				1375	/**
				1376	* t4_ofld_send - send an offload packet
				1377	* @adap: the adapter
				1378	* @skb: the packet
				1379	*
				1380	* Sends an offload packet. We use the packet queue_mapping to select the
				1381	* appropriate Tx queue as follows: bit 0 indicates whether the packet
				1382	* should be sent as regular or control, bits 1-15 select the queue.
				1383	*/
				1384	int t4_ofld_send(struct adapter adap, struct sk_buff skb)
				1385	{
				1386	int ret;
				1387
				1388	local_bh_disable();
				1389	ret = ofld_send(adap, skb);
				1390	local_bh_enable();
				1391	return ret;
				1392	}
				1393
				1394	/**
				1395	* cxgb4_ofld_send - send an offload packet
				1396	* @dev: the net device
				1397	* @skb: the packet
				1398	*
				1399	* Sends an offload packet. This is an exported version of @t4_ofld_send,
				1400	* intended for ULDs.
				1401	*/
				1402	int cxgb4_ofld_send(struct net_device dev, struct sk_buff skb)
				1403	{
				1404	return t4_ofld_send(netdev2adap(dev), skb);
				1405	}
				1406	EXPORT_SYMBOL(cxgb4_ofld_send);
				1407
				1408	static inline void copy_frags(struct skb_shared_info *ssi,
				1409	const struct pkt_gl *gl, unsigned int offset)
				1410	{
				1411	unsigned int n;
				1412
				1413	/* usually there's just one frag */
				1414	ssi->frags[0].page = gl->frags[0].page;
				1415	ssi->frags[0].page_offset = gl->frags[0].page_offset + offset;
				1416	ssi->frags[0].size = gl->frags[0].size - offset;
				1417	ssi->nr_frags = gl->nfrags;
				1418	n = gl->nfrags - 1;
				1419	if (n)
				1420	memcpy(&ssi->frags[1], &gl->frags[1], n * sizeof(skb_frag_t));
				1421
				1422	/* get a reference to the last page, we don't own it */
				1423	get_page(gl->frags[n].page);
				1424	}
				1425
				1426	/**
				1427	* cxgb4_pktgl_to_skb - build an sk_buff from a packet gather list
				1428	* @gl: the gather list
				1429	* @skb_len: size of sk_buff main body if it carries fragments
				1430	* @pull_len: amount of data to move to the sk_buff's main body
				1431	*
				1432	* Builds an sk_buff from the given packet gather list. Returns the
				1433	* sk_buff or %NULL if sk_buff allocation failed.
				1434	*/
				1435	struct sk_buff cxgb4_pktgl_to_skb(const struct pkt_gl gl,
				1436	unsigned int skb_len, unsigned int pull_len)
				1437	{
				1438	struct sk_buff *skb;
				1439
				1440	/*
				1441	* Below we rely on RX_COPY_THRES being less than the smallest Rx buffer
				1442	* size, which is expected since buffers are at least PAGE_SIZEd.
				1443	* In this case packets up to RX_COPY_THRES have only one fragment.
				1444	*/
				1445	if (gl->tot_len <= RX_COPY_THRES) {
				1446	skb = dev_alloc_skb(gl->tot_len);
				1447	if (unlikely(!skb))
				1448	goto out;
				1449	__skb_put(skb, gl->tot_len);
				1450	skb_copy_to_linear_data(skb, gl->va, gl->tot_len);
				1451	} else {
				1452	skb = dev_alloc_skb(skb_len);
				1453	if (unlikely(!skb))
				1454	goto out;
				1455	__skb_put(skb, pull_len);
				1456	skb_copy_to_linear_data(skb, gl->va, pull_len);
				1457
				1458	copy_frags(skb_shinfo(skb), gl, pull_len);
				1459	skb->len = gl->tot_len;
				1460	skb->data_len = skb->len - pull_len;
				1461	skb->truesize += skb->data_len;
				1462	}
				1463	out: return skb;
				1464	}
				1465	EXPORT_SYMBOL(cxgb4_pktgl_to_skb);
				1466
				1467	/**
				1468	* t4_pktgl_free - free a packet gather list
				1469	* @gl: the gather list
				1470	*
				1471	* Releases the pages of a packet gather list. We do not own the last
				1472	* page on the list and do not free it.
				1473	*/
Roland Dreier	de498c8	2010-04-21 08:59:17 +0000	[diff] [blame]	1474	static void t4_pktgl_free(const struct pkt_gl *gl)
Dimitris Michailidis	fd3a479	2010-04-01 15:28:24 +0000	[diff] [blame]	1475	{
				1476	int n;
				1477	const skb_frag_t *p;
				1478
				1479	for (p = gl->frags, n = gl->nfrags - 1; n--; p++)
				1480	put_page(p->page);
				1481	}
				1482
				1483	/*
				1484	* Process an MPS trace packet. Give it an unused protocol number so it won't
				1485	* be delivered to anyone and send it to the stack for capture.
				1486	*/
				1487	static noinline int handle_trace_pkt(struct adapter *adap,
				1488	const struct pkt_gl *gl)
				1489	{
				1490	struct sk_buff *skb;
				1491	struct cpl_trace_pkt *p;
				1492
				1493	skb = cxgb4_pktgl_to_skb(gl, RX_PULL_LEN, RX_PULL_LEN);
				1494	if (unlikely(!skb)) {
				1495	t4_pktgl_free(gl);
				1496	return 0;
				1497	}
				1498
				1499	p = (struct cpl_trace_pkt *)skb->data;
				1500	__skb_pull(skb, sizeof(*p));
				1501	skb_reset_mac_header(skb);
				1502	skb->protocol = htons(0xffff);
				1503	skb->dev = adap->port[0];
				1504	netif_receive_skb(skb);
				1505	return 0;
				1506	}
				1507
				1508	static void do_gro(struct sge_eth_rxq rxq, const struct pkt_gl gl,
				1509	const struct cpl_rx_pkt *pkt)
				1510	{
				1511	int ret;
				1512	struct sk_buff *skb;
				1513
				1514	skb = napi_get_frags(&rxq->rspq.napi);
				1515	if (unlikely(!skb)) {
				1516	t4_pktgl_free(gl);
				1517	rxq->stats.rx_drops++;
				1518	return;
				1519	}
				1520
				1521	copy_frags(skb_shinfo(skb), gl, RX_PKT_PAD);
				1522	skb->len = gl->tot_len - RX_PKT_PAD;
				1523	skb->data_len = skb->len;
				1524	skb->truesize += skb->data_len;
				1525	skb->ip_summed = CHECKSUM_UNNECESSARY;
				1526	skb_record_rx_queue(skb, rxq->rspq.idx);
Dimitris Michailidis	87b6cf5	2010-04-27 16:22:42 -0700	[diff] [blame]	1527	if (rxq->rspq.netdev->features & NETIF_F_RXHASH)
				1528	skb->rxhash = (__force u32)pkt->rsshdr.hash_val;
Dimitris Michailidis	fd3a479	2010-04-01 15:28:24 +0000	[diff] [blame]	1529
				1530	if (unlikely(pkt->vlan_ex)) {
				1531	struct port_info *pi = netdev_priv(rxq->rspq.netdev);
				1532	struct vlan_group *grp = pi->vlan_grp;
				1533
				1534	rxq->stats.vlan_ex++;
				1535	if (likely(grp)) {
				1536	ret = vlan_gro_frags(&rxq->rspq.napi, grp,
				1537	ntohs(pkt->vlan));
				1538	goto stats;
				1539	}
				1540	}
				1541	ret = napi_gro_frags(&rxq->rspq.napi);
				1542	stats: if (ret == GRO_HELD)
				1543	rxq->stats.lro_pkts++;
				1544	else if (ret == GRO_MERGED \|\| ret == GRO_MERGED_FREE)
				1545	rxq->stats.lro_merged++;
				1546	rxq->stats.pkts++;
				1547	rxq->stats.rx_cso++;
				1548	}
				1549
				1550	/**
				1551	* t4_ethrx_handler - process an ingress ethernet packet
				1552	* @q: the response queue that received the packet
				1553	* @rsp: the response queue descriptor holding the RX_PKT message
				1554	* @si: the gather list of packet fragments
				1555	*
				1556	* Process an ingress ethernet packet and deliver it to the stack.
				1557	*/
				1558	int t4_ethrx_handler(struct sge_rspq q, const __be64 rsp,
				1559	const struct pkt_gl *si)
				1560	{
				1561	bool csum_ok;
				1562	struct sk_buff *skb;
				1563	struct port_info *pi;
				1564	const struct cpl_rx_pkt *pkt;
				1565	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
				1566
				1567	if (unlikely((u8 )rsp == CPL_TRACE_PKT))
				1568	return handle_trace_pkt(q->adap, si);
				1569
Dimitris Michailidis	87b6cf5	2010-04-27 16:22:42 -0700	[diff] [blame]	1570	pkt = (const struct cpl_rx_pkt *)rsp;
Dimitris Michailidis	fd3a479	2010-04-01 15:28:24 +0000	[diff] [blame]	1571	csum_ok = pkt->csum_calc && !pkt->err_vec;
				1572	if ((pkt->l2info & htonl(RXF_TCP)) &&
				1573	(q->netdev->features & NETIF_F_GRO) && csum_ok && !pkt->ip_frag) {
				1574	do_gro(rxq, si, pkt);
				1575	return 0;
				1576	}
				1577
				1578	skb = cxgb4_pktgl_to_skb(si, RX_PKT_SKB_LEN, RX_PULL_LEN);
				1579	if (unlikely(!skb)) {
				1580	t4_pktgl_free(si);
				1581	rxq->stats.rx_drops++;
				1582	return 0;
				1583	}
				1584
				1585	__skb_pull(skb, RX_PKT_PAD); /* remove ethernet header padding */
				1586	skb->protocol = eth_type_trans(skb, q->netdev);
				1587	skb_record_rx_queue(skb, q->idx);
Dimitris Michailidis	87b6cf5	2010-04-27 16:22:42 -0700	[diff] [blame]	1588	if (skb->dev->features & NETIF_F_RXHASH)
				1589	skb->rxhash = (__force u32)pkt->rsshdr.hash_val;
				1590
Dimitris Michailidis	fd3a479	2010-04-01 15:28:24 +0000	[diff] [blame]	1591	pi = netdev_priv(skb->dev);
				1592	rxq->stats.pkts++;
				1593
				1594	if (csum_ok && (pi->rx_offload & RX_CSO) &&
				1595	(pkt->l2info & htonl(RXF_UDP \| RXF_TCP))) {
				1596	if (!pkt->ip_frag)
				1597	skb->ip_summed = CHECKSUM_UNNECESSARY;
				1598	else {
				1599	__sum16 c = (__force __sum16)pkt->csum;
				1600	skb->csum = csum_unfold(c);
				1601	skb->ip_summed = CHECKSUM_COMPLETE;
				1602	}
				1603	rxq->stats.rx_cso++;
				1604	} else
				1605	skb->ip_summed = CHECKSUM_NONE;
				1606
				1607	if (unlikely(pkt->vlan_ex)) {
				1608	struct vlan_group *grp = pi->vlan_grp;
				1609
				1610	rxq->stats.vlan_ex++;
				1611	if (likely(grp))
				1612	vlan_hwaccel_receive_skb(skb, grp, ntohs(pkt->vlan));
				1613	else
				1614	dev_kfree_skb_any(skb);
				1615	} else
				1616	netif_receive_skb(skb);
				1617
				1618	return 0;
				1619	}
				1620
				1621	/**
				1622	* restore_rx_bufs - put back a packet's Rx buffers
				1623	* @si: the packet gather list
				1624	* @q: the SGE free list
				1625	* @frags: number of FL buffers to restore
				1626	*
				1627	* Puts back on an FL the Rx buffers associated with @si. The buffers
				1628	* have already been unmapped and are left unmapped, we mark them so to
				1629	* prevent further unmapping attempts.
				1630	*
				1631	* This function undoes a series of @unmap_rx_buf calls when we find out
				1632	* that the current packet can't be processed right away afterall and we
				1633	* need to come back to it later. This is a very rare event and there's
				1634	* no effort to make this particularly efficient.
				1635	*/
				1636	static void restore_rx_bufs(const struct pkt_gl si, struct sge_fl q,
				1637	int frags)
				1638	{
				1639	struct rx_sw_desc *d;
				1640
				1641	while (frags--) {
				1642	if (q->cidx == 0)
				1643	q->cidx = q->size - 1;
				1644	else
				1645	q->cidx--;
				1646	d = &q->sdesc[q->cidx];
				1647	d->page = si->frags[frags].page;
				1648	d->dma_addr \|= RX_UNMAPPED_BUF;
				1649	q->avail++;
				1650	}
				1651	}
				1652
				1653	/**
				1654	* is_new_response - check if a response is newly written
				1655	* @r: the response descriptor
				1656	* @q: the response queue
				1657	*
				1658	* Returns true if a response descriptor contains a yet unprocessed
				1659	* response.
				1660	*/
				1661	static inline bool is_new_response(const struct rsp_ctrl *r,
				1662	const struct sge_rspq *q)
				1663	{
				1664	return RSPD_GEN(r->type_gen) == q->gen;
				1665	}
				1666
				1667	/**
				1668	* rspq_next - advance to the next entry in a response queue
				1669	* @q: the queue
				1670	*
				1671	* Updates the state of a response queue to advance it to the next entry.
				1672	*/
				1673	static inline void rspq_next(struct sge_rspq *q)
				1674	{
				1675	q->cur_desc = (void *)q->cur_desc + q->iqe_len;
				1676	if (unlikely(++q->cidx == q->size)) {
				1677	q->cidx = 0;
				1678	q->gen ^= 1;
				1679	q->cur_desc = q->desc;
				1680	}
				1681	}
				1682
				1683	/**
				1684	* process_responses - process responses from an SGE response queue
				1685	* @q: the ingress queue to process
				1686	* @budget: how many responses can be processed in this round
				1687	*
				1688	* Process responses from an SGE response queue up to the supplied budget.
				1689	* Responses include received packets as well as control messages from FW
				1690	* or HW.
				1691	*
				1692	* Additionally choose the interrupt holdoff time for the next interrupt
				1693	* on this queue. If the system is under memory shortage use a fairly
				1694	* long delay to help recovery.
				1695	*/
				1696	static int process_responses(struct sge_rspq *q, int budget)
				1697	{
				1698	int ret, rsp_type;
				1699	int budget_left = budget;
				1700	const struct rsp_ctrl *rc;
				1701	struct sge_eth_rxq *rxq = container_of(q, struct sge_eth_rxq, rspq);
				1702
				1703	while (likely(budget_left)) {
				1704	rc = (void )q->cur_desc + (q->iqe_len - sizeof(rc));
				1705	if (!is_new_response(rc, q))
				1706	break;
				1707
				1708	rmb();
				1709	rsp_type = RSPD_TYPE(rc->type_gen);
				1710	if (likely(rsp_type == RSP_TYPE_FLBUF)) {
				1711	skb_frag_t *fp;
				1712	struct pkt_gl si;
				1713	const struct rx_sw_desc *rsd;
				1714	u32 len = ntohl(rc->pldbuflen_qid), bufsz, frags;
				1715
				1716	if (len & RSPD_NEWBUF) {
				1717	if (likely(q->offset > 0)) {
				1718	free_rx_bufs(q->adap, &rxq->fl, 1);
				1719	q->offset = 0;
				1720	}
				1721	len &= RSPD_LEN;
				1722	}
				1723	si.tot_len = len;
				1724
				1725	/* gather packet fragments */
				1726	for (frags = 0, fp = si.frags; ; frags++, fp++) {
				1727	rsd = &rxq->fl.sdesc[rxq->fl.cidx];
				1728	bufsz = get_buf_size(rsd);
				1729	fp->page = rsd->page;
				1730	fp->page_offset = q->offset;
				1731	fp->size = min(bufsz, len);
				1732	len -= fp->size;
				1733	if (!len)
				1734	break;
				1735	unmap_rx_buf(q->adap, &rxq->fl);
				1736	}
				1737
				1738	/*
				1739	* Last buffer remains mapped so explicitly make it
				1740	* coherent for CPU access.
				1741	*/
				1742	dma_sync_single_for_cpu(q->adap->pdev_dev,
				1743	get_buf_addr(rsd),
				1744	fp->size, DMA_FROM_DEVICE);
				1745
				1746	si.va = page_address(si.frags[0].page) +
				1747	si.frags[0].page_offset;
				1748	prefetch(si.va);
				1749
				1750	si.nfrags = frags + 1;
				1751	ret = q->handler(q, q->cur_desc, &si);
				1752	if (likely(ret == 0))
				1753	q->offset += ALIGN(fp->size, FL_ALIGN);
				1754	else
				1755	restore_rx_bufs(&si, &rxq->fl, frags);
				1756	} else if (likely(rsp_type == RSP_TYPE_CPL)) {
				1757	ret = q->handler(q, q->cur_desc, NULL);
				1758	} else {
				1759	ret = q->handler(q, (const __be64 *)rc, CXGB4_MSG_AN);
				1760	}
				1761
				1762	if (unlikely(ret)) {
				1763	/* couldn't process descriptor, back off for recovery */
				1764	q->next_intr_params = QINTR_TIMER_IDX(NOMEM_TMR_IDX);
				1765	break;
				1766	}
				1767
				1768	rspq_next(q);
				1769	budget_left--;
				1770	}
				1771
				1772	if (q->offset >= 0 && rxq->fl.size - rxq->fl.avail >= 16)
				1773	__refill_fl(q->adap, &rxq->fl);
				1774	return budget - budget_left;
				1775	}
				1776
				1777	/**
				1778	* napi_rx_handler - the NAPI handler for Rx processing
				1779	* @napi: the napi instance
				1780	* @budget: how many packets we can process in this round
				1781	*
				1782	* Handler for new data events when using NAPI. This does not need any
				1783	* locking or protection from interrupts as data interrupts are off at
				1784	* this point and other adapter interrupts do not interfere (the latter
				1785	* in not a concern at all with MSI-X as non-data interrupts then have
				1786	* a separate handler).
				1787	*/
				1788	static int napi_rx_handler(struct napi_struct *napi, int budget)
				1789	{
				1790	unsigned int params;
				1791	struct sge_rspq *q = container_of(napi, struct sge_rspq, napi);
				1792	int work_done = process_responses(q, budget);
				1793
				1794	if (likely(work_done < budget)) {
				1795	napi_complete(napi);
				1796	params = q->next_intr_params;
				1797	q->next_intr_params = q->intr_params;
				1798	} else
				1799	params = QINTR_TIMER_IDX(7);
				1800
				1801	t4_write_reg(q->adap, MYPF_REG(SGE_PF_GTS), CIDXINC(work_done) \|
				1802	INGRESSQID((u32)q->cntxt_id) \| SEINTARM(params));
				1803	return work_done;
				1804	}
				1805
				1806	/*
				1807	* The MSI-X interrupt handler for an SGE response queue.
				1808	*/
				1809	irqreturn_t t4_sge_intr_msix(int irq, void *cookie)
				1810	{
				1811	struct sge_rspq *q = cookie;
				1812
				1813	napi_schedule(&q->napi);
				1814	return IRQ_HANDLED;
				1815	}
				1816
				1817	/*
				1818	* Process the indirect interrupt entries in the interrupt queue and kick off
				1819	* NAPI for each queue that has generated an entry.
				1820	*/
				1821	static unsigned int process_intrq(struct adapter *adap)
				1822	{
				1823	unsigned int credits;
				1824	const struct rsp_ctrl *rc;
				1825	struct sge_rspq *q = &adap->sge.intrq;
				1826
				1827	spin_lock(&adap->sge.intrq_lock);
				1828	for (credits = 0; ; credits++) {
				1829	rc = (void )q->cur_desc + (q->iqe_len - sizeof(rc));
				1830	if (!is_new_response(rc, q))
				1831	break;
				1832
				1833	rmb();
				1834	if (RSPD_TYPE(rc->type_gen) == RSP_TYPE_INTR) {
				1835	unsigned int qid = ntohl(rc->pldbuflen_qid);
				1836
				1837	napi_schedule(&adap->sge.ingr_map[qid]->napi);
				1838	}
				1839
				1840	rspq_next(q);
				1841	}
				1842
				1843	t4_write_reg(adap, MYPF_REG(SGE_PF_GTS), CIDXINC(credits) \|
				1844	INGRESSQID(q->cntxt_id) \| SEINTARM(q->intr_params));
				1845	spin_unlock(&adap->sge.intrq_lock);
				1846	return credits;
				1847	}
				1848
				1849	/*
				1850	* The MSI interrupt handler, which handles data events from SGE response queues
				1851	* as well as error and other async events as they all use the same MSI vector.
				1852	*/
				1853	static irqreturn_t t4_intr_msi(int irq, void *cookie)
				1854	{
				1855	struct adapter *adap = cookie;
				1856
				1857	t4_slow_intr_handler(adap);
				1858	process_intrq(adap);
				1859	return IRQ_HANDLED;
				1860	}
				1861
				1862	/*
				1863	* Interrupt handler for legacy INTx interrupts.
				1864	* Handles data events from SGE response queues as well as error and other
				1865	* async events as they all use the same interrupt line.
				1866	*/
				1867	static irqreturn_t t4_intr_intx(int irq, void *cookie)
				1868	{
				1869	struct adapter *adap = cookie;
				1870
				1871	t4_write_reg(adap, MYPF_REG(PCIE_PF_CLI), 0);
				1872	if (t4_slow_intr_handler(adap) \| process_intrq(adap))
				1873	return IRQ_HANDLED;
				1874	return IRQ_NONE; /* probably shared interrupt */
				1875	}
				1876
				1877	/**
				1878	* t4_intr_handler - select the top-level interrupt handler
				1879	* @adap: the adapter
				1880	*
				1881	* Selects the top-level interrupt handler based on the type of interrupts
				1882	* (MSI-X, MSI, or INTx).
				1883	*/
				1884	irq_handler_t t4_intr_handler(struct adapter *adap)
				1885	{
				1886	if (adap->flags & USING_MSIX)
				1887	return t4_sge_intr_msix;
				1888	if (adap->flags & USING_MSI)
				1889	return t4_intr_msi;
				1890	return t4_intr_intx;
				1891	}
				1892
				1893	static void sge_rx_timer_cb(unsigned long data)
				1894	{
				1895	unsigned long m;
				1896	unsigned int i, cnt[2];
				1897	struct adapter adap = (struct adapter )data;
				1898	struct sge *s = &adap->sge;
				1899
				1900	for (i = 0; i < ARRAY_SIZE(s->starving_fl); i++)
				1901	for (m = s->starving_fl[i]; m; m &= m - 1) {
				1902	struct sge_eth_rxq *rxq;
				1903	unsigned int id = __ffs(m) + i * BITS_PER_LONG;
				1904	struct sge_fl *fl = s->egr_map[id];
				1905
				1906	clear_bit(id, s->starving_fl);
				1907	smp_mb__after_clear_bit();
				1908
				1909	if (fl_starving(fl)) {
				1910	rxq = container_of(fl, struct sge_eth_rxq, fl);
				1911	if (napi_reschedule(&rxq->rspq.napi))
				1912	fl->starving++;
				1913	else
				1914	set_bit(id, s->starving_fl);
				1915	}
				1916	}
				1917
				1918	t4_write_reg(adap, SGE_DEBUG_INDEX, 13);
				1919	cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH);
				1920	cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW);
				1921
				1922	for (i = 0; i < 2; i++)
				1923	if (cnt[i] >= s->starve_thres) {
				1924	if (s->idma_state[i] \|\| cnt[i] == 0xffffffff)
				1925	continue;
				1926	s->idma_state[i] = 1;
				1927	t4_write_reg(adap, SGE_DEBUG_INDEX, 11);
				1928	m = t4_read_reg(adap, SGE_DEBUG_DATA_LOW) >> (i * 16);
				1929	dev_warn(adap->pdev_dev,
				1930	"SGE idma%u starvation detected for "
				1931	"queue %lu\n", i, m & 0xffff);
				1932	} else if (s->idma_state[i])
				1933	s->idma_state[i] = 0;
				1934
				1935	mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
				1936	}
				1937
				1938	static void sge_tx_timer_cb(unsigned long data)
				1939	{
				1940	unsigned long m;
				1941	unsigned int i, budget;
				1942	struct adapter adap = (struct adapter )data;
				1943	struct sge *s = &adap->sge;
				1944
				1945	for (i = 0; i < ARRAY_SIZE(s->txq_maperr); i++)
				1946	for (m = s->txq_maperr[i]; m; m &= m - 1) {
				1947	unsigned long id = __ffs(m) + i * BITS_PER_LONG;
				1948	struct sge_ofld_txq *txq = s->egr_map[id];
				1949
				1950	clear_bit(id, s->txq_maperr);
				1951	tasklet_schedule(&txq->qresume_tsk);
				1952	}
				1953
				1954	budget = MAX_TIMER_TX_RECLAIM;
				1955	i = s->ethtxq_rover;
				1956	do {
				1957	struct sge_eth_txq *q = &s->ethtxq[i];
				1958
				1959	if (q->q.in_use &&
				1960	time_after_eq(jiffies, q->txq->trans_start + HZ / 100) &&
				1961	__netif_tx_trylock(q->txq)) {
				1962	int avail = reclaimable(&q->q);
				1963
				1964	if (avail) {
				1965	if (avail > budget)
				1966	avail = budget;
				1967
				1968	free_tx_desc(adap, &q->q, avail, true);
				1969	q->q.in_use -= avail;
				1970	budget -= avail;
				1971	}
				1972	__netif_tx_unlock(q->txq);
				1973	}
				1974
				1975	if (++i >= s->ethqsets)
				1976	i = 0;
				1977	} while (budget && i != s->ethtxq_rover);
				1978	s->ethtxq_rover = i;
				1979	mod_timer(&s->tx_timer, jiffies + (budget ? TX_QCHECK_PERIOD : 2));
				1980	}
				1981
				1982	int t4_sge_alloc_rxq(struct adapter adap, struct sge_rspq iq, bool fwevtq,
				1983	struct net_device *dev, int intr_idx,
				1984	struct sge_fl *fl, rspq_handler_t hnd)
				1985	{
				1986	int ret, flsz = 0;
				1987	struct fw_iq_cmd c;
				1988	struct port_info *pi = netdev_priv(dev);
				1989
				1990	/* Size needs to be multiple of 16, including status entry. */
				1991	iq->size = roundup(iq->size, 16);
				1992
				1993	iq->desc = alloc_ring(adap->pdev_dev, iq->size, iq->iqe_len, 0,
				1994	&iq->phys_addr, NULL, 0);
				1995	if (!iq->desc)
				1996	return -ENOMEM;
				1997
				1998	memset(&c, 0, sizeof(c));
				1999	c.op_to_vfn = htonl(FW_CMD_OP(FW_IQ_CMD) \| FW_CMD_REQUEST \|
				2000	FW_CMD_WRITE \| FW_CMD_EXEC \|
				2001	FW_IQ_CMD_PFN(0) \| FW_IQ_CMD_VFN(0));
				2002	c.alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC \| FW_IQ_CMD_IQSTART(1) \|
				2003	FW_LEN16(c));
				2004	c.type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) \|
				2005	FW_IQ_CMD_IQASYNCH(fwevtq) \| FW_IQ_CMD_VIID(pi->viid) \|
				2006	FW_IQ_CMD_IQANDST(intr_idx < 0) \| FW_IQ_CMD_IQANUD(1) \|
				2007	FW_IQ_CMD_IQANDSTINDEX(intr_idx >= 0 ? intr_idx :
				2008	-intr_idx - 1));
				2009	c.iqdroprss_to_iqesize = htons(FW_IQ_CMD_IQPCIECH(pi->tx_chan) \|
				2010	FW_IQ_CMD_IQGTSMODE \|
				2011	FW_IQ_CMD_IQINTCNTTHRESH(iq->pktcnt_idx) \|
				2012	FW_IQ_CMD_IQESIZE(ilog2(iq->iqe_len) - 4));
				2013	c.iqsize = htons(iq->size);
				2014	c.iqaddr = cpu_to_be64(iq->phys_addr);
				2015
				2016	if (fl) {
				2017	fl->size = roundup(fl->size, 8);
				2018	fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
				2019	sizeof(struct rx_sw_desc), &fl->addr,
				2020	&fl->sdesc, STAT_LEN);
				2021	if (!fl->desc)
				2022	goto fl_nomem;
				2023
				2024	flsz = fl->size / 8 + STAT_LEN / sizeof(struct tx_desc);
				2025	c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN \|
				2026	FW_IQ_CMD_FL0PADEN);
				2027	c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN(2) \|
				2028	FW_IQ_CMD_FL0FBMAX(3));
				2029	c.fl0size = htons(flsz);
				2030	c.fl0addr = cpu_to_be64(fl->addr);
				2031	}
				2032
				2033	ret = t4_wr_mbox(adap, 0, &c, sizeof(c), &c);
				2034	if (ret)
				2035	goto err;
				2036
				2037	netif_napi_add(dev, &iq->napi, napi_rx_handler, 64);
				2038	iq->cur_desc = iq->desc;
				2039	iq->cidx = 0;
				2040	iq->gen = 1;
				2041	iq->next_intr_params = iq->intr_params;
				2042	iq->cntxt_id = ntohs(c.iqid);
				2043	iq->abs_id = ntohs(c.physiqid);
				2044	iq->size--; /* subtract status entry */
				2045	iq->adap = adap;
				2046	iq->netdev = dev;
				2047	iq->handler = hnd;
				2048
				2049	/* set offset to -1 to distinguish ingress queues without FL */
				2050	iq->offset = fl ? 0 : -1;
				2051
				2052	adap->sge.ingr_map[iq->cntxt_id] = iq;
				2053
				2054	if (fl) {
Roland Dreier	62718b3	2010-04-21 08:09:21 +0000	[diff] [blame]	2055	fl->cntxt_id = ntohs(c.fl0id);
Dimitris Michailidis	fd3a479	2010-04-01 15:28:24 +0000	[diff] [blame]	2056	fl->avail = fl->pend_cred = 0;
				2057	fl->pidx = fl->cidx = 0;
				2058	fl->alloc_failed = fl->large_alloc_failed = fl->starving = 0;
				2059	adap->sge.egr_map[fl->cntxt_id] = fl;
				2060	refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
				2061	}
				2062	return 0;
				2063
				2064	fl_nomem:
				2065	ret = -ENOMEM;
				2066	err:
				2067	if (iq->desc) {
				2068	dma_free_coherent(adap->pdev_dev, iq->size * iq->iqe_len,
				2069	iq->desc, iq->phys_addr);
				2070	iq->desc = NULL;
				2071	}
				2072	if (fl && fl->desc) {
				2073	kfree(fl->sdesc);
				2074	fl->sdesc = NULL;
				2075	dma_free_coherent(adap->pdev_dev, flsz * sizeof(struct tx_desc),
				2076	fl->desc, fl->addr);
				2077	fl->desc = NULL;
				2078	}
				2079	return ret;
				2080	}
				2081
				2082	static void init_txq(struct adapter adap, struct sge_txq q, unsigned int id)
				2083	{
				2084	q->in_use = 0;
				2085	q->cidx = q->pidx = 0;
				2086	q->stops = q->restarts = 0;
				2087	q->stat = (void *)&q->desc[q->size];
				2088	q->cntxt_id = id;
				2089	adap->sge.egr_map[id] = q;
				2090	}
				2091
				2092	int t4_sge_alloc_eth_txq(struct adapter adap, struct sge_eth_txq txq,
				2093	struct net_device dev, struct netdev_queue netdevq,
				2094	unsigned int iqid)
				2095	{
				2096	int ret, nentries;
				2097	struct fw_eq_eth_cmd c;
				2098	struct port_info *pi = netdev_priv(dev);
				2099
				2100	/* Add status entries */
				2101	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
				2102
				2103	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
				2104	sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
				2105	&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
				2106	if (!txq->q.desc)
				2107	return -ENOMEM;
				2108
				2109	memset(&c, 0, sizeof(c));
				2110	c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_ETH_CMD) \| FW_CMD_REQUEST \|
				2111	FW_CMD_WRITE \| FW_CMD_EXEC \|
				2112	FW_EQ_ETH_CMD_PFN(0) \| FW_EQ_ETH_CMD_VFN(0));
				2113	c.alloc_to_len16 = htonl(FW_EQ_ETH_CMD_ALLOC \|
				2114	FW_EQ_ETH_CMD_EQSTART \| FW_LEN16(c));
				2115	c.viid_pkd = htonl(FW_EQ_ETH_CMD_VIID(pi->viid));
				2116	c.fetchszm_to_iqid = htonl(FW_EQ_ETH_CMD_HOSTFCMODE(2) \|
				2117	FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) \|
				2118	FW_EQ_ETH_CMD_IQID(iqid));
				2119	c.dcaen_to_eqsize = htonl(FW_EQ_ETH_CMD_FBMIN(2) \|
				2120	FW_EQ_ETH_CMD_FBMAX(3) \|
				2121	FW_EQ_ETH_CMD_CIDXFTHRESH(5) \|
				2122	FW_EQ_ETH_CMD_EQSIZE(nentries));
				2123	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
				2124
				2125	ret = t4_wr_mbox(adap, 0, &c, sizeof(c), &c);
				2126	if (ret) {
				2127	kfree(txq->q.sdesc);
				2128	txq->q.sdesc = NULL;
				2129	dma_free_coherent(adap->pdev_dev,
				2130	nentries * sizeof(struct tx_desc),
				2131	txq->q.desc, txq->q.phys_addr);
				2132	txq->q.desc = NULL;
				2133	return ret;
				2134	}
				2135
				2136	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_GET(ntohl(c.eqid_pkd)));
				2137	txq->txq = netdevq;
				2138	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
				2139	txq->mapping_err = 0;
				2140	return 0;
				2141	}
				2142
				2143	int t4_sge_alloc_ctrl_txq(struct adapter adap, struct sge_ctrl_txq txq,
				2144	struct net_device *dev, unsigned int iqid,
				2145	unsigned int cmplqid)
				2146	{
				2147	int ret, nentries;
				2148	struct fw_eq_ctrl_cmd c;
				2149	struct port_info *pi = netdev_priv(dev);
				2150
				2151	/* Add status entries */
				2152	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
				2153
				2154	txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
				2155	sizeof(struct tx_desc), 0, &txq->q.phys_addr,
				2156	NULL, 0);
				2157	if (!txq->q.desc)
				2158	return -ENOMEM;
				2159
				2160	c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_CTRL_CMD) \| FW_CMD_REQUEST \|
				2161	FW_CMD_WRITE \| FW_CMD_EXEC \|
				2162	FW_EQ_CTRL_CMD_PFN(0) \| FW_EQ_CTRL_CMD_VFN(0));
				2163	c.alloc_to_len16 = htonl(FW_EQ_CTRL_CMD_ALLOC \|
				2164	FW_EQ_CTRL_CMD_EQSTART \| FW_LEN16(c));
				2165	c.cmpliqid_eqid = htonl(FW_EQ_CTRL_CMD_CMPLIQID(cmplqid));
				2166	c.physeqid_pkd = htonl(0);
				2167	c.fetchszm_to_iqid = htonl(FW_EQ_CTRL_CMD_HOSTFCMODE(2) \|
				2168	FW_EQ_CTRL_CMD_PCIECHN(pi->tx_chan) \|
				2169	FW_EQ_CTRL_CMD_IQID(iqid));
				2170	c.dcaen_to_eqsize = htonl(FW_EQ_CTRL_CMD_FBMIN(2) \|
				2171	FW_EQ_CTRL_CMD_FBMAX(3) \|
				2172	FW_EQ_CTRL_CMD_CIDXFTHRESH(5) \|
				2173	FW_EQ_CTRL_CMD_EQSIZE(nentries));
				2174	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
				2175
				2176	ret = t4_wr_mbox(adap, 0, &c, sizeof(c), &c);
				2177	if (ret) {
				2178	dma_free_coherent(adap->pdev_dev,
				2179	nentries * sizeof(struct tx_desc),
				2180	txq->q.desc, txq->q.phys_addr);
				2181	txq->q.desc = NULL;
				2182	return ret;
				2183	}
				2184
				2185	init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_GET(ntohl(c.cmpliqid_eqid)));
				2186	txq->adap = adap;
				2187	skb_queue_head_init(&txq->sendq);
				2188	tasklet_init(&txq->qresume_tsk, restart_ctrlq, (unsigned long)txq);
				2189	txq->full = 0;
				2190	return 0;
				2191	}
				2192
				2193	int t4_sge_alloc_ofld_txq(struct adapter adap, struct sge_ofld_txq txq,
				2194	struct net_device *dev, unsigned int iqid)
				2195	{
				2196	int ret, nentries;
				2197	struct fw_eq_ofld_cmd c;
				2198	struct port_info *pi = netdev_priv(dev);
				2199
				2200	/* Add status entries */
				2201	nentries = txq->q.size + STAT_LEN / sizeof(struct tx_desc);
				2202
				2203	txq->q.desc = alloc_ring(adap->pdev_dev, txq->q.size,
				2204	sizeof(struct tx_desc), sizeof(struct tx_sw_desc),
				2205	&txq->q.phys_addr, &txq->q.sdesc, STAT_LEN);
				2206	if (!txq->q.desc)
				2207	return -ENOMEM;
				2208
				2209	memset(&c, 0, sizeof(c));
				2210	c.op_to_vfn = htonl(FW_CMD_OP(FW_EQ_OFLD_CMD) \| FW_CMD_REQUEST \|
				2211	FW_CMD_WRITE \| FW_CMD_EXEC \|
				2212	FW_EQ_OFLD_CMD_PFN(0) \| FW_EQ_OFLD_CMD_VFN(0));
				2213	c.alloc_to_len16 = htonl(FW_EQ_OFLD_CMD_ALLOC \|
				2214	FW_EQ_OFLD_CMD_EQSTART \| FW_LEN16(c));
				2215	c.fetchszm_to_iqid = htonl(FW_EQ_OFLD_CMD_HOSTFCMODE(2) \|
				2216	FW_EQ_OFLD_CMD_PCIECHN(pi->tx_chan) \|
				2217	FW_EQ_OFLD_CMD_IQID(iqid));
				2218	c.dcaen_to_eqsize = htonl(FW_EQ_OFLD_CMD_FBMIN(2) \|
				2219	FW_EQ_OFLD_CMD_FBMAX(3) \|
				2220	FW_EQ_OFLD_CMD_CIDXFTHRESH(5) \|
				2221	FW_EQ_OFLD_CMD_EQSIZE(nentries));
				2222	c.eqaddr = cpu_to_be64(txq->q.phys_addr);
				2223
				2224	ret = t4_wr_mbox(adap, 0, &c, sizeof(c), &c);
				2225	if (ret) {
				2226	kfree(txq->q.sdesc);
				2227	txq->q.sdesc = NULL;
				2228	dma_free_coherent(adap->pdev_dev,
				2229	nentries * sizeof(struct tx_desc),
				2230	txq->q.desc, txq->q.phys_addr);
				2231	txq->q.desc = NULL;
				2232	return ret;
				2233	}
				2234
				2235	init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_GET(ntohl(c.eqid_pkd)));
				2236	txq->adap = adap;
				2237	skb_queue_head_init(&txq->sendq);
				2238	tasklet_init(&txq->qresume_tsk, restart_ofldq, (unsigned long)txq);
				2239	txq->full = 0;
				2240	txq->mapping_err = 0;
				2241	return 0;
				2242	}
				2243
				2244	static void free_txq(struct adapter adap, struct sge_txq q)
				2245	{
				2246	dma_free_coherent(adap->pdev_dev,
				2247	q->size * sizeof(struct tx_desc) + STAT_LEN,
				2248	q->desc, q->phys_addr);
				2249	q->cntxt_id = 0;
				2250	q->sdesc = NULL;
				2251	q->desc = NULL;
				2252	}
				2253
				2254	static void free_rspq_fl(struct adapter adap, struct sge_rspq rq,
				2255	struct sge_fl *fl)
				2256	{
				2257	unsigned int fl_id = fl ? fl->cntxt_id : 0xffff;
				2258
				2259	adap->sge.ingr_map[rq->cntxt_id] = NULL;
				2260	t4_iq_free(adap, 0, 0, 0, FW_IQ_TYPE_FL_INT_CAP, rq->cntxt_id, fl_id,
				2261	0xffff);
				2262	dma_free_coherent(adap->pdev_dev, (rq->size + 1) * rq->iqe_len,
				2263	rq->desc, rq->phys_addr);
				2264	netif_napi_del(&rq->napi);
				2265	rq->netdev = NULL;
				2266	rq->cntxt_id = rq->abs_id = 0;
				2267	rq->desc = NULL;
				2268
				2269	if (fl) {
				2270	free_rx_bufs(adap, fl, fl->avail);
				2271	dma_free_coherent(adap->pdev_dev, fl->size * 8 + STAT_LEN,
				2272	fl->desc, fl->addr);
				2273	kfree(fl->sdesc);
				2274	fl->sdesc = NULL;
				2275	fl->cntxt_id = 0;
				2276	fl->desc = NULL;
				2277	}
				2278	}
				2279
				2280	/**
				2281	* t4_free_sge_resources - free SGE resources
				2282	* @adap: the adapter
				2283	*
				2284	* Frees resources used by the SGE queue sets.
				2285	*/
				2286	void t4_free_sge_resources(struct adapter *adap)
				2287	{
				2288	int i;
				2289	struct sge_eth_rxq *eq = adap->sge.ethrxq;
				2290	struct sge_eth_txq *etq = adap->sge.ethtxq;
				2291	struct sge_ofld_rxq *oq = adap->sge.ofldrxq;
				2292
				2293	/* clean up Ethernet Tx/Rx queues */
				2294	for (i = 0; i < adap->sge.ethqsets; i++, eq++, etq++) {
				2295	if (eq->rspq.desc)
				2296	free_rspq_fl(adap, &eq->rspq, &eq->fl);
				2297	if (etq->q.desc) {
				2298	t4_eth_eq_free(adap, 0, 0, 0, etq->q.cntxt_id);
				2299	free_tx_desc(adap, &etq->q, etq->q.in_use, true);
				2300	kfree(etq->q.sdesc);
				2301	free_txq(adap, &etq->q);
				2302	}
				2303	}
				2304
				2305	/* clean up RDMA and iSCSI Rx queues */
				2306	for (i = 0; i < adap->sge.ofldqsets; i++, oq++) {
				2307	if (oq->rspq.desc)
				2308	free_rspq_fl(adap, &oq->rspq, &oq->fl);
				2309	}
				2310	for (i = 0, oq = adap->sge.rdmarxq; i < adap->sge.rdmaqs; i++, oq++) {
				2311	if (oq->rspq.desc)
				2312	free_rspq_fl(adap, &oq->rspq, &oq->fl);
				2313	}
				2314
				2315	/* clean up offload Tx queues */
				2316	for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
				2317	struct sge_ofld_txq *q = &adap->sge.ofldtxq[i];
				2318
				2319	if (q->q.desc) {
				2320	tasklet_kill(&q->qresume_tsk);
				2321	t4_ofld_eq_free(adap, 0, 0, 0, q->q.cntxt_id);
				2322	free_tx_desc(adap, &q->q, q->q.in_use, false);
				2323	kfree(q->q.sdesc);
				2324	__skb_queue_purge(&q->sendq);
				2325	free_txq(adap, &q->q);
				2326	}
				2327	}
				2328
				2329	/* clean up control Tx queues */
				2330	for (i = 0; i < ARRAY_SIZE(adap->sge.ctrlq); i++) {
				2331	struct sge_ctrl_txq *cq = &adap->sge.ctrlq[i];
				2332
				2333	if (cq->q.desc) {
				2334	tasklet_kill(&cq->qresume_tsk);
				2335	t4_ctrl_eq_free(adap, 0, 0, 0, cq->q.cntxt_id);
				2336	__skb_queue_purge(&cq->sendq);
				2337	free_txq(adap, &cq->q);
				2338	}
				2339	}
				2340
				2341	if (adap->sge.fw_evtq.desc)
				2342	free_rspq_fl(adap, &adap->sge.fw_evtq, NULL);
				2343
				2344	if (adap->sge.intrq.desc)
				2345	free_rspq_fl(adap, &adap->sge.intrq, NULL);
				2346
				2347	/* clear the reverse egress queue map */
				2348	memset(adap->sge.egr_map, 0, sizeof(adap->sge.egr_map));
				2349	}
				2350
				2351	void t4_sge_start(struct adapter *adap)
				2352	{
				2353	adap->sge.ethtxq_rover = 0;
				2354	mod_timer(&adap->sge.rx_timer, jiffies + RX_QCHECK_PERIOD);
				2355	mod_timer(&adap->sge.tx_timer, jiffies + TX_QCHECK_PERIOD);
				2356	}
				2357
				2358	/**
				2359	* t4_sge_stop - disable SGE operation
				2360	* @adap: the adapter
				2361	*
				2362	* Stop tasklets and timers associated with the DMA engine. Note that
				2363	* this is effective only if measures have been taken to disable any HW
				2364	* events that may restart them.
				2365	*/
				2366	void t4_sge_stop(struct adapter *adap)
				2367	{
				2368	int i;
				2369	struct sge *s = &adap->sge;
				2370
				2371	if (in_interrupt()) /* actions below require waiting */
				2372	return;
				2373
				2374	if (s->rx_timer.function)
				2375	del_timer_sync(&s->rx_timer);
				2376	if (s->tx_timer.function)
				2377	del_timer_sync(&s->tx_timer);
				2378
				2379	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) {
				2380	struct sge_ofld_txq *q = &s->ofldtxq[i];
				2381
				2382	if (q->q.desc)
				2383	tasklet_kill(&q->qresume_tsk);
				2384	}
				2385	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++) {
				2386	struct sge_ctrl_txq *cq = &s->ctrlq[i];
				2387
				2388	if (cq->q.desc)
				2389	tasklet_kill(&cq->qresume_tsk);
				2390	}
				2391	}
				2392
				2393	/**
				2394	* t4_sge_init - initialize SGE
				2395	* @adap: the adapter
				2396	*
				2397	* Performs SGE initialization needed every time after a chip reset.
				2398	* We do not initialize any of the queues here, instead the driver
				2399	* top-level must request them individually.
				2400	*/
				2401	void t4_sge_init(struct adapter *adap)
				2402	{
				2403	struct sge *s = &adap->sge;
				2404	unsigned int fl_align_log = ilog2(FL_ALIGN);
				2405
				2406	t4_set_reg_field(adap, SGE_CONTROL, PKTSHIFT_MASK \|
				2407	INGPADBOUNDARY_MASK \| EGRSTATUSPAGESIZE,
				2408	INGPADBOUNDARY(fl_align_log - 5) \| PKTSHIFT(2) \|
				2409	RXPKTCPLMODE \|
				2410	(STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0));
				2411	t4_set_reg_field(adap, SGE_HOST_PAGE_SIZE, HOSTPAGESIZEPF0_MASK,
				2412	HOSTPAGESIZEPF0(PAGE_SHIFT - 10));
				2413	t4_write_reg(adap, SGE_FL_BUFFER_SIZE0, PAGE_SIZE);
				2414	#if FL_PG_ORDER > 0
				2415	t4_write_reg(adap, SGE_FL_BUFFER_SIZE1, PAGE_SIZE << FL_PG_ORDER);
				2416	#endif
				2417	t4_write_reg(adap, SGE_INGRESS_RX_THRESHOLD,
				2418	THRESHOLD_0(s->counter_val[0]) \|
				2419	THRESHOLD_1(s->counter_val[1]) \|
				2420	THRESHOLD_2(s->counter_val[2]) \|
				2421	THRESHOLD_3(s->counter_val[3]));
				2422	t4_write_reg(adap, SGE_TIMER_VALUE_0_AND_1,
				2423	TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[0])) \|
				2424	TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[1])));
				2425	t4_write_reg(adap, SGE_TIMER_VALUE_2_AND_3,
				2426	TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[2])) \|
				2427	TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[3])));
				2428	t4_write_reg(adap, SGE_TIMER_VALUE_4_AND_5,
				2429	TIMERVALUE0(us_to_core_ticks(adap, s->timer_val[4])) \|
				2430	TIMERVALUE1(us_to_core_ticks(adap, s->timer_val[5])));
				2431	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
				2432	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
				2433	s->starve_thres = core_ticks_per_usec(adap) * 1000000; /* 1 s */
				2434	s->idma_state[0] = s->idma_state[1] = 0;
				2435	spin_lock_init(&s->intrq_lock);
				2436	}