/*
 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Divy Le Ray4d22de32007-01-18 22:04:14 -050032#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
Karen Xiea109a5b2008-12-18 22:56:20 -080039#include <net/arp.h>
Divy Le Ray4d22de32007-01-18 22:04:14 -050040#include "common.h"
41#include "regs.h"
42#include "sge_defs.h"
43#include "t3_cpl.h"
44#include "firmware_exports.h"
45
#define USE_GTS			0

#define SGE_RX_SM_BUF_SIZE	1536

/*
 * Rx packets no longer than SGE_RX_COPY_THRES bytes are copied into a
 * freshly allocated sk_buff and their free-list buffer is recycled.
 * SGE_RX_PULL_LEN is the number of bytes copied into the linear part of
 * the sk_buff when a page chunk is attached as a fragment instead.
 */
#define SGE_RX_COPY_THRES	256
#define SGE_RX_PULL_LEN		128

/*
 * Page chunk size for FL0 buffers if FL0 is to be populated with page chunks.
 * It must be a divisor of PAGE_SIZE.  If set to 0 FL0 will use sk_buffs
 * directly.
 */
#define FL0_PG_CHUNK_SIZE	2048
#define FL0_PG_ORDER		0

/*
 * FL1 page chunks: 16KB chunks from order-0 allocations when PAGE_SIZE is
 * larger than 8KB, otherwise 8KB chunks from order-1 allocations.
 */
#define FL1_PG_CHUNK_SIZE	(PAGE_SIZE > 8192 ? 16384 : 8192)
#define FL1_PG_ORDER		(PAGE_SIZE > 8192 ? 0 : 1)

#define SGE_RX_DROP_THRES	16

/*
 * Max number of Rx buffers we replenish at a time.
 */
#define MAX_RX_REFILL		16U

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 */
#define TX_RECLAIM_PERIOD	(HZ / 4)

/* WR size in bytes (each flit is 8 bytes) */
#define WR_LEN			(WR_FLITS * 8)
77
/*
 * Types of Tx queues in each queue set.  The values are used as array
 * indices into a queue set's Tx queues, so the order matters: do not change.
 */
enum {
	TXQ_ETH  = 0,
	TXQ_OFLD = 1,
	TXQ_CTRL = 2,
};
82
/* Bit flags stored in sge_txq.flags */
enum {
	TXQ_RUNNING     = 0x1,	/* fetch engine is running */
	TXQ_LAST_PKT_DB = 0x2,	/* last packet rang the doorbell */
};
88
89struct tx_desc {
Al Virofb8e4442007-08-23 03:04:12 -040090 __be64 flit[TX_DESC_FLITS];
Divy Le Ray4d22de32007-01-18 22:04:14 -050091};
92
93struct rx_desc {
94 __be32 addr_lo;
95 __be32 len_gen;
96 __be32 gen2;
97 __be32 addr_hi;
98};
99
100struct tx_sw_desc { /* SW state per Tx descriptor */
101 struct sk_buff *skb;
Divy Le Ray23561c92007-11-16 11:22:05 -0800102 u8 eop; /* set if last descriptor for packet */
103 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
104 u8 fragidx; /* first page fragment associated with descriptor */
105 s8 sflit; /* start flit of first SGL entry in descriptor */
Divy Le Ray4d22de32007-01-18 22:04:14 -0500106};
107
Divy Le Raycf992af2007-05-30 21:10:47 -0700108struct rx_sw_desc { /* SW state per Rx descriptor */
Divy Le Raye0994eb2007-02-24 16:44:17 -0800109 union {
110 struct sk_buff *skb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700111 struct fl_pg_chunk pg_chunk;
112 };
113 DECLARE_PCI_UNMAP_ADDR(dma_addr);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500114};
115
116struct rsp_desc { /* response queue descriptor */
117 struct rss_header rss_hdr;
118 __be32 flags;
119 __be32 len_cq;
120 u8 imm_data[47];
121 u8 intr_gen;
122};
123
Divy Le Ray4d22de32007-01-18 22:04:14 -0500124/*
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800125 * Holds unmapping information for Tx packets that need deferred unmapping.
126 * This structure lives at skb->head and must be allocated by callers.
127 */
128struct deferred_unmap_info {
129 struct pci_dev *pdev;
130 dma_addr_t addr[MAX_SKB_FRAGS + 1];
131};
132
133/*
Divy Le Ray4d22de32007-01-18 22:04:14 -0500134 * Maps a number of flits to the number of Tx descriptors that can hold them.
135 * The formula is
136 *
137 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
138 *
139 * HW allows up to 4 descriptors to be combined into a WR.
140 */
141static u8 flit_desc_map[] = {
142 0,
143#if SGE_NUM_GENBITS == 1
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
148#elif SGE_NUM_GENBITS == 2
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
152 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
153#else
154# error "SGE_NUM_GENBITS must be 1 or 2"
155#endif
156};
157
158static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
159{
160 return container_of(q, struct sge_qset, fl[qidx]);
161}
162
163static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
164{
165 return container_of(q, struct sge_qset, rspq);
166}
167
168static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
169{
170 return container_of(q, struct sge_qset, txq[qidx]);
171}
172
173/**
174 * refill_rspq - replenish an SGE response queue
175 * @adapter: the adapter
176 * @q: the response queue to replenish
177 * @credits: how many new responses to make available
178 *
179 * Replenishes a response queue by making the supplied number of responses
180 * available to HW.
181 */
182static inline void refill_rspq(struct adapter *adapter,
183 const struct sge_rspq *q, unsigned int credits)
184{
Divy Le Rayafefce62007-11-16 11:22:21 -0800185 rmb();
Divy Le Ray4d22de32007-01-18 22:04:14 -0500186 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
187 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
188}
189
190/**
191 * need_skb_unmap - does the platform need unmapping of sk_buffs?
192 *
193 * Returns true if the platfrom needs sk_buff unmapping. The compiler
194 * optimizes away unecessary code if this returns true.
195 */
196static inline int need_skb_unmap(void)
197{
198 /*
199 * This structure is used to tell if the platfrom needs buffer
200 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
201 */
202 struct dummy {
203 DECLARE_PCI_UNMAP_ADDR(addr);
204 };
205
206 return sizeof(struct dummy) != 0;
207}
208
209/**
210 * unmap_skb - unmap a packet main body and its page fragments
211 * @skb: the packet
212 * @q: the Tx queue containing Tx descriptors for the packet
213 * @cidx: index of Tx descriptor
214 * @pdev: the PCI device
215 *
216 * Unmap the main body of an sk_buff and its page fragments, if any.
217 * Because of the fairly complicated structure of our SGLs and the desire
Divy Le Ray23561c92007-11-16 11:22:05 -0800218 * to conserve space for metadata, the information necessary to unmap an
219 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
220 * descriptors (the physical addresses of the various data buffers), and
221 * the SW descriptor state (assorted indices). The send functions
222 * initialize the indices for the first packet descriptor so we can unmap
223 * the buffers held in the first Tx descriptor here, and we have enough
224 * information at this point to set the state for the next Tx descriptor.
225 *
226 * Note that it is possible to clean up the first descriptor of a packet
227 * before the send routines have written the next descriptors, but this
228 * race does not cause any problem. We just end up writing the unmapping
229 * info for the descriptor first.
Divy Le Ray4d22de32007-01-18 22:04:14 -0500230 */
231static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
232 unsigned int cidx, struct pci_dev *pdev)
233{
234 const struct sg_ent *sgp;
Divy Le Ray23561c92007-11-16 11:22:05 -0800235 struct tx_sw_desc *d = &q->sdesc[cidx];
236 int nfrags, frag_idx, curflit, j = d->addr_idx;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500237
Divy Le Ray23561c92007-11-16 11:22:05 -0800238 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
239 frag_idx = d->fragidx;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500240
Divy Le Ray23561c92007-11-16 11:22:05 -0800241 if (frag_idx == 0 && skb_headlen(skb)) {
242 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
243 skb_headlen(skb), PCI_DMA_TODEVICE);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500244 j = 1;
245 }
246
Divy Le Ray23561c92007-11-16 11:22:05 -0800247 curflit = d->sflit + 1 + j;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500248 nfrags = skb_shinfo(skb)->nr_frags;
249
250 while (frag_idx < nfrags && curflit < WR_FLITS) {
251 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
252 skb_shinfo(skb)->frags[frag_idx].size,
253 PCI_DMA_TODEVICE);
254 j ^= 1;
255 if (j == 0) {
256 sgp++;
257 curflit++;
258 }
259 curflit++;
260 frag_idx++;
261 }
262
Divy Le Ray23561c92007-11-16 11:22:05 -0800263 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
264 d = cidx + 1 == q->size ? q->sdesc : d + 1;
265 d->fragidx = frag_idx;
266 d->addr_idx = j;
267 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
Divy Le Ray4d22de32007-01-18 22:04:14 -0500268 }
269}
270
271/**
272 * free_tx_desc - reclaims Tx descriptors and their buffers
273 * @adapter: the adapter
274 * @q: the Tx queue to reclaim descriptors from
275 * @n: the number of descriptors to reclaim
276 *
277 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
278 * Tx buffers. Called with the Tx queue lock held.
279 */
280static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
281 unsigned int n)
282{
283 struct tx_sw_desc *d;
284 struct pci_dev *pdev = adapter->pdev;
285 unsigned int cidx = q->cidx;
286
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800287 const int need_unmap = need_skb_unmap() &&
288 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
289
Divy Le Ray4d22de32007-01-18 22:04:14 -0500290 d = &q->sdesc[cidx];
291 while (n--) {
292 if (d->skb) { /* an SGL is present */
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800293 if (need_unmap)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500294 unmap_skb(d->skb, q, cidx, pdev);
Divy Le Ray23561c92007-11-16 11:22:05 -0800295 if (d->eop)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500296 kfree_skb(d->skb);
297 }
298 ++d;
299 if (++cidx == q->size) {
300 cidx = 0;
301 d = q->sdesc;
302 }
303 }
304 q->cidx = cidx;
305}
306
307/**
308 * reclaim_completed_tx - reclaims completed Tx descriptors
309 * @adapter: the adapter
310 * @q: the Tx queue to reclaim completed descriptors from
311 *
312 * Reclaims Tx descriptors that the SGE has indicated it has processed,
313 * and frees the associated buffers if possible. Called with the Tx
314 * queue's lock held.
315 */
316static inline void reclaim_completed_tx(struct adapter *adapter,
317 struct sge_txq *q)
318{
319 unsigned int reclaim = q->processed - q->cleaned;
320
321 if (reclaim) {
322 free_tx_desc(adapter, q, reclaim);
323 q->cleaned += reclaim;
324 q->in_use -= reclaim;
325 }
326}
327
328/**
329 * should_restart_tx - are there enough resources to restart a Tx queue?
330 * @q: the Tx queue
331 *
332 * Checks if there are enough descriptors to restart a suspended Tx queue.
333 */
334static inline int should_restart_tx(const struct sge_txq *q)
335{
336 unsigned int r = q->processed - q->cleaned;
337
338 return q->in_use - r < (q->size >> 1);
339}
340
Divy Le Ray9bb2b312009-03-12 21:13:49 +0000341static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d)
342{
343 if (q->use_pages) {
344 if (d->pg_chunk.page)
345 put_page(d->pg_chunk.page);
346 d->pg_chunk.page = NULL;
347 } else {
348 kfree_skb(d->skb);
349 d->skb = NULL;
350 }
351}
352
Divy Le Ray4d22de32007-01-18 22:04:14 -0500353/**
354 * free_rx_bufs - free the Rx buffers on an SGE free list
355 * @pdev: the PCI device associated with the adapter
356 * @rxq: the SGE free list to clean up
357 *
358 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
359 * this queue should be stopped before calling this function.
360 */
361static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
362{
363 unsigned int cidx = q->cidx;
364
365 while (q->credits--) {
366 struct rx_sw_desc *d = &q->sdesc[cidx];
367
368 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
369 q->buf_size, PCI_DMA_FROMDEVICE);
Divy Le Ray9bb2b312009-03-12 21:13:49 +0000370 clear_rx_desc(q, d);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500371 if (++cidx == q->size)
372 cidx = 0;
373 }
Divy Le Raye0994eb2007-02-24 16:44:17 -0800374
Divy Le Raycf992af2007-05-30 21:10:47 -0700375 if (q->pg_chunk.page) {
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700376 __free_pages(q->pg_chunk.page, q->order);
Divy Le Raycf992af2007-05-30 21:10:47 -0700377 q->pg_chunk.page = NULL;
378 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500379}
380
381/**
382 * add_one_rx_buf - add a packet buffer to a free-buffer list
Divy Le Raycf992af2007-05-30 21:10:47 -0700383 * @va: buffer start VA
Divy Le Ray4d22de32007-01-18 22:04:14 -0500384 * @len: the buffer length
385 * @d: the HW Rx descriptor to write
386 * @sd: the SW Rx descriptor to write
387 * @gen: the generation bit value
388 * @pdev: the PCI device associated with the adapter
389 *
390 * Add a buffer of the given length to the supplied HW and SW Rx
391 * descriptors.
392 */
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700393static inline int add_one_rx_buf(void *va, unsigned int len,
394 struct rx_desc *d, struct rx_sw_desc *sd,
395 unsigned int gen, struct pci_dev *pdev)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500396{
397 dma_addr_t mapping;
398
Divy Le Raye0994eb2007-02-24 16:44:17 -0800399 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
FUJITA Tomonori8d8bb392008-07-25 19:44:49 -0700400 if (unlikely(pci_dma_mapping_error(pdev, mapping)))
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700401 return -ENOMEM;
402
Divy Le Ray4d22de32007-01-18 22:04:14 -0500403 pci_unmap_addr_set(sd, dma_addr, mapping);
404
405 d->addr_lo = cpu_to_be32(mapping);
406 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
407 wmb();
408 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
409 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700410 return 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500411}
412
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700413static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
414 unsigned int order)
Divy Le Raycf992af2007-05-30 21:10:47 -0700415{
416 if (!q->pg_chunk.page) {
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700417 q->pg_chunk.page = alloc_pages(gfp, order);
Divy Le Raycf992af2007-05-30 21:10:47 -0700418 if (unlikely(!q->pg_chunk.page))
419 return -ENOMEM;
420 q->pg_chunk.va = page_address(q->pg_chunk.page);
421 q->pg_chunk.offset = 0;
422 }
423 sd->pg_chunk = q->pg_chunk;
424
425 q->pg_chunk.offset += q->buf_size;
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700426 if (q->pg_chunk.offset == (PAGE_SIZE << order))
Divy Le Raycf992af2007-05-30 21:10:47 -0700427 q->pg_chunk.page = NULL;
428 else {
429 q->pg_chunk.va += q->buf_size;
430 get_page(q->pg_chunk.page);
431 }
432 return 0;
433}
434
Divy Le Ray26b38712009-03-12 21:13:43 +0000435static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
436{
437 if (q->pend_cred >= q->credits / 4) {
438 q->pend_cred = 0;
439 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
440 }
441}
442
Divy Le Ray4d22de32007-01-18 22:04:14 -0500443/**
444 * refill_fl - refill an SGE free-buffer list
445 * @adapter: the adapter
446 * @q: the free-list to refill
447 * @n: the number of new buffers to allocate
448 * @gfp: the gfp flags for allocating new buffers
449 *
450 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
451 * allocated with the supplied gfp flags. The caller must assure that
452 * @n does not exceed the queue's capacity.
453 */
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700454static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500455{
Divy Le Raycf992af2007-05-30 21:10:47 -0700456 void *buf_start;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500457 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
458 struct rx_desc *d = &q->desc[q->pidx];
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700459 unsigned int count = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500460
461 while (n--) {
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700462 int err;
463
Divy Le Raycf992af2007-05-30 21:10:47 -0700464 if (q->use_pages) {
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700465 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
Divy Le Raycf992af2007-05-30 21:10:47 -0700466nomem: q->alloc_failed++;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800467 break;
468 }
Divy Le Raycf992af2007-05-30 21:10:47 -0700469 buf_start = sd->pg_chunk.va;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800470 } else {
Divy Le Raycf992af2007-05-30 21:10:47 -0700471 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
Divy Le Raye0994eb2007-02-24 16:44:17 -0800472
Divy Le Raycf992af2007-05-30 21:10:47 -0700473 if (!skb)
474 goto nomem;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800475
Divy Le Raycf992af2007-05-30 21:10:47 -0700476 sd->skb = skb;
477 buf_start = skb->data;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800478 }
479
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700480 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
481 adap->pdev);
482 if (unlikely(err)) {
Divy Le Ray9bb2b312009-03-12 21:13:49 +0000483 clear_rx_desc(q, sd);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700484 break;
485 }
486
Divy Le Ray4d22de32007-01-18 22:04:14 -0500487 d++;
488 sd++;
489 if (++q->pidx == q->size) {
490 q->pidx = 0;
491 q->gen ^= 1;
492 sd = q->sdesc;
493 d = q->desc;
494 }
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700495 count++;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500496 }
Divy Le Ray26b38712009-03-12 21:13:43 +0000497
498 q->credits += count;
499 q->pend_cred += count;
500 ring_fl_db(adap, q);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700501
502 return count;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500503}
504
505static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
506{
Divy Le Ray26b38712009-03-12 21:13:43 +0000507 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700508 GFP_ATOMIC | __GFP_COMP);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500509}
510
511/**
512 * recycle_rx_buf - recycle a receive buffer
513 * @adapter: the adapter
514 * @q: the SGE free list
515 * @idx: index of buffer to recycle
516 *
517 * Recycles the specified buffer on the given free list by adding it at
518 * the next available slot on the list.
519 */
520static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
521 unsigned int idx)
522{
523 struct rx_desc *from = &q->desc[idx];
524 struct rx_desc *to = &q->desc[q->pidx];
525
Divy Le Raycf992af2007-05-30 21:10:47 -0700526 q->sdesc[q->pidx] = q->sdesc[idx];
Divy Le Ray4d22de32007-01-18 22:04:14 -0500527 to->addr_lo = from->addr_lo; /* already big endian */
528 to->addr_hi = from->addr_hi; /* likewise */
529 wmb();
530 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
531 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
Divy Le Ray4d22de32007-01-18 22:04:14 -0500532
533 if (++q->pidx == q->size) {
534 q->pidx = 0;
535 q->gen ^= 1;
536 }
Divy Le Ray26b38712009-03-12 21:13:43 +0000537
538 q->credits++;
539 q->pend_cred++;
540 ring_fl_db(adap, q);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500541}
542
543/**
544 * alloc_ring - allocate resources for an SGE descriptor ring
545 * @pdev: the PCI device
546 * @nelem: the number of descriptors
547 * @elem_size: the size of each descriptor
548 * @sw_size: the size of the SW state associated with each ring element
549 * @phys: the physical address of the allocated ring
550 * @metadata: address of the array holding the SW state for the ring
551 *
552 * Allocates resources for an SGE descriptor ring, such as Tx queues,
553 * free buffer lists, or response queues. Each SGE ring requires
554 * space for its HW descriptors plus, optionally, space for the SW state
555 * associated with each HW entry (the metadata). The function returns
556 * three values: the virtual address for the HW ring (the return value
557 * of the function), the physical address of the HW ring, and the address
558 * of the SW ring.
559 */
560static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
Divy Le Raye0994eb2007-02-24 16:44:17 -0800561 size_t sw_size, dma_addr_t * phys, void *metadata)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500562{
563 size_t len = nelem * elem_size;
564 void *s = NULL;
565 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
566
567 if (!p)
568 return NULL;
Divy Le Ray52565542008-11-26 15:35:59 -0800569 if (sw_size && metadata) {
Divy Le Ray4d22de32007-01-18 22:04:14 -0500570 s = kcalloc(nelem, sw_size, GFP_KERNEL);
571
572 if (!s) {
573 dma_free_coherent(&pdev->dev, len, p, *phys);
574 return NULL;
575 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500576 *(void **)metadata = s;
Divy Le Ray52565542008-11-26 15:35:59 -0800577 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500578 memset(p, 0, len);
579 return p;
580}
581
582/**
Divy Le Ray204e2f92008-05-06 19:26:01 -0700583 * t3_reset_qset - reset a sge qset
584 * @q: the queue set
585 *
586 * Reset the qset structure.
587 * the NAPI structure is preserved in the event of
588 * the qset's reincarnation, for example during EEH recovery.
589 */
590static void t3_reset_qset(struct sge_qset *q)
591{
592 if (q->adap &&
593 !(q->adap->flags & NAPI_INIT)) {
594 memset(q, 0, sizeof(*q));
595 return;
596 }
597
598 q->adap = NULL;
599 memset(&q->rspq, 0, sizeof(q->rspq));
600 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
601 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
602 q->txq_stopped = 0;
Divy Le Ray20d3fc12008-10-08 17:36:03 -0700603 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
Herbert Xu7be2df42009-01-21 14:39:13 -0800604 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
Divy Le Ray204e2f92008-05-06 19:26:01 -0700605}
606
607
608/**
Divy Le Ray4d22de32007-01-18 22:04:14 -0500609 * free_qset - free the resources of an SGE queue set
610 * @adapter: the adapter owning the queue set
611 * @q: the queue set
612 *
613 * Release the HW and SW resources associated with an SGE queue set, such
614 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
615 * queue set must be quiesced prior to calling this.
616 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -0700617static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500618{
619 int i;
620 struct pci_dev *pdev = adapter->pdev;
621
Divy Le Ray4d22de32007-01-18 22:04:14 -0500622 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
623 if (q->fl[i].desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700624 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500625 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
Roland Dreierb1186de2008-03-20 13:30:48 -0700626 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500627 free_rx_bufs(pdev, &q->fl[i]);
628 kfree(q->fl[i].sdesc);
629 dma_free_coherent(&pdev->dev,
630 q->fl[i].size *
631 sizeof(struct rx_desc), q->fl[i].desc,
632 q->fl[i].phys_addr);
633 }
634
635 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
636 if (q->txq[i].desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700637 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500638 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
Roland Dreierb1186de2008-03-20 13:30:48 -0700639 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500640 if (q->txq[i].sdesc) {
641 free_tx_desc(adapter, &q->txq[i],
642 q->txq[i].in_use);
643 kfree(q->txq[i].sdesc);
644 }
645 dma_free_coherent(&pdev->dev,
646 q->txq[i].size *
647 sizeof(struct tx_desc),
648 q->txq[i].desc, q->txq[i].phys_addr);
649 __skb_queue_purge(&q->txq[i].sendq);
650 }
651
652 if (q->rspq.desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700653 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500654 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
Roland Dreierb1186de2008-03-20 13:30:48 -0700655 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500656 dma_free_coherent(&pdev->dev,
657 q->rspq.size * sizeof(struct rsp_desc),
658 q->rspq.desc, q->rspq.phys_addr);
659 }
660
Divy Le Ray204e2f92008-05-06 19:26:01 -0700661 t3_reset_qset(q);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500662}
663
664/**
665 * init_qset_cntxt - initialize an SGE queue set context info
666 * @qs: the queue set
667 * @id: the queue set id
668 *
669 * Initializes the TIDs and context ids for the queues of a queue set.
670 */
671static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
672{
673 qs->rspq.cntxt_id = id;
674 qs->fl[0].cntxt_id = 2 * id;
675 qs->fl[1].cntxt_id = 2 * id + 1;
676 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
677 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
678 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
679 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
680 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
681}
682
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Calculates the number of flits needed for a scatter/gather list that
 *	can hold the given number of entries: every pair of entries takes
 *	3 flits and a trailing unpaired entry takes 2.
 */
static inline unsigned int sgl_len(unsigned int n)
{
	/*
	 * Same value as (3 * n) / 2 + (n & 1), but the multiplication is
	 * applied to n / 2 so it does not wrap while the result itself is
	 * still representable.
	 */
	return 3 * (n / 2) + 2 * (n & 1);
}
695
696/**
697 * flits_to_desc - returns the num of Tx descriptors for the given flits
698 * @n: the number of flits
699 *
700 * Calculates the number of Tx descriptors needed for the supplied number
701 * of flits.
702 */
703static inline unsigned int flits_to_desc(unsigned int n)
704{
705 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
706 return flit_desc_map[n];
707}
708
709/**
Divy Le Raycf992af2007-05-30 21:10:47 -0700710 * get_packet - return the next ingress packet buffer from a free list
711 * @adap: the adapter that received the packet
712 * @fl: the SGE free list holding the packet
713 * @len: the packet length including any SGE padding
714 * @drop_thres: # of remaining buffers before we start dropping packets
715 *
716 * Get the next packet from a free list and complete setup of the
717 * sk_buff. If the packet is small we make a copy and recycle the
718 * original buffer, otherwise we use the original buffer itself. If a
719 * positive drop threshold is supplied packets are dropped and their
720 * buffers recycled if (a) the number of remaining buffers is under the
721 * threshold and the packet is too big to copy, or (b) the packet should
722 * be copied but there is no memory for the copy.
723 */
724static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
725 unsigned int len, unsigned int drop_thres)
726{
727 struct sk_buff *skb = NULL;
728 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
729
730 prefetch(sd->skb->data);
731 fl->credits--;
732
733 if (len <= SGE_RX_COPY_THRES) {
734 skb = alloc_skb(len, GFP_ATOMIC);
735 if (likely(skb != NULL)) {
736 __skb_put(skb, len);
737 pci_dma_sync_single_for_cpu(adap->pdev,
738 pci_unmap_addr(sd, dma_addr), len,
739 PCI_DMA_FROMDEVICE);
740 memcpy(skb->data, sd->skb->data, len);
741 pci_dma_sync_single_for_device(adap->pdev,
742 pci_unmap_addr(sd, dma_addr), len,
743 PCI_DMA_FROMDEVICE);
744 } else if (!drop_thres)
745 goto use_orig_buf;
746recycle:
747 recycle_rx_buf(adap, fl, fl->cidx);
748 return skb;
749 }
750
Divy Le Ray26b38712009-03-12 21:13:43 +0000751 if (unlikely(fl->credits < drop_thres) &&
752 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
753 GFP_ATOMIC | __GFP_COMP) == 0)
Divy Le Raycf992af2007-05-30 21:10:47 -0700754 goto recycle;
755
756use_orig_buf:
757 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
758 fl->buf_size, PCI_DMA_FROMDEVICE);
759 skb = sd->skb;
760 skb_put(skb, len);
761 __refill_fl(adap, fl);
762 return skb;
763}
764
765/**
766 * get_packet_pg - return the next ingress packet buffer from a free list
767 * @adap: the adapter that received the packet
768 * @fl: the SGE free list holding the packet
769 * @len: the packet length including any SGE padding
770 * @drop_thres: # of remaining buffers before we start dropping packets
771 *
772 * Get the next packet from a free list populated with page chunks.
773 * If the packet is small we make a copy and recycle the original buffer,
774 * otherwise we attach the original buffer as a page fragment to a fresh
775 * sk_buff. If a positive drop threshold is supplied packets are dropped
776 * and their buffers recycled if (a) the number of remaining buffers is
777 * under the threshold and the packet is too big to copy, or (b) there's
778 * no system memory.
779 *
780 * Note: this function is similar to @get_packet but deals with Rx buffers
781 * that are page chunks rather than sk_buffs.
782 */
783static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700784 struct sge_rspq *q, unsigned int len,
785 unsigned int drop_thres)
Divy Le Raycf992af2007-05-30 21:10:47 -0700786{
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700787 struct sk_buff *newskb, *skb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700788 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
789
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700790 newskb = skb = q->pg_skb;
791
792 if (!skb && (len <= SGE_RX_COPY_THRES)) {
793 newskb = alloc_skb(len, GFP_ATOMIC);
794 if (likely(newskb != NULL)) {
795 __skb_put(newskb, len);
Divy Le Raycf992af2007-05-30 21:10:47 -0700796 pci_dma_sync_single_for_cpu(adap->pdev,
797 pci_unmap_addr(sd, dma_addr), len,
798 PCI_DMA_FROMDEVICE);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700799 memcpy(newskb->data, sd->pg_chunk.va, len);
Divy Le Raycf992af2007-05-30 21:10:47 -0700800 pci_dma_sync_single_for_device(adap->pdev,
801 pci_unmap_addr(sd, dma_addr), len,
802 PCI_DMA_FROMDEVICE);
803 } else if (!drop_thres)
804 return NULL;
805recycle:
806 fl->credits--;
807 recycle_rx_buf(adap, fl, fl->cidx);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700808 q->rx_recycle_buf++;
809 return newskb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700810 }
811
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700812 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
Divy Le Raycf992af2007-05-30 21:10:47 -0700813 goto recycle;
814
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700815 if (!skb)
Divy Le Rayb47385b2008-05-21 18:56:26 -0700816 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700817 if (unlikely(!newskb)) {
Divy Le Raycf992af2007-05-30 21:10:47 -0700818 if (!drop_thres)
819 return NULL;
820 goto recycle;
821 }
822
823 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
824 fl->buf_size, PCI_DMA_FROMDEVICE);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700825 if (!skb) {
826 __skb_put(newskb, SGE_RX_PULL_LEN);
827 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
828 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
829 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
830 len - SGE_RX_PULL_LEN);
831 newskb->len = len;
832 newskb->data_len = len - SGE_RX_PULL_LEN;
Divy Le Ray8f435802009-03-12 21:13:54 +0000833 newskb->truesize += newskb->data_len;
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700834 } else {
835 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
836 sd->pg_chunk.page,
837 sd->pg_chunk.offset, len);
838 newskb->len += len;
839 newskb->data_len += len;
Divy Le Ray8f435802009-03-12 21:13:54 +0000840 newskb->truesize += len;
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700841 }
Divy Le Raycf992af2007-05-30 21:10:47 -0700842
843 fl->credits--;
844 /*
845 * We do not refill FLs here, we let the caller do it to overlap a
846 * prefetch.
847 */
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700848 return newskb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700849}
850
851/**
Divy Le Ray4d22de32007-01-18 22:04:14 -0500852 * get_imm_packet - return the next ingress packet buffer from a response
853 * @resp: the response descriptor containing the packet data
854 *
855 * Return a packet containing the immediate data of the given response.
856 */
857static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
858{
859 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
860
861 if (skb) {
862 __skb_put(skb, IMMED_PKT_SIZE);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300863 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500864 }
865 return skb;
866}
867
868/**
869 * calc_tx_descs - calculate the number of Tx descriptors for a packet
870 * @skb: the packet
871 *
872 * Returns the number of Tx descriptors needed for the given Ethernet
873 * packet. Ethernet packets require addition of WR and CPL headers.
874 */
875static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
876{
877 unsigned int flits;
878
879 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
880 return 1;
881
882 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
883 if (skb_shinfo(skb)->gso_size)
884 flits++;
885 return flits_to_desc(flits);
886}
887
888/**
889 * make_sgl - populate a scatter/gather list for a packet
890 * @skb: the packet
891 * @sgp: the SGL to populate
892 * @start: start address of skb main body data to include in the SGL
893 * @len: length of skb main body data to include in the SGL
894 * @pdev: the PCI device
895 *
896 * Generates a scatter/gather list for the buffers that make up a packet
897 * and returns the SGL size in 8-byte words. The caller must size the SGL
898 * appropriately.
899 */
900static inline unsigned int make_sgl(const struct sk_buff *skb,
901 struct sg_ent *sgp, unsigned char *start,
902 unsigned int len, struct pci_dev *pdev)
903{
904 dma_addr_t mapping;
905 unsigned int i, j = 0, nfrags;
906
907 if (len) {
908 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
909 sgp->len[0] = cpu_to_be32(len);
910 sgp->addr[0] = cpu_to_be64(mapping);
911 j = 1;
912 }
913
914 nfrags = skb_shinfo(skb)->nr_frags;
915 for (i = 0; i < nfrags; i++) {
916 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
917
918 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
919 frag->size, PCI_DMA_TODEVICE);
920 sgp->len[j] = cpu_to_be32(frag->size);
921 sgp->addr[j] = cpu_to_be64(mapping);
922 j ^= 1;
923 if (j == 0)
924 ++sgp;
925 }
926 if (j)
927 sgp->len[j] = 0;
928 return ((nfrags + (len != 0)) * 3) / 2 + j;
929}
930
931/**
932 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
933 * @adap: the adapter
934 * @q: the Tx queue
935 *
936 * Ring the doorbel if a Tx queue is asleep. There is a natural race,
937 * where the HW is going to sleep just after we checked, however,
938 * then the interrupt handler will detect the outstanding TX packet
939 * and ring the doorbell for us.
940 *
941 * When GTS is disabled we unconditionally ring the doorbell.
942 */
943static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
944{
945#if USE_GTS
946 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
947 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
948 set_bit(TXQ_LAST_PKT_DB, &q->flags);
949 t3_write_reg(adap, A_SG_KDOORBELL,
950 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
951 }
952#else
953 wmb(); /* write descriptors before telling HW */
954 t3_write_reg(adap, A_SG_KDOORBELL,
955 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
956#endif
957}
958
/*
 * Store the generation value in the last flit of a Tx descriptor.  This is
 * compiled in only when SGE_NUM_GENBITS is 2; otherwise it is a no-op.
 */
static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	const unsigned int last = TX_DESC_FLITS - 1;

	d->flit[last] = cpu_to_be64(gen);
#endif
}
965
966/**
967 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
968 * @ndesc: number of Tx descriptors spanned by the SGL
969 * @skb: the packet corresponding to the WR
970 * @d: first Tx descriptor to be written
971 * @pidx: index of above descriptors
972 * @q: the SGE Tx queue
973 * @sgl: the SGL
974 * @flits: number of flits to the start of the SGL in the first descriptor
975 * @sgl_flits: the SGL size in flits
976 * @gen: the Tx descriptor generation
977 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
978 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
979 *
980 * Write a work request header and an associated SGL. If the SGL is
981 * small enough to fit into one Tx descriptor it has already been written
982 * and we just need to write the WR header. Otherwise we distribute the
983 * SGL across the number of descriptors it spans.
984 */
985static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
986 struct tx_desc *d, unsigned int pidx,
987 const struct sge_txq *q,
988 const struct sg_ent *sgl,
989 unsigned int flits, unsigned int sgl_flits,
Al Virofb8e4442007-08-23 03:04:12 -0400990 unsigned int gen, __be32 wr_hi,
991 __be32 wr_lo)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500992{
993 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
994 struct tx_sw_desc *sd = &q->sdesc[pidx];
995
996 sd->skb = skb;
997 if (need_skb_unmap()) {
Divy Le Ray23561c92007-11-16 11:22:05 -0800998 sd->fragidx = 0;
999 sd->addr_idx = 0;
1000 sd->sflit = flits;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001001 }
1002
1003 if (likely(ndesc == 1)) {
Divy Le Ray23561c92007-11-16 11:22:05 -08001004 sd->eop = 1;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001005 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1006 V_WR_SGLSFLT(flits)) | wr_hi;
1007 wmb();
1008 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1009 V_WR_GEN(gen)) | wr_lo;
1010 wr_gen2(d, gen);
1011 } else {
1012 unsigned int ogen = gen;
1013 const u64 *fp = (const u64 *)sgl;
1014 struct work_request_hdr *wp = wrp;
1015
1016 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1017 V_WR_SGLSFLT(flits)) | wr_hi;
1018
1019 while (sgl_flits) {
1020 unsigned int avail = WR_FLITS - flits;
1021
1022 if (avail > sgl_flits)
1023 avail = sgl_flits;
1024 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1025 sgl_flits -= avail;
1026 ndesc--;
1027 if (!sgl_flits)
1028 break;
1029
1030 fp += avail;
1031 d++;
Divy Le Ray23561c92007-11-16 11:22:05 -08001032 sd->eop = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001033 sd++;
1034 if (++pidx == q->size) {
1035 pidx = 0;
1036 gen ^= 1;
1037 d = q->desc;
1038 sd = q->sdesc;
1039 }
1040
1041 sd->skb = skb;
1042 wrp = (struct work_request_hdr *)d;
1043 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1044 V_WR_SGLSFLT(1)) | wr_hi;
1045 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1046 sgl_flits + 1)) |
1047 V_WR_GEN(gen)) | wr_lo;
1048 wr_gen2(d, gen);
1049 flits = 1;
1050 }
Divy Le Ray23561c92007-11-16 11:22:05 -08001051 sd->eop = 1;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001052 wrp->wr_hi |= htonl(F_WR_EOP);
1053 wmb();
1054 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1055 wr_gen2((struct tx_desc *)wp, ogen);
1056 WARN_ON(ndesc != 0);
1057 }
1058}
1059
1060/**
1061 * write_tx_pkt_wr - write a TX_PKT work request
1062 * @adap: the adapter
1063 * @skb: the packet to send
1064 * @pi: the egress interface
1065 * @pidx: index of the first Tx descriptor to write
1066 * @gen: the generation value to use
1067 * @q: the Tx queue
1068 * @ndesc: number of descriptors the packet will occupy
1069 * @compl: the value of the COMPL bit to use
1070 *
1071 * Generate a TX_PKT work request to send the supplied packet.
1072 */
1073static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1074 const struct port_info *pi,
1075 unsigned int pidx, unsigned int gen,
1076 struct sge_txq *q, unsigned int ndesc,
1077 unsigned int compl)
1078{
1079 unsigned int flits, sgl_flits, cntrl, tso_info;
1080 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1081 struct tx_desc *d = &q->desc[pidx];
1082 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1083
1084 cpl->len = htonl(skb->len | 0x80000000);
1085 cntrl = V_TXPKT_INTF(pi->port_id);
1086
1087 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1088 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1089
1090 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1091 if (tso_info) {
1092 int eth_type;
1093 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1094
1095 d->flit[2] = 0;
1096 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1097 hdr->cntrl = htonl(cntrl);
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001098 eth_type = skb_network_offset(skb) == ETH_HLEN ?
Divy Le Ray4d22de32007-01-18 22:04:14 -05001099 CPL_ETH_II : CPL_ETH_II_VLAN;
1100 tso_info |= V_LSO_ETH_TYPE(eth_type) |
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001101 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001102 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001103 hdr->lso_info = htonl(tso_info);
1104 flits = 3;
1105 } else {
1106 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1107 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1108 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1109 cpl->cntrl = htonl(cntrl);
1110
1111 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1112 q->sdesc[pidx].skb = NULL;
1113 if (!skb->data_len)
Arnaldo Carvalho de Melod626f622007-03-27 18:55:52 -03001114 skb_copy_from_linear_data(skb, &d->flit[2],
1115 skb->len);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001116 else
1117 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1118
1119 flits = (skb->len + 7) / 8 + 2;
1120 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1121 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1122 | F_WR_SOP | F_WR_EOP | compl);
1123 wmb();
1124 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1125 V_WR_TID(q->token));
1126 wr_gen2(d, gen);
1127 kfree_skb(skb);
1128 return;
1129 }
1130
1131 flits = 2;
1132 }
1133
1134 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1135 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001136
1137 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1138 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1139 htonl(V_WR_TID(q->token)));
1140}
1141
Divy Le Ray82ad3322008-12-16 01:09:39 -08001142static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1143 struct sge_qset *qs, struct sge_txq *q)
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301144{
Divy Le Ray82ad3322008-12-16 01:09:39 -08001145 netif_tx_stop_queue(txq);
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301146 set_bit(TXQ_ETH, &qs->txq_stopped);
1147 q->stops++;
1148}
1149
Divy Le Ray4d22de32007-01-18 22:04:14 -05001150/**
1151 * eth_xmit - add a packet to the Ethernet Tx queue
1152 * @skb: the packet
1153 * @dev: the egress net device
1154 *
1155 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1156 */
1157int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1158{
Divy Le Ray82ad3322008-12-16 01:09:39 -08001159 int qidx;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001160 unsigned int ndesc, pidx, credits, gen, compl;
1161 const struct port_info *pi = netdev_priv(dev);
Divy Le Ray5fbf8162007-08-29 19:15:47 -07001162 struct adapter *adap = pi->adapter;
Divy Le Ray82ad3322008-12-16 01:09:39 -08001163 struct netdev_queue *txq;
1164 struct sge_qset *qs;
1165 struct sge_txq *q;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001166
1167 /*
1168 * The chip min packet length is 9 octets but play safe and reject
1169 * anything shorter than an Ethernet header.
1170 */
1171 if (unlikely(skb->len < ETH_HLEN)) {
1172 dev_kfree_skb(skb);
1173 return NETDEV_TX_OK;
1174 }
1175
Divy Le Ray82ad3322008-12-16 01:09:39 -08001176 qidx = skb_get_queue_mapping(skb);
1177 qs = &pi->qs[qidx];
1178 q = &qs->txq[TXQ_ETH];
1179 txq = netdev_get_tx_queue(dev, qidx);
1180
Divy Le Ray4d22de32007-01-18 22:04:14 -05001181 spin_lock(&q->lock);
1182 reclaim_completed_tx(adap, q);
1183
1184 credits = q->size - q->in_use;
1185 ndesc = calc_tx_descs(skb);
1186
1187 if (unlikely(credits < ndesc)) {
Divy Le Ray82ad3322008-12-16 01:09:39 -08001188 t3_stop_tx_queue(txq, qs, q);
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301189 dev_err(&adap->pdev->dev,
1190 "%s: Tx ring %u full while queue awake!\n",
1191 dev->name, q->cntxt_id & 7);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001192 spin_unlock(&q->lock);
1193 return NETDEV_TX_BUSY;
1194 }
1195
1196 q->in_use += ndesc;
Divy Le Raycd7e9032008-03-13 00:13:30 -07001197 if (unlikely(credits - ndesc < q->stop_thres)) {
Divy Le Ray82ad3322008-12-16 01:09:39 -08001198 t3_stop_tx_queue(txq, qs, q);
Divy Le Raycd7e9032008-03-13 00:13:30 -07001199
1200 if (should_restart_tx(q) &&
1201 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1202 q->restarts++;
Divy Le Ray82ad3322008-12-16 01:09:39 -08001203 netif_tx_wake_queue(txq);
Divy Le Raycd7e9032008-03-13 00:13:30 -07001204 }
1205 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001206
1207 gen = q->gen;
1208 q->unacked += ndesc;
1209 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1210 q->unacked &= 7;
1211 pidx = q->pidx;
1212 q->pidx += ndesc;
1213 if (q->pidx >= q->size) {
1214 q->pidx -= q->size;
1215 q->gen ^= 1;
1216 }
1217
1218 /* update port statistics */
1219 if (skb->ip_summed == CHECKSUM_COMPLETE)
1220 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1221 if (skb_shinfo(skb)->gso_size)
1222 qs->port_stats[SGE_PSTAT_TSO]++;
1223 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1224 qs->port_stats[SGE_PSTAT_VLANINS]++;
1225
1226 dev->trans_start = jiffies;
1227 spin_unlock(&q->lock);
1228
1229 /*
1230 * We do not use Tx completion interrupts to free DMAd Tx packets.
1231 * This is good for performamce but means that we rely on new Tx
1232 * packets arriving to run the destructors of completed packets,
1233 * which open up space in their sockets' send queues. Sometimes
1234 * we do not get such new packets causing Tx to stall. A single
1235 * UDP transmitter is a good example of this situation. We have
1236 * a clean up timer that periodically reclaims completed packets
1237 * but it doesn't run often enough (nor do we want it to) to prevent
1238 * lengthy stalls. A solution to this problem is to run the
1239 * destructor early, after the packet is queued but before it's DMAd.
1240 * A cons is that we lie to socket memory accounting, but the amount
1241 * of extra memory is reasonable (limited by the number of Tx
1242 * descriptors), the packets do actually get freed quickly by new
1243 * packets almost always, and for protocols like TCP that wait for
1244 * acks to really free up the data the extra memory is even less.
1245 * On the positive side we run the destructors on the sending CPU
1246 * rather than on a potentially different completing CPU, usually a
1247 * good thing. We also run them without holding our Tx queue lock,
1248 * unlike what reclaim_completed_tx() would otherwise do.
1249 *
1250 * Run the destructor before telling the DMA engine about the packet
1251 * to make sure it doesn't complete and get freed prematurely.
1252 */
1253 if (likely(!skb_shared(skb)))
1254 skb_orphan(skb);
1255
1256 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1257 check_ring_tx_db(adap, q);
1258 return NETDEV_TX_OK;
1259}
1260
1261/**
1262 * write_imm - write a packet into a Tx descriptor as immediate data
1263 * @d: the Tx descriptor to write
1264 * @skb: the packet
1265 * @len: the length of packet data to write as immediate data
1266 * @gen: the generation bit value to write
1267 *
1268 * Writes a packet as immediate data into a Tx descriptor. The packet
1269 * contains a work request at its beginning. We must write the packet
Divy Le Ray27186dc2007-08-21 20:49:15 -07001270 * carefully so the SGE doesn't read it accidentally before it's written
1271 * in its entirety.
Divy Le Ray4d22de32007-01-18 22:04:14 -05001272 */
1273static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1274 unsigned int len, unsigned int gen)
1275{
1276 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1277 struct work_request_hdr *to = (struct work_request_hdr *)d;
1278
Divy Le Ray27186dc2007-08-21 20:49:15 -07001279 if (likely(!skb->data_len))
1280 memcpy(&to[1], &from[1], len - sizeof(*from));
1281 else
1282 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1283
Divy Le Ray4d22de32007-01-18 22:04:14 -05001284 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1285 V_WR_BCNTLFLT(len & 7));
1286 wmb();
1287 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1288 V_WR_LEN((len + 7) / 8));
1289 wr_gen2(d, gen);
1290 kfree_skb(skb);
1291}
1292
1293/**
1294 * check_desc_avail - check descriptor availability on a send queue
1295 * @adap: the adapter
1296 * @q: the send queue
1297 * @skb: the packet needing the descriptors
1298 * @ndesc: the number of Tx descriptors needed
1299 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1300 *
1301 * Checks if the requested number of Tx descriptors is available on an
1302 * SGE send queue. If the queue is already suspended or not enough
1303 * descriptors are available the packet is queued for later transmission.
1304 * Must be called with the Tx queue locked.
1305 *
1306 * Returns 0 if enough descriptors are available, 1 if there aren't
1307 * enough descriptors and the packet has been queued, and 2 if the caller
1308 * needs to retry because there weren't enough descriptors at the
1309 * beginning of the call but some freed up in the mean time.
1310 */
1311static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1312 struct sk_buff *skb, unsigned int ndesc,
1313 unsigned int qid)
1314{
1315 if (unlikely(!skb_queue_empty(&q->sendq))) {
1316 addq_exit:__skb_queue_tail(&q->sendq, skb);
1317 return 1;
1318 }
1319 if (unlikely(q->size - q->in_use < ndesc)) {
1320 struct sge_qset *qs = txq_to_qset(q, qid);
1321
1322 set_bit(qid, &qs->txq_stopped);
1323 smp_mb__after_clear_bit();
1324
1325 if (should_restart_tx(q) &&
1326 test_and_clear_bit(qid, &qs->txq_stopped))
1327 return 2;
1328
1329 q->stops++;
1330 goto addq_exit;
1331 }
1332 return 0;
1333}
1334
1335/**
1336 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1337 * @q: the SGE control Tx queue
1338 *
1339 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1340 * that send only immediate data (presently just the control queues) and
1341 * thus do not have any sk_buffs to release.
1342 */
1343static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1344{
1345 unsigned int reclaim = q->processed - q->cleaned;
1346
1347 q->in_use -= reclaim;
1348 q->cleaned += reclaim;
1349}
1350
1351static inline int immediate(const struct sk_buff *skb)
1352{
Divy Le Ray27186dc2007-08-21 20:49:15 -07001353 return skb->len <= WR_LEN;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001354}
1355
1356/**
1357 * ctrl_xmit - send a packet through an SGE control Tx queue
1358 * @adap: the adapter
1359 * @q: the control queue
1360 * @skb: the packet
1361 *
1362 * Send a packet through an SGE control Tx queue. Packets sent through
1363 * a control queue must fit entirely as immediate data in a single Tx
1364 * descriptor and have no page fragments.
1365 */
1366static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1367 struct sk_buff *skb)
1368{
1369 int ret;
1370 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1371
1372 if (unlikely(!immediate(skb))) {
1373 WARN_ON(1);
1374 dev_kfree_skb(skb);
1375 return NET_XMIT_SUCCESS;
1376 }
1377
1378 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1379 wrp->wr_lo = htonl(V_WR_TID(q->token));
1380
1381 spin_lock(&q->lock);
1382 again:reclaim_completed_tx_imm(q);
1383
1384 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1385 if (unlikely(ret)) {
1386 if (ret == 1) {
1387 spin_unlock(&q->lock);
1388 return NET_XMIT_CN;
1389 }
1390 goto again;
1391 }
1392
1393 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1394
1395 q->in_use++;
1396 if (++q->pidx >= q->size) {
1397 q->pidx = 0;
1398 q->gen ^= 1;
1399 }
1400 spin_unlock(&q->lock);
1401 wmb();
1402 t3_write_reg(adap, A_SG_KDOORBELL,
1403 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1404 return NET_XMIT_SUCCESS;
1405}
1406
1407/**
1408 * restart_ctrlq - restart a suspended control queue
1409 * @qs: the queue set cotaining the control queue
1410 *
1411 * Resumes transmission on a suspended Tx control queue.
1412 */
1413static void restart_ctrlq(unsigned long data)
1414{
1415 struct sk_buff *skb;
1416 struct sge_qset *qs = (struct sge_qset *)data;
1417 struct sge_txq *q = &qs->txq[TXQ_CTRL];
Divy Le Ray4d22de32007-01-18 22:04:14 -05001418
1419 spin_lock(&q->lock);
1420 again:reclaim_completed_tx_imm(q);
1421
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001422 while (q->in_use < q->size &&
1423 (skb = __skb_dequeue(&q->sendq)) != NULL) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05001424
1425 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1426
1427 if (++q->pidx >= q->size) {
1428 q->pidx = 0;
1429 q->gen ^= 1;
1430 }
1431 q->in_use++;
1432 }
1433
1434 if (!skb_queue_empty(&q->sendq)) {
1435 set_bit(TXQ_CTRL, &qs->txq_stopped);
1436 smp_mb__after_clear_bit();
1437
1438 if (should_restart_tx(q) &&
1439 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1440 goto again;
1441 q->stops++;
1442 }
1443
1444 spin_unlock(&q->lock);
Divy Le Rayafefce62007-11-16 11:22:21 -08001445 wmb();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001446 t3_write_reg(qs->adap, A_SG_KDOORBELL,
Divy Le Ray4d22de32007-01-18 22:04:14 -05001447 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1448}
1449
Divy Le Ray14ab9892007-01-30 19:43:50 -08001450/*
1451 * Send a management message through control queue 0
1452 */
1453int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1454{
Divy Le Ray204e2f92008-05-06 19:26:01 -07001455 int ret;
Divy Le Raybc4b6b522007-12-17 18:47:41 -08001456 local_bh_disable();
1457 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1458 local_bh_enable();
1459
1460 return ret;
Divy Le Ray14ab9892007-01-30 19:43:50 -08001461}
1462
Divy Le Ray4d22de32007-01-18 22:04:14 -05001463/**
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001464 * deferred_unmap_destructor - unmap a packet when it is freed
1465 * @skb: the packet
1466 *
1467 * This is the packet destructor used for Tx packets that need to remain
1468 * mapped until they are freed rather than until their Tx descriptors are
1469 * freed.
1470 */
1471static void deferred_unmap_destructor(struct sk_buff *skb)
1472{
1473 int i;
1474 const dma_addr_t *p;
1475 const struct skb_shared_info *si;
1476 const struct deferred_unmap_info *dui;
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001477
1478 dui = (struct deferred_unmap_info *)skb->head;
1479 p = dui->addr;
1480
Divy Le Ray23561c92007-11-16 11:22:05 -08001481 if (skb->tail - skb->transport_header)
1482 pci_unmap_single(dui->pdev, *p++,
1483 skb->tail - skb->transport_header,
1484 PCI_DMA_TODEVICE);
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001485
1486 si = skb_shinfo(skb);
1487 for (i = 0; i < si->nr_frags; i++)
1488 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1489 PCI_DMA_TODEVICE);
1490}
1491
1492static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1493 const struct sg_ent *sgl, int sgl_flits)
1494{
1495 dma_addr_t *p;
1496 struct deferred_unmap_info *dui;
1497
1498 dui = (struct deferred_unmap_info *)skb->head;
1499 dui->pdev = pdev;
1500 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1501 *p++ = be64_to_cpu(sgl->addr[0]);
1502 *p++ = be64_to_cpu(sgl->addr[1]);
1503 }
1504 if (sgl_flits)
1505 *p = be64_to_cpu(sgl->addr[0]);
1506}
1507
1508/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001509 * write_ofld_wr - write an offload work request
1510 * @adap: the adapter
1511 * @skb: the packet to send
1512 * @q: the Tx queue
1513 * @pidx: index of the first Tx descriptor to write
1514 * @gen: the generation value to use
1515 * @ndesc: number of descriptors the packet will occupy
1516 *
1517 * Write an offload work request to send the supplied packet. The packet
1518 * data already carry the work request with most fields populated.
1519 */
1520static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1521 struct sge_txq *q, unsigned int pidx,
1522 unsigned int gen, unsigned int ndesc)
1523{
1524 unsigned int sgl_flits, flits;
1525 struct work_request_hdr *from;
1526 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1527 struct tx_desc *d = &q->desc[pidx];
1528
1529 if (immediate(skb)) {
1530 q->sdesc[pidx].skb = NULL;
1531 write_imm(d, skb, skb->len, gen);
1532 return;
1533 }
1534
1535 /* Only TX_DATA builds SGLs */
1536
1537 from = (struct work_request_hdr *)skb->data;
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001538 memcpy(&d->flit[1], &from[1],
1539 skb_transport_offset(skb) - sizeof(*from));
Divy Le Ray4d22de32007-01-18 22:04:14 -05001540
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001541 flits = skb_transport_offset(skb) / 8;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001542 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001543 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001544 skb->tail - skb->transport_header,
Divy Le Ray4d22de32007-01-18 22:04:14 -05001545 adap->pdev);
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001546 if (need_skb_unmap()) {
1547 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1548 skb->destructor = deferred_unmap_destructor;
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001549 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001550
1551 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1552 gen, from->wr_hi, from->wr_lo);
1553}
1554
1555/**
1556 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1557 * @skb: the packet
1558 *
1559 * Returns the number of Tx descriptors needed for the given offload
1560 * packet. These packets are already fully constructed.
1561 */
1562static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1563{
Divy Le Ray27186dc2007-08-21 20:49:15 -07001564 unsigned int flits, cnt;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001565
Divy Le Ray27186dc2007-08-21 20:49:15 -07001566 if (skb->len <= WR_LEN)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001567 return 1; /* packet fits as immediate data */
1568
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001569 flits = skb_transport_offset(skb) / 8; /* headers */
Divy Le Ray27186dc2007-08-21 20:49:15 -07001570 cnt = skb_shinfo(skb)->nr_frags;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001571 if (skb->tail != skb->transport_header)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001572 cnt++;
1573 return flits_to_desc(flits + sgl_len(cnt));
1574}
1575
1576/**
1577 * ofld_xmit - send a packet through an offload queue
1578 * @adap: the adapter
1579 * @q: the Tx offload queue
1580 * @skb: the packet
1581 *
1582 * Send an offload packet through an SGE offload queue.
1583 */
1584static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1585 struct sk_buff *skb)
1586{
1587 int ret;
1588 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1589
1590 spin_lock(&q->lock);
1591 again:reclaim_completed_tx(adap, q);
1592
1593 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1594 if (unlikely(ret)) {
1595 if (ret == 1) {
1596 skb->priority = ndesc; /* save for restart */
1597 spin_unlock(&q->lock);
1598 return NET_XMIT_CN;
1599 }
1600 goto again;
1601 }
1602
1603 gen = q->gen;
1604 q->in_use += ndesc;
1605 pidx = q->pidx;
1606 q->pidx += ndesc;
1607 if (q->pidx >= q->size) {
1608 q->pidx -= q->size;
1609 q->gen ^= 1;
1610 }
1611 spin_unlock(&q->lock);
1612
1613 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1614 check_ring_tx_db(adap, q);
1615 return NET_XMIT_SUCCESS;
1616}
1617
1618/**
1619 * restart_offloadq - restart a suspended offload queue
1620 * @qs: the queue set cotaining the offload queue
1621 *
1622 * Resumes transmission on a suspended Tx offload queue.
1623 */
1624static void restart_offloadq(unsigned long data)
1625{
1626 struct sk_buff *skb;
1627 struct sge_qset *qs = (struct sge_qset *)data;
1628 struct sge_txq *q = &qs->txq[TXQ_OFLD];
Divy Le Ray5fbf8162007-08-29 19:15:47 -07001629 const struct port_info *pi = netdev_priv(qs->netdev);
1630 struct adapter *adap = pi->adapter;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001631
1632 spin_lock(&q->lock);
1633 again:reclaim_completed_tx(adap, q);
1634
1635 while ((skb = skb_peek(&q->sendq)) != NULL) {
1636 unsigned int gen, pidx;
1637 unsigned int ndesc = skb->priority;
1638
1639 if (unlikely(q->size - q->in_use < ndesc)) {
1640 set_bit(TXQ_OFLD, &qs->txq_stopped);
1641 smp_mb__after_clear_bit();
1642
1643 if (should_restart_tx(q) &&
1644 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1645 goto again;
1646 q->stops++;
1647 break;
1648 }
1649
1650 gen = q->gen;
1651 q->in_use += ndesc;
1652 pidx = q->pidx;
1653 q->pidx += ndesc;
1654 if (q->pidx >= q->size) {
1655 q->pidx -= q->size;
1656 q->gen ^= 1;
1657 }
1658 __skb_unlink(skb, &q->sendq);
1659 spin_unlock(&q->lock);
1660
1661 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1662 spin_lock(&q->lock);
1663 }
1664 spin_unlock(&q->lock);
1665
1666#if USE_GTS
1667 set_bit(TXQ_RUNNING, &q->flags);
1668 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1669#endif
Divy Le Rayafefce62007-11-16 11:22:21 -08001670 wmb();
Divy Le Ray4d22de32007-01-18 22:04:14 -05001671 t3_write_reg(adap, A_SG_KDOORBELL,
1672 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1673}
1674
1675/**
1676 * queue_set - return the queue set a packet should use
1677 * @skb: the packet
1678 *
1679 * Maps a packet to the SGE queue set it should use. The desired queue
1680 * set is carried in bits 1-3 in the packet's priority.
1681 */
1682static inline int queue_set(const struct sk_buff *skb)
1683{
1684 return skb->priority >> 1;
1685}
1686
1687/**
1688 * is_ctrl_pkt - return whether an offload packet is a control packet
1689 * @skb: the packet
1690 *
1691 * Determines whether an offload packet should use an OFLD or a CTRL
1692 * Tx queue. This is indicated by bit 0 in the packet's priority.
1693 */
1694static inline int is_ctrl_pkt(const struct sk_buff *skb)
1695{
1696 return skb->priority & 1;
1697}
1698
1699/**
1700 * t3_offload_tx - send an offload packet
1701 * @tdev: the offload device to send to
1702 * @skb: the packet
1703 *
1704 * Sends an offload packet. We use the packet priority to select the
1705 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1706 * should be sent as regular or control, bits 1-3 select the queue set.
1707 */
1708int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1709{
1710 struct adapter *adap = tdev2adap(tdev);
1711 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1712
1713 if (unlikely(is_ctrl_pkt(skb)))
1714 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1715
1716 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1717}
1718
1719/**
1720 * offload_enqueue - add an offload packet to an SGE offload receive queue
1721 * @q: the SGE response queue
1722 * @skb: the packet
1723 *
1724 * Add a new offload packet to an SGE response queue's offload packet
1725 * queue. If the packet is the first on the queue it schedules the RX
1726 * softirq to process the queue.
1727 */
1728static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1729{
David S. Miller147e70e2008-09-22 01:29:52 -07001730 int was_empty = skb_queue_empty(&q->rx_queue);
1731
1732 __skb_queue_tail(&q->rx_queue, skb);
1733
1734 if (was_empty) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05001735 struct sge_qset *qs = rspq_to_qset(q);
1736
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001737 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001738 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001739}
1740
1741/**
1742 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1743 * @tdev: the offload device that will be receiving the packets
1744 * @q: the SGE response queue that assembled the bundle
1745 * @skbs: the partial bundle
1746 * @n: the number of packets in the bundle
1747 *
1748 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1749 */
1750static inline void deliver_partial_bundle(struct t3cdev *tdev,
1751 struct sge_rspq *q,
1752 struct sk_buff *skbs[], int n)
1753{
1754 if (n) {
1755 q->offload_bundles++;
1756 tdev->recv(tdev, skbs, n);
1757 }
1758}
1759
1760/**
1761 * ofld_poll - NAPI handler for offload packets in interrupt mode
1762 * @dev: the network device doing the polling
1763 * @budget: polling budget
1764 *
1765 * The NAPI handler for offload packets when a response queue is serviced
1766 * by the hard interrupt handler, i.e., when it's operating in non-polling
1767 * mode. Creates small packet batches and sends them through the offload
1768 * receive handler. Batches need to be of modest size as we do prefetches
1769 * on the packets in each.
1770 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001771static int ofld_poll(struct napi_struct *napi, int budget)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001772{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001773 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001774 struct sge_rspq *q = &qs->rspq;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001775 struct adapter *adapter = qs->adap;
1776 int work_done = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001777
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001778 while (work_done < budget) {
David S. Miller147e70e2008-09-22 01:29:52 -07001779 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1780 struct sk_buff_head queue;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001781 int ngathered;
1782
1783 spin_lock_irq(&q->lock);
David S. Miller147e70e2008-09-22 01:29:52 -07001784 __skb_queue_head_init(&queue);
1785 skb_queue_splice_init(&q->rx_queue, &queue);
1786 if (skb_queue_empty(&queue)) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001787 napi_complete(napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001788 spin_unlock_irq(&q->lock);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001789 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001790 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001791 spin_unlock_irq(&q->lock);
1792
David S. Miller147e70e2008-09-22 01:29:52 -07001793 ngathered = 0;
1794 skb_queue_walk_safe(&queue, skb, tmp) {
1795 if (work_done >= budget)
1796 break;
1797 work_done++;
1798
1799 __skb_unlink(skb, &queue);
1800 prefetch(skb->data);
1801 skbs[ngathered] = skb;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001802 if (++ngathered == RX_BUNDLE_SIZE) {
1803 q->offload_bundles++;
1804 adapter->tdev.recv(&adapter->tdev, skbs,
1805 ngathered);
1806 ngathered = 0;
1807 }
1808 }
David S. Miller147e70e2008-09-22 01:29:52 -07001809 if (!skb_queue_empty(&queue)) {
1810 /* splice remaining packets back onto Rx queue */
Divy Le Ray4d22de32007-01-18 22:04:14 -05001811 spin_lock_irq(&q->lock);
David S. Miller147e70e2008-09-22 01:29:52 -07001812 skb_queue_splice(&queue, &q->rx_queue);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001813 spin_unlock_irq(&q->lock);
1814 }
1815 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1816 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001817
1818 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001819}
1820
1821/**
1822 * rx_offload - process a received offload packet
1823 * @tdev: the offload device receiving the packet
1824 * @rq: the response queue that received the packet
1825 * @skb: the packet
1826 * @rx_gather: a gather list of packets if we are building a bundle
1827 * @gather_idx: index of the next available slot in the bundle
1828 *
1829 * Process an ingress offload pakcet and add it to the offload ingress
1830 * queue. Returns the index of the next available slot in the bundle.
1831 */
1832static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1833 struct sk_buff *skb, struct sk_buff *rx_gather[],
1834 unsigned int gather_idx)
1835{
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001836 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001837 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03001838 skb_reset_transport_header(skb);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001839
1840 if (rq->polling) {
1841 rx_gather[gather_idx++] = skb;
1842 if (gather_idx == RX_BUNDLE_SIZE) {
1843 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1844 gather_idx = 0;
1845 rq->offload_bundles++;
1846 }
1847 } else
1848 offload_enqueue(rq, skb);
1849
1850 return gather_idx;
1851}
1852
1853/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001854 * restart_tx - check whether to restart suspended Tx queues
1855 * @qs: the queue set to resume
1856 *
1857 * Restarts suspended Tx queues of an SGE queue set if they have enough
1858 * free resources to resume operation.
1859 */
1860static void restart_tx(struct sge_qset *qs)
1861{
1862 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1863 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1864 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1865 qs->txq[TXQ_ETH].restarts++;
1866 if (netif_running(qs->netdev))
Divy Le Ray82ad3322008-12-16 01:09:39 -08001867 netif_tx_wake_queue(qs->tx_q);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001868 }
1869
1870 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1871 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1872 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1873 qs->txq[TXQ_OFLD].restarts++;
1874 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1875 }
1876 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1877 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1878 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1879 qs->txq[TXQ_CTRL].restarts++;
1880 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1881 }
1882}
1883
1884/**
Karen Xiea109a5b2008-12-18 22:56:20 -08001885 * cxgb3_arp_process - process an ARP request probing a private IP address
1886 * @adapter: the adapter
1887 * @skb: the skbuff containing the ARP request
1888 *
1889 * Check if the ARP request is probing the private IP address
1890 * dedicated to iSCSI, generate an ARP reply if so.
1891 */
1892static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1893{
1894 struct net_device *dev = skb->dev;
1895 struct port_info *pi;
1896 struct arphdr *arp;
1897 unsigned char *arp_ptr;
1898 unsigned char *sha;
1899 __be32 sip, tip;
1900
1901 if (!dev)
1902 return;
1903
1904 skb_reset_network_header(skb);
1905 arp = arp_hdr(skb);
1906
1907 if (arp->ar_op != htons(ARPOP_REQUEST))
1908 return;
1909
1910 arp_ptr = (unsigned char *)(arp + 1);
1911 sha = arp_ptr;
1912 arp_ptr += dev->addr_len;
1913 memcpy(&sip, arp_ptr, sizeof(sip));
1914 arp_ptr += sizeof(sip);
1915 arp_ptr += dev->addr_len;
1916 memcpy(&tip, arp_ptr, sizeof(tip));
1917
1918 pi = netdev_priv(dev);
1919 if (tip != pi->iscsi_ipv4addr)
1920 return;
1921
1922 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1923 dev->dev_addr, sha);
1924
1925}
1926
1927static inline int is_arp(struct sk_buff *skb)
1928{
1929 return skb->protocol == htons(ETH_P_ARP);
1930}
1931
1932/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001933 * rx_eth - process an ingress ethernet packet
1934 * @adap: the adapter
1935 * @rq: the response queue that received the packet
1936 * @skb: the packet
1937 * @pad: amount of padding at the start of the buffer
1938 *
1939 * Process an ingress ethernet pakcet and deliver it to the stack.
1940 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1941 * if it was immediate data in a response.
1942 */
1943static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
Divy Le Rayb47385b2008-05-21 18:56:26 -07001944 struct sk_buff *skb, int pad, int lro)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001945{
1946 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
Divy Le Rayb47385b2008-05-21 18:56:26 -07001947 struct sge_qset *qs = rspq_to_qset(rq);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001948 struct port_info *pi;
1949
Divy Le Ray4d22de32007-01-18 22:04:14 -05001950 skb_pull(skb, sizeof(*p) + pad);
Arnaldo Carvalho de Melo4c13eb62007-04-25 17:40:23 -07001951 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001952 pi = netdev_priv(skb->dev);
Roland Dreier47fd23f2009-01-11 00:19:36 -08001953 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
Divy Le Ray4d22de32007-01-18 22:04:14 -05001954 !p->fragment) {
Karen Xiea109a5b2008-12-18 22:56:20 -08001955 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001956 skb->ip_summed = CHECKSUM_UNNECESSARY;
1957 } else
1958 skb->ip_summed = CHECKSUM_NONE;
David S. Miller0c8dfc82009-01-27 16:22:32 -08001959 skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001960
1961 if (unlikely(p->vlan_valid)) {
1962 struct vlan_group *grp = pi->vlan_grp;
1963
Divy Le Rayb47385b2008-05-21 18:56:26 -07001964 qs->port_stats[SGE_PSTAT_VLANEX]++;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001965 if (likely(grp))
Divy Le Rayb47385b2008-05-21 18:56:26 -07001966 if (lro)
Herbert Xu7be2df42009-01-21 14:39:13 -08001967 vlan_gro_receive(&qs->napi, grp,
1968 ntohs(p->vlan), skb);
Karen Xiea109a5b2008-12-18 22:56:20 -08001969 else {
1970 if (unlikely(pi->iscsi_ipv4addr &&
1971 is_arp(skb))) {
1972 unsigned short vtag = ntohs(p->vlan) &
1973 VLAN_VID_MASK;
1974 skb->dev = vlan_group_get_device(grp,
1975 vtag);
1976 cxgb3_arp_process(adap, skb);
1977 }
Divy Le Rayb47385b2008-05-21 18:56:26 -07001978 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1979 rq->polling);
Karen Xiea109a5b2008-12-18 22:56:20 -08001980 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001981 else
1982 dev_kfree_skb_any(skb);
Divy Le Rayb47385b2008-05-21 18:56:26 -07001983 } else if (rq->polling) {
1984 if (lro)
Herbert Xu7be2df42009-01-21 14:39:13 -08001985 napi_gro_receive(&qs->napi, skb);
Karen Xiea109a5b2008-12-18 22:56:20 -08001986 else {
1987 if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
1988 cxgb3_arp_process(adap, skb);
Divy Le Rayb47385b2008-05-21 18:56:26 -07001989 netif_receive_skb(skb);
Karen Xiea109a5b2008-12-18 22:56:20 -08001990 }
Divy Le Rayb47385b2008-05-21 18:56:26 -07001991 } else
Divy Le Ray4d22de32007-01-18 22:04:14 -05001992 netif_rx(skb);
1993}
1994
Divy Le Rayb47385b2008-05-21 18:56:26 -07001995static inline int is_eth_tcp(u32 rss)
1996{
1997 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
1998}
1999
2000/**
Divy Le Rayb47385b2008-05-21 18:56:26 -07002001 * lro_add_page - add a page chunk to an LRO session
2002 * @adap: the adapter
2003 * @qs: the associated queue set
2004 * @fl: the free list containing the page chunk to add
2005 * @len: packet length
2006 * @complete: Indicates the last fragment of a frame
2007 *
2008 * Add a received packet contained in a page chunk to an existing LRO
2009 * session.
2010 */
2011static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2012 struct sge_fl *fl, int len, int complete)
2013{
2014 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2015 struct cpl_rx_pkt *cpl;
Herbert Xu7be2df42009-01-21 14:39:13 -08002016 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2017 int nr_frags = qs->lro_frag_tbl.nr_frags;
2018 int frag_len = qs->lro_frag_tbl.len;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002019 int offset = 0;
2020
2021 if (!nr_frags) {
2022 offset = 2 + sizeof(struct cpl_rx_pkt);
2023 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2024 }
2025
2026 fl->credits--;
2027
2028 len -= offset;
2029 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2030 fl->buf_size, PCI_DMA_FROMDEVICE);
2031
Divy Le Rayb2b964f2009-03-12 21:13:59 +00002032 prefetch(&qs->lro_frag_tbl);
2033
Divy Le Rayb47385b2008-05-21 18:56:26 -07002034 rx_frag += nr_frags;
2035 rx_frag->page = sd->pg_chunk.page;
2036 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2037 rx_frag->size = len;
2038 frag_len += len;
Herbert Xu7be2df42009-01-21 14:39:13 -08002039 qs->lro_frag_tbl.nr_frags++;
2040 qs->lro_frag_tbl.len = frag_len;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002041
2042 if (!complete)
2043 return;
2044
Herbert Xu7be2df42009-01-21 14:39:13 -08002045 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002046 cpl = qs->lro_va;
2047
2048 if (unlikely(cpl->vlan_valid)) {
2049 struct net_device *dev = qs->netdev;
2050 struct port_info *pi = netdev_priv(dev);
2051 struct vlan_group *grp = pi->vlan_grp;
2052
2053 if (likely(grp != NULL)) {
Herbert Xu7be2df42009-01-21 14:39:13 -08002054 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2055 &qs->lro_frag_tbl);
2056 goto out;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002057 }
2058 }
Herbert Xu7be2df42009-01-21 14:39:13 -08002059 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002060
Herbert Xu7be2df42009-01-21 14:39:13 -08002061out:
2062 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002063}
2064
Divy Le Ray4d22de32007-01-18 22:04:14 -05002065/**
2066 * handle_rsp_cntrl_info - handles control information in a response
2067 * @qs: the queue set corresponding to the response
2068 * @flags: the response control flags
Divy Le Ray4d22de32007-01-18 22:04:14 -05002069 *
2070 * Handles the control information of an SGE response, such as GTS
2071 * indications and completion credits for the queue set's Tx queues.
Divy Le Ray6195c712007-01-30 19:43:56 -08002072 * HW coalesces credits, we don't do any extra SW coalescing.
Divy Le Ray4d22de32007-01-18 22:04:14 -05002073 */
Divy Le Ray6195c712007-01-30 19:43:56 -08002074static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002075{
2076 unsigned int credits;
2077
2078#if USE_GTS
2079 if (flags & F_RSPD_TXQ0_GTS)
2080 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2081#endif
2082
Divy Le Ray4d22de32007-01-18 22:04:14 -05002083 credits = G_RSPD_TXQ0_CR(flags);
2084 if (credits)
2085 qs->txq[TXQ_ETH].processed += credits;
2086
Divy Le Ray6195c712007-01-30 19:43:56 -08002087 credits = G_RSPD_TXQ2_CR(flags);
2088 if (credits)
2089 qs->txq[TXQ_CTRL].processed += credits;
2090
Divy Le Ray4d22de32007-01-18 22:04:14 -05002091# if USE_GTS
2092 if (flags & F_RSPD_TXQ1_GTS)
2093 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2094# endif
Divy Le Ray6195c712007-01-30 19:43:56 -08002095 credits = G_RSPD_TXQ1_CR(flags);
2096 if (credits)
2097 qs->txq[TXQ_OFLD].processed += credits;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002098}
2099
2100/**
2101 * check_ring_db - check if we need to ring any doorbells
2102 * @adapter: the adapter
2103 * @qs: the queue set whose Tx queues are to be examined
2104 * @sleeping: indicates which Tx queue sent GTS
2105 *
2106 * Checks if some of a queue set's Tx queues need to ring their doorbells
2107 * to resume transmission after idling while they still have unprocessed
2108 * descriptors.
2109 */
2110static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2111 unsigned int sleeping)
2112{
2113 if (sleeping & F_RSPD_TXQ0_GTS) {
2114 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2115
2116 if (txq->cleaned + txq->in_use != txq->processed &&
2117 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2118 set_bit(TXQ_RUNNING, &txq->flags);
2119 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2120 V_EGRCNTX(txq->cntxt_id));
2121 }
2122 }
2123
2124 if (sleeping & F_RSPD_TXQ1_GTS) {
2125 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2126
2127 if (txq->cleaned + txq->in_use != txq->processed &&
2128 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2129 set_bit(TXQ_RUNNING, &txq->flags);
2130 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2131 V_EGRCNTX(txq->cntxt_id));
2132 }
2133 }
2134}
2135
2136/**
2137 * is_new_response - check if a response is newly written
2138 * @r: the response descriptor
2139 * @q: the response queue
2140 *
2141 * Returns true if a response descriptor contains a yet unprocessed
2142 * response.
2143 */
2144static inline int is_new_response(const struct rsp_desc *r,
2145 const struct sge_rspq *q)
2146{
2147 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2148}
2149
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002150static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2151{
2152 q->pg_skb = NULL;
2153 q->rx_recycle_buf = 0;
2154}
2155
/* GTS indication bits of the two Tx queues that report them. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)

/* Every response flag bit carrying Tx control info: credits and GTS. */
#define RSPD_CTRL_MASK (V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR) | \
			RSPD_GTS_MASK)

/* Delay before the next interrupt when memory is short, in 0.1us units. */
#define NOMEM_INTR_DELAY 2500
2164
2165/**
2166 * process_responses - process responses from an SGE response queue
2167 * @adap: the adapter
2168 * @qs: the queue set to which the response queue belongs
2169 * @budget: how many responses can be processed in this round
2170 *
2171 * Process responses from an SGE response queue up to the supplied budget.
2172 * Responses include received packets as well as credits and other events
2173 * for the queues that belong to the response queue's queue set.
2174 * A negative budget is effectively unlimited.
2175 *
2176 * Additionally choose the interrupt holdoff time for the next interrupt
2177 * on this queue. If the system is under memory shortage use a fairly
2178 * long delay to help recovery.
2179 */
2180static int process_responses(struct adapter *adap, struct sge_qset *qs,
2181 int budget)
2182{
2183 struct sge_rspq *q = &qs->rspq;
2184 struct rsp_desc *r = &q->desc[q->cidx];
2185 int budget_left = budget;
Divy Le Ray6195c712007-01-30 19:43:56 -08002186 unsigned int sleeping = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002187 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2188 int ngathered = 0;
2189
2190 q->next_holdoff = q->holdoff_tmr;
2191
2192 while (likely(budget_left && is_new_response(r, q))) {
Divy Le Rayb47385b2008-05-21 18:56:26 -07002193 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002194 struct sk_buff *skb = NULL;
2195 u32 len, flags = ntohl(r->flags);
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002196 __be32 rss_hi = *(const __be32 *)r,
2197 rss_lo = r->rss_hdr.rss_hash_val;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002198
2199 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2200
2201 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2202 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2203 if (!skb)
2204 goto no_mem;
2205
2206 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2207 skb->data[0] = CPL_ASYNC_NOTIF;
2208 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2209 q->async_notif++;
2210 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2211 skb = get_imm_packet(r);
2212 if (unlikely(!skb)) {
Divy Le Raycf992af2007-05-30 21:10:47 -07002213no_mem:
Divy Le Ray4d22de32007-01-18 22:04:14 -05002214 q->next_holdoff = NOMEM_INTR_DELAY;
2215 q->nomem++;
2216 /* consume one credit since we tried */
2217 budget_left--;
2218 break;
2219 }
2220 q->imm_data++;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002221 ethpad = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002222 } else if ((len = ntohl(r->len_cq)) != 0) {
Divy Le Raycf992af2007-05-30 21:10:47 -07002223 struct sge_fl *fl;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002224
Divy Le Ray65ab8382009-02-04 16:31:39 -08002225 lro &= eth && is_eth_tcp(rss_hi);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002226
Divy Le Raycf992af2007-05-30 21:10:47 -07002227 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2228 if (fl->use_pages) {
2229 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002230
Divy Le Raycf992af2007-05-30 21:10:47 -07002231 prefetch(addr);
2232#if L1_CACHE_BYTES < 128
2233 prefetch(addr + L1_CACHE_BYTES);
2234#endif
Divy Le Raye0994eb2007-02-24 16:44:17 -08002235 __refill_fl(adap, fl);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002236 if (lro > 0) {
2237 lro_add_page(adap, qs, fl,
2238 G_RSPD_LEN(len),
2239 flags & F_RSPD_EOP);
2240 goto next_fl;
2241 }
Divy Le Raye0994eb2007-02-24 16:44:17 -08002242
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002243 skb = get_packet_pg(adap, fl, q,
2244 G_RSPD_LEN(len),
2245 eth ?
2246 SGE_RX_DROP_THRES : 0);
2247 q->pg_skb = skb;
Divy Le Raycf992af2007-05-30 21:10:47 -07002248 } else
Divy Le Raye0994eb2007-02-24 16:44:17 -08002249 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2250 eth ? SGE_RX_DROP_THRES : 0);
Divy Le Raycf992af2007-05-30 21:10:47 -07002251 if (unlikely(!skb)) {
2252 if (!eth)
2253 goto no_mem;
2254 q->rx_drops++;
2255 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2256 __skb_pull(skb, 2);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002257next_fl:
Divy Le Ray4d22de32007-01-18 22:04:14 -05002258 if (++fl->cidx == fl->size)
2259 fl->cidx = 0;
2260 } else
2261 q->pure_rsps++;
2262
2263 if (flags & RSPD_CTRL_MASK) {
2264 sleeping |= flags & RSPD_GTS_MASK;
Divy Le Ray6195c712007-01-30 19:43:56 -08002265 handle_rsp_cntrl_info(qs, flags);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002266 }
2267
2268 r++;
2269 if (unlikely(++q->cidx == q->size)) {
2270 q->cidx = 0;
2271 q->gen ^= 1;
2272 r = q->desc;
2273 }
2274 prefetch(r);
2275
2276 if (++q->credits >= (q->size / 4)) {
2277 refill_rspq(adap, q, q->credits);
2278 q->credits = 0;
2279 }
2280
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002281 packet_complete = flags &
2282 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2283 F_RSPD_ASYNC_NOTIF);
2284
2285 if (skb != NULL && packet_complete) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05002286 if (eth)
Divy Le Rayb47385b2008-05-21 18:56:26 -07002287 rx_eth(adap, q, skb, ethpad, lro);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002288 else {
Divy Le Rayafefce62007-11-16 11:22:21 -08002289 q->offload_pkts++;
Divy Le Raycf992af2007-05-30 21:10:47 -07002290 /* Preserve the RSS info in csum & priority */
2291 skb->csum = rss_hi;
2292 skb->priority = rss_lo;
2293 ngathered = rx_offload(&adap->tdev, q, skb,
2294 offload_skbs,
Divy Le Raye0994eb2007-02-24 16:44:17 -08002295 ngathered);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002296 }
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002297
2298 if (flags & F_RSPD_EOP)
Divy Le Rayb47385b2008-05-21 18:56:26 -07002299 clear_rspq_bufstate(q);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002300 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05002301 --budget_left;
2302 }
2303
Divy Le Ray4d22de32007-01-18 22:04:14 -05002304 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002305
Divy Le Ray4d22de32007-01-18 22:04:14 -05002306 if (sleeping)
2307 check_ring_db(adap, qs, sleeping);
2308
2309 smp_mb(); /* commit Tx queue .processed updates */
2310 if (unlikely(qs->txq_stopped != 0))
2311 restart_tx(qs);
2312
2313 budget -= budget_left;
2314 return budget;
2315}
2316
2317static inline int is_pure_response(const struct rsp_desc *r)
2318{
Roland Dreierc5419e62008-11-28 21:55:42 -08002319 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002320
2321 return (n | r->len_cq) == 0;
2322}
2323
2324/**
2325 * napi_rx_handler - the NAPI handler for Rx processing
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002326 * @napi: the napi instance
Divy Le Ray4d22de32007-01-18 22:04:14 -05002327 * @budget: how many packets we can process in this round
2328 *
2329 * Handler for new data events when using NAPI.
2330 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002331static int napi_rx_handler(struct napi_struct *napi, int budget)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002332{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002333 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2334 struct adapter *adap = qs->adap;
2335 int work_done = process_responses(adap, qs, budget);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002336
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002337 if (likely(work_done < budget)) {
2338 napi_complete(napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002339
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002340 /*
2341 * Because we don't atomically flush the following
2342 * write it is possible that in very rare cases it can
2343 * reach the device in a way that races with a new
2344 * response being written plus an error interrupt
2345 * causing the NAPI interrupt handler below to return
2346 * unhandled status to the OS. To protect against
2347 * this would require flushing the write and doing
2348 * both the write and the flush with interrupts off.
2349 * Way too expensive and unjustifiable given the
2350 * rarity of the race.
2351 *
2352 * The race cannot happen at all with MSI-X.
2353 */
2354 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2355 V_NEWTIMER(qs->rspq.next_holdoff) |
2356 V_NEWINDEX(qs->rspq.cidx));
2357 }
2358 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002359}
2360
2361/*
2362 * Returns true if the device is already scheduled for polling.
2363 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002364static inline int napi_is_scheduled(struct napi_struct *napi)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002365{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002366 return test_bit(NAPI_STATE_SCHED, &napi->state);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002367}
2368
2369/**
2370 * process_pure_responses - process pure responses from a response queue
2371 * @adap: the adapter
2372 * @qs: the queue set owning the response queue
2373 * @r: the first pure response to process
2374 *
2375 * A simpler version of process_responses() that handles only pure (i.e.,
2376 * non data-carrying) responses. Such respones are too light-weight to
2377 * justify calling a softirq under NAPI, so we handle them specially in
2378 * the interrupt handler. The function is called with a pointer to a
2379 * response, which the caller must ensure is a valid pure response.
2380 *
2381 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2382 */
2383static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2384 struct rsp_desc *r)
2385{
2386 struct sge_rspq *q = &qs->rspq;
Divy Le Ray6195c712007-01-30 19:43:56 -08002387 unsigned int sleeping = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002388
2389 do {
2390 u32 flags = ntohl(r->flags);
2391
2392 r++;
2393 if (unlikely(++q->cidx == q->size)) {
2394 q->cidx = 0;
2395 q->gen ^= 1;
2396 r = q->desc;
2397 }
2398 prefetch(r);
2399
2400 if (flags & RSPD_CTRL_MASK) {
2401 sleeping |= flags & RSPD_GTS_MASK;
Divy Le Ray6195c712007-01-30 19:43:56 -08002402 handle_rsp_cntrl_info(qs, flags);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002403 }
2404
2405 q->pure_rsps++;
2406 if (++q->credits >= (q->size / 4)) {
2407 refill_rspq(adap, q, q->credits);
2408 q->credits = 0;
2409 }
2410 } while (is_new_response(r, q) && is_pure_response(r));
2411
Divy Le Ray4d22de32007-01-18 22:04:14 -05002412 if (sleeping)
2413 check_ring_db(adap, qs, sleeping);
2414
2415 smp_mb(); /* commit Tx queue .processed updates */
2416 if (unlikely(qs->txq_stopped != 0))
2417 restart_tx(qs);
2418
2419 return is_new_response(r, q);
2420}
2421
2422/**
2423 * handle_responses - decide what to do with new responses in NAPI mode
2424 * @adap: the adapter
2425 * @q: the response queue
2426 *
2427 * This is used by the NAPI interrupt handlers to decide what to do with
2428 * new SGE responses. If there are no new responses it returns -1. If
2429 * there are new responses and they are pure (i.e., non-data carrying)
2430 * it handles them straight in hard interrupt context as they are very
2431 * cheap and don't deliver any packets. Finally, if there are any data
2432 * signaling responses it schedules the NAPI handler. Returns 1 if it
2433 * schedules NAPI, 0 if all new responses were pure.
2434 *
2435 * The caller must ascertain NAPI is not already running.
2436 */
2437static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2438{
2439 struct sge_qset *qs = rspq_to_qset(q);
2440 struct rsp_desc *r = &q->desc[q->cidx];
2441
2442 if (!is_new_response(r, q))
2443 return -1;
2444 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2445 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2446 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2447 return 0;
2448 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002449 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002450 return 1;
2451}
2452
2453/*
2454 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2455 * (i.e., response queue serviced in hard interrupt).
2456 */
2457irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2458{
2459 struct sge_qset *qs = cookie;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002460 struct adapter *adap = qs->adap;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002461 struct sge_rspq *q = &qs->rspq;
2462
2463 spin_lock(&q->lock);
2464 if (process_responses(adap, qs, -1) == 0)
2465 q->unhandled_irqs++;
2466 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2467 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2468 spin_unlock(&q->lock);
2469 return IRQ_HANDLED;
2470}
2471
2472/*
2473 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2474 * (i.e., response queue serviced by NAPI polling).
2475 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -07002476static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002477{
2478 struct sge_qset *qs = cookie;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002479 struct sge_rspq *q = &qs->rspq;
2480
2481 spin_lock(&q->lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002482
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002483 if (handle_responses(qs->adap, q) < 0)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002484 q->unhandled_irqs++;
2485 spin_unlock(&q->lock);
2486 return IRQ_HANDLED;
2487}
2488
2489/*
2490 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2491 * SGE response queues as well as error and other async events as they all use
2492 * the same MSI vector. We use one SGE response queue per port in this mode
2493 * and protect all response queues with queue 0's lock.
2494 */
2495static irqreturn_t t3_intr_msi(int irq, void *cookie)
2496{
2497 int new_packets = 0;
2498 struct adapter *adap = cookie;
2499 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2500
2501 spin_lock(&q->lock);
2502
2503 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2504 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2505 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2506 new_packets = 1;
2507 }
2508
2509 if (adap->params.nports == 2 &&
2510 process_responses(adap, &adap->sge.qs[1], -1)) {
2511 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2512
2513 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2514 V_NEWTIMER(q1->next_holdoff) |
2515 V_NEWINDEX(q1->cidx));
2516 new_packets = 1;
2517 }
2518
2519 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2520 q->unhandled_irqs++;
2521
2522 spin_unlock(&q->lock);
2523 return IRQ_HANDLED;
2524}
2525
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002526static int rspq_check_napi(struct sge_qset *qs)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002527{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002528 struct sge_rspq *q = &qs->rspq;
2529
2530 if (!napi_is_scheduled(&qs->napi) &&
2531 is_new_response(&q->desc[q->cidx], q)) {
2532 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002533 return 1;
2534 }
2535 return 0;
2536}
2537
2538/*
2539 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2540 * by NAPI polling). Handles data events from SGE response queues as well as
2541 * error and other async events as they all use the same MSI vector. We use
2542 * one SGE response queue per port in this mode and protect all response
2543 * queues with queue 0's lock.
2544 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -07002545static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002546{
2547 int new_packets;
2548 struct adapter *adap = cookie;
2549 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2550
2551 spin_lock(&q->lock);
2552
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002553 new_packets = rspq_check_napi(&adap->sge.qs[0]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002554 if (adap->params.nports == 2)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002555 new_packets += rspq_check_napi(&adap->sge.qs[1]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002556 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2557 q->unhandled_irqs++;
2558
2559 spin_unlock(&q->lock);
2560 return IRQ_HANDLED;
2561}
2562
2563/*
2564 * A helper function that processes responses and issues GTS.
2565 */
2566static inline int process_responses_gts(struct adapter *adap,
2567 struct sge_rspq *rq)
2568{
2569 int work;
2570
2571 work = process_responses(adap, rspq_to_qset(rq), -1);
2572 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2573 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2574 return work;
2575}
2576
2577/*
2578 * The legacy INTx interrupt handler. This needs to handle data events from
2579 * SGE response queues as well as error and other async events as they all use
2580 * the same interrupt pin. We use one SGE response queue per port in this mode
2581 * and protect all response queues with queue 0's lock.
2582 */
2583static irqreturn_t t3_intr(int irq, void *cookie)
2584{
2585 int work_done, w0, w1;
2586 struct adapter *adap = cookie;
2587 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2588 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2589
2590 spin_lock(&q0->lock);
2591
2592 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2593 w1 = adap->params.nports == 2 &&
2594 is_new_response(&q1->desc[q1->cidx], q1);
2595
2596 if (likely(w0 | w1)) {
2597 t3_write_reg(adap, A_PL_CLI, 0);
2598 t3_read_reg(adap, A_PL_CLI); /* flush */
2599
2600 if (likely(w0))
2601 process_responses_gts(adap, q0);
2602
2603 if (w1)
2604 process_responses_gts(adap, q1);
2605
2606 work_done = w0 | w1;
2607 } else
2608 work_done = t3_slow_intr_handler(adap);
2609
2610 spin_unlock(&q0->lock);
2611 return IRQ_RETVAL(work_done != 0);
2612}
2613
2614/*
2615 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2616 * Handles data events from SGE response queues as well as error and other
2617 * async events as they all use the same interrupt pin. We use one SGE
2618 * response queue per port in this mode and protect all response queues with
2619 * queue 0's lock.
2620 */
2621static irqreturn_t t3b_intr(int irq, void *cookie)
2622{
2623 u32 map;
2624 struct adapter *adap = cookie;
2625 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2626
2627 t3_write_reg(adap, A_PL_CLI, 0);
2628 map = t3_read_reg(adap, A_SG_DATA_INTR);
2629
2630 if (unlikely(!map)) /* shared interrupt, most likely */
2631 return IRQ_NONE;
2632
2633 spin_lock(&q0->lock);
2634
2635 if (unlikely(map & F_ERRINTR))
2636 t3_slow_intr_handler(adap);
2637
2638 if (likely(map & 1))
2639 process_responses_gts(adap, q0);
2640
2641 if (map & 2)
2642 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2643
2644 spin_unlock(&q0->lock);
2645 return IRQ_HANDLED;
2646}
2647
2648/*
2649 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2650 * Handles data events from SGE response queues as well as error and other
2651 * async events as they all use the same interrupt pin. We use one SGE
2652 * response queue per port in this mode and protect all response queues with
2653 * queue 0's lock.
2654 */
2655static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2656{
2657 u32 map;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002658 struct adapter *adap = cookie;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002659 struct sge_qset *qs0 = &adap->sge.qs[0];
2660 struct sge_rspq *q0 = &qs0->rspq;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002661
2662 t3_write_reg(adap, A_PL_CLI, 0);
2663 map = t3_read_reg(adap, A_SG_DATA_INTR);
2664
2665 if (unlikely(!map)) /* shared interrupt, most likely */
2666 return IRQ_NONE;
2667
2668 spin_lock(&q0->lock);
2669
2670 if (unlikely(map & F_ERRINTR))
2671 t3_slow_intr_handler(adap);
2672
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002673 if (likely(map & 1))
2674 napi_schedule(&qs0->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002675
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002676 if (map & 2)
2677 napi_schedule(&adap->sge.qs[1].napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002678
2679 spin_unlock(&q0->lock);
2680 return IRQ_HANDLED;
2681}
2682
2683/**
2684 * t3_intr_handler - select the top-level interrupt handler
2685 * @adap: the adapter
2686 * @polling: whether using NAPI to service response queues
2687 *
2688 * Selects the top-level interrupt handler based on the type of interrupts
2689 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2690 * response queues.
2691 */
Jeff Garzik7c239972007-10-19 03:12:20 -04002692irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002693{
2694 if (adap->flags & USING_MSIX)
2695 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2696 if (adap->flags & USING_MSI)
2697 return polling ? t3_intr_msi_napi : t3_intr_msi;
2698 if (adap->params.rev > 0)
2699 return polling ? t3b_intr_napi : t3b_intr;
2700 return t3_intr;
2701}
2702
/* SGE interrupt cause bits that are reported as parity errors */
#define SGE_PARERR (F_CPPARITYERROR | \
		    F_OCPARITYERROR | \
		    F_RCPARITYERROR | \
		    F_IRPARITYERROR | \
		    V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | \
		    F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | \
		    F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)

/* SGE interrupt cause bits that are reported as framing errors */
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | \
			F_R_REQ_FRAMINGERROR)

/* SGE interrupt cause bits that are treated as fatal */
#define SGE_FATALERR (SGE_PARERR | \
		      SGE_FRAMINGERR | \
		      F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)
2711
Divy Le Ray4d22de32007-01-18 22:04:14 -05002712/**
2713 * t3_sge_err_intr_handler - SGE async event interrupt handler
2714 * @adapter: the adapter
2715 *
2716 * Interrupt handler for SGE asynchronous (non-data) events.
2717 */
2718void t3_sge_err_intr_handler(struct adapter *adapter)
2719{
2720 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2721
Divy Le Rayb8819552007-12-17 18:47:31 -08002722 if (status & SGE_PARERR)
2723 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2724 status & SGE_PARERR);
2725 if (status & SGE_FRAMINGERR)
2726 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2727 status & SGE_FRAMINGERR);
2728
Divy Le Ray4d22de32007-01-18 22:04:14 -05002729 if (status & F_RSPQCREDITOVERFOW)
2730 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2731
2732 if (status & F_RSPQDISABLED) {
2733 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2734
2735 CH_ALERT(adapter,
2736 "packet delivered to disabled response queue "
2737 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2738 }
2739
Divy Le Ray6e3f03b2007-08-21 20:49:10 -07002740 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2741 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2742 status & F_HIPIODRBDROPERR ? "high" : "lo");
2743
Divy Le Ray4d22de32007-01-18 22:04:14 -05002744 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
Divy Le Rayb8819552007-12-17 18:47:31 -08002745 if (status & SGE_FATALERR)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002746 t3_fatal_err(adapter);
2747}
2748
2749/**
2750 * sge_timer_cb - perform periodic maintenance of an SGE qset
2751 * @data: the SGE queue set to maintain
2752 *
2753 * Runs periodically from a timer to perform maintenance of an SGE queue
2754 * set. It performs two tasks:
2755 *
2756 * a) Cleans up any completed Tx descriptors that may still be pending.
2757 * Normal descriptor cleanup happens when new packets are added to a Tx
2758 * queue so this timer is relatively infrequent and does any cleanup only
2759 * if the Tx queue has not seen any new packets in a while. We make a
2760 * best effort attempt to reclaim descriptors, in that we don't wait
2761 * around if we cannot get a queue's lock (which most likely is because
2762 * someone else is queueing new packets and so will also handle the clean
2763 * up). Since control queues use immediate data exclusively we don't
2764 * bother cleaning them up here.
2765 *
2766 * b) Replenishes Rx queues that have run out due to memory shortage.
2767 * Normally new Rx buffers are added when existing ones are consumed but
2768 * when out of memory a queue can become empty. We try to add only a few
2769 * buffers here, the queue will be replenished fully as these new buffers
2770 * are used up if memory shortage has subsided.
2771 */
2772static void sge_timer_cb(unsigned long data)
2773{
2774 spinlock_t *lock;
2775 struct sge_qset *qs = (struct sge_qset *)data;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002776 struct adapter *adap = qs->adap;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002777
2778 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2779 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2780 spin_unlock(&qs->txq[TXQ_ETH].lock);
2781 }
2782 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2783 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2784 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2785 }
2786 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002787 &adap->sge.qs[0].rspq.lock;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002788 if (spin_trylock_irq(lock)) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002789 if (!napi_is_scheduled(&qs->napi)) {
Divy Le Raybae73f42007-02-24 16:44:12 -08002790 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2791
Divy Le Ray4d22de32007-01-18 22:04:14 -05002792 if (qs->fl[0].credits < qs->fl[0].size)
2793 __refill_fl(adap, &qs->fl[0]);
2794 if (qs->fl[1].credits < qs->fl[1].size)
2795 __refill_fl(adap, &qs->fl[1]);
Divy Le Raybae73f42007-02-24 16:44:12 -08002796
2797 if (status & (1 << qs->rspq.cntxt_id)) {
2798 qs->rspq.starved++;
2799 if (qs->rspq.credits) {
2800 refill_rspq(adap, &qs->rspq, 1);
2801 qs->rspq.credits--;
2802 qs->rspq.restarted++;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002803 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
Divy Le Raybae73f42007-02-24 16:44:12 -08002804 1 << qs->rspq.cntxt_id);
2805 }
2806 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05002807 }
2808 spin_unlock_irq(lock);
2809 }
2810 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2811}
2812
2813/**
2814 * t3_update_qset_coalesce - update coalescing settings for a queue set
2815 * @qs: the SGE queue set
2816 * @p: new queue set parameters
2817 *
2818 * Update the coalescing settings for an SGE queue set. Nothing is done
2819 * if the queue set is not initialized yet.
2820 */
2821void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2822{
Divy Le Ray4d22de32007-01-18 22:04:14 -05002823 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2824 qs->rspq.polling = p->polling;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002825 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002826}
2827
2828/**
2829 * t3_sge_alloc_qset - initialize an SGE queue set
2830 * @adapter: the adapter
2831 * @id: the queue set id
2832 * @nports: how many Ethernet ports will be using this queue set
2833 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2834 * @p: configuration parameters for this queue set
2835 * @ntxq: number of Tx queues for the queue set
2836 * @netdev: net device associated with this queue set
Divy Le Ray82ad3322008-12-16 01:09:39 -08002837 * @netdevq: net device TX queue associated with this queue set
Divy Le Ray4d22de32007-01-18 22:04:14 -05002838 *
2839 * Allocate resources and initialize an SGE queue set. A queue set
2840 * comprises a response queue, two Rx free-buffer queues, and up to 3
2841 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2842 * queue, offload queue, and control queue.
2843 */
2844int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2845 int irq_vec_idx, const struct qset_params *p,
Divy Le Ray82ad3322008-12-16 01:09:39 -08002846 int ntxq, struct net_device *dev,
2847 struct netdev_queue *netdevq)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002848{
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002849 int i, avail, ret = -ENOMEM;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002850 struct sge_qset *q = &adapter->sge.qs[id];
2851
2852 init_qset_cntxt(q, id);
Divy Le Ray20d3fc12008-10-08 17:36:03 -07002853 setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002854
2855 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2856 sizeof(struct rx_desc),
2857 sizeof(struct rx_sw_desc),
2858 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2859 if (!q->fl[0].desc)
2860 goto err;
2861
2862 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2863 sizeof(struct rx_desc),
2864 sizeof(struct rx_sw_desc),
2865 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2866 if (!q->fl[1].desc)
2867 goto err;
2868
2869 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2870 sizeof(struct rsp_desc), 0,
2871 &q->rspq.phys_addr, NULL);
2872 if (!q->rspq.desc)
2873 goto err;
2874
2875 for (i = 0; i < ntxq; ++i) {
2876 /*
2877 * The control queue always uses immediate data so does not
2878 * need to keep track of any sk_buffs.
2879 */
2880 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2881
2882 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2883 sizeof(struct tx_desc), sz,
2884 &q->txq[i].phys_addr,
2885 &q->txq[i].sdesc);
2886 if (!q->txq[i].desc)
2887 goto err;
2888
2889 q->txq[i].gen = 1;
2890 q->txq[i].size = p->txq_size[i];
2891 spin_lock_init(&q->txq[i].lock);
2892 skb_queue_head_init(&q->txq[i].sendq);
2893 }
2894
2895 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2896 (unsigned long)q);
2897 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2898 (unsigned long)q);
2899
2900 q->fl[0].gen = q->fl[1].gen = 1;
2901 q->fl[0].size = p->fl_size;
2902 q->fl[1].size = p->jumbo_size;
2903
2904 q->rspq.gen = 1;
2905 q->rspq.size = p->rspq_size;
2906 spin_lock_init(&q->rspq.lock);
David S. Miller147e70e2008-09-22 01:29:52 -07002907 skb_queue_head_init(&q->rspq.rx_queue);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002908
2909 q->txq[TXQ_ETH].stop_thres = nports *
2910 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2911
Divy Le Raycf992af2007-05-30 21:10:47 -07002912#if FL0_PG_CHUNK_SIZE > 0
2913 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002914#else
Divy Le Raycf992af2007-05-30 21:10:47 -07002915 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
Divy Le Raye0994eb2007-02-24 16:44:17 -08002916#endif
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002917#if FL1_PG_CHUNK_SIZE > 0
2918 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2919#else
Divy Le Raycf992af2007-05-30 21:10:47 -07002920 q->fl[1].buf_size = is_offload(adapter) ?
2921 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2922 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002923#endif
2924
2925 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2926 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2927 q->fl[0].order = FL0_PG_ORDER;
2928 q->fl[1].order = FL1_PG_ORDER;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002929
Roland Dreierb1186de2008-03-20 13:30:48 -07002930 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002931
2932 /* FL threshold comparison uses < */
2933 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2934 q->rspq.phys_addr, q->rspq.size,
2935 q->fl[0].buf_size, 1, 0);
2936 if (ret)
2937 goto err_unlock;
2938
2939 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2940 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2941 q->fl[i].phys_addr, q->fl[i].size,
2942 q->fl[i].buf_size, p->cong_thres, 1,
2943 0);
2944 if (ret)
2945 goto err_unlock;
2946 }
2947
2948 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2949 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2950 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2951 1, 0);
2952 if (ret)
2953 goto err_unlock;
2954
2955 if (ntxq > 1) {
2956 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2957 USE_GTS, SGE_CNTXT_OFLD, id,
2958 q->txq[TXQ_OFLD].phys_addr,
2959 q->txq[TXQ_OFLD].size, 0, 1, 0);
2960 if (ret)
2961 goto err_unlock;
2962 }
2963
2964 if (ntxq > 2) {
2965 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2966 SGE_CNTXT_CTRL, id,
2967 q->txq[TXQ_CTRL].phys_addr,
2968 q->txq[TXQ_CTRL].size,
2969 q->txq[TXQ_CTRL].token, 1, 0);
2970 if (ret)
2971 goto err_unlock;
2972 }
2973
Roland Dreierb1186de2008-03-20 13:30:48 -07002974 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002975
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002976 q->adap = adapter;
2977 q->netdev = dev;
Divy Le Ray82ad3322008-12-16 01:09:39 -08002978 q->tx_q = netdevq;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002979 t3_update_qset_coalesce(q, p);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002980
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002981 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
2982 GFP_KERNEL | __GFP_COMP);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002983 if (!avail) {
2984 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2985 goto err;
2986 }
2987 if (avail < q->fl[0].size)
2988 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2989 avail);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002990
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002991 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
2992 GFP_KERNEL | __GFP_COMP);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002993 if (avail < q->fl[1].size)
2994 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2995 avail);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002996 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2997
2998 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2999 V_NEWTIMER(q->rspq.holdoff_tmr));
3000
3001 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
Divy Le Rayb2b964f2009-03-12 21:13:59 +00003002
Divy Le Ray4d22de32007-01-18 22:04:14 -05003003 return 0;
3004
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07003005err_unlock:
Roland Dreierb1186de2008-03-20 13:30:48 -07003006 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07003007err:
Divy Le Ray4d22de32007-01-18 22:04:14 -05003008 t3_free_qset(adapter, q);
3009 return ret;
3010}
3011
3012/**
Divy Le Ray0ca41c02008-09-25 14:05:28 +00003013 * t3_stop_sge_timers - stop SGE timer call backs
3014 * @adap: the adapter
3015 *
3016 * Stops each SGE queue set's timer call back
3017 */
3018void t3_stop_sge_timers(struct adapter *adap)
3019{
3020 int i;
3021
3022 for (i = 0; i < SGE_QSETS; ++i) {
3023 struct sge_qset *q = &adap->sge.qs[i];
3024
3025 if (q->tx_reclaim_timer.function)
3026 del_timer_sync(&q->tx_reclaim_timer);
3027 }
3028}
3029
3030/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05003031 * t3_free_sge_resources - free SGE resources
3032 * @adap: the adapter
3033 *
3034 * Frees resources used by the SGE queue sets.
3035 */
3036void t3_free_sge_resources(struct adapter *adap)
3037{
3038 int i;
3039
3040 for (i = 0; i < SGE_QSETS; ++i)
3041 t3_free_qset(adap, &adap->sge.qs[i]);
3042}
3043
3044/**
3045 * t3_sge_start - enable SGE
3046 * @adap: the adapter
3047 *
3048 * Enables the SGE for DMAs. This is the last step in starting packet
3049 * transfers.
3050 */
3051void t3_sge_start(struct adapter *adap)
3052{
3053 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3054}
3055
3056/**
3057 * t3_sge_stop - disable SGE operation
3058 * @adap: the adapter
3059 *
3060 * Disables the DMA engine. This can be called in emeregencies (e.g.,
3061 * from error interrupts) or from normal process context. In the latter
3062 * case it also disables any pending queue restart tasklets. Note that
3063 * if it is called in interrupt context it cannot disable the restart
3064 * tasklets as it cannot wait, however the tasklets will have no effect
3065 * since the doorbells are disabled and the driver will call this again
3066 * later from process context, at which time the tasklets will be stopped
3067 * if they are still running.
3068 */
3069void t3_sge_stop(struct adapter *adap)
3070{
3071 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3072 if (!in_interrupt()) {
3073 int i;
3074
3075 for (i = 0; i < SGE_QSETS; ++i) {
3076 struct sge_qset *qs = &adap->sge.qs[i];
3077
3078 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3079 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3080 }
3081 }
3082}
3083
3084/**
3085 * t3_sge_init - initialize SGE
3086 * @adap: the adapter
3087 * @p: the SGE parameters
3088 *
3089 * Performs SGE initialization needed every time after a chip reset.
3090 * We do not initialize any of the queue sets here, instead the driver
3091 * top-level must request those individually. We also do not enable DMA
3092 * here, that should be done after the queues have been set up.
3093 */
3094void t3_sge_init(struct adapter *adap, struct sge_params *p)
3095{
3096 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3097
3098 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
Divy Le Rayb8819552007-12-17 18:47:31 -08003099 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
Divy Le Ray4d22de32007-01-18 22:04:14 -05003100 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3101 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3102#if SGE_NUM_GENBITS == 1
3103 ctrl |= F_EGRGENCTRL;
3104#endif
3105 if (adap->params.rev > 0) {
3106 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3107 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
Divy Le Ray4d22de32007-01-18 22:04:14 -05003108 }
3109 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3110 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3111 V_LORCQDRBTHRSH(512));
3112 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3113 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
Divy Le Ray6195c712007-01-30 19:43:56 -08003114 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
Divy Le Rayb8819552007-12-17 18:47:31 -08003115 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3116 adap->params.rev < T3_REV_C ? 1000 : 500);
Divy Le Ray4d22de32007-01-18 22:04:14 -05003117 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3118 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3119 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3120 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3121 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3122}
3123
3124/**
3125 * t3_sge_prep - one-time SGE initialization
3126 * @adap: the associated adapter
3127 * @p: SGE parameters
3128 *
3129 * Performs one-time initialization of SGE SW state. Includes determining
3130 * defaults for the assorted SGE parameters, which admins can change until
3131 * they are used to initialize the SGE.
3132 */
Roland Dreier7b9b0942008-01-29 14:45:11 -08003133void t3_sge_prep(struct adapter *adap, struct sge_params *p)
Divy Le Ray4d22de32007-01-18 22:04:14 -05003134{
3135 int i;
3136
3137 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3138 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3139
3140 for (i = 0; i < SGE_QSETS; ++i) {
3141 struct qset_params *q = p->qset + i;
3142
3143 q->polling = adap->params.rev > 0;
3144 q->coalesce_usecs = 5;
3145 q->rspq_size = 1024;
Divy Le Raye0994eb2007-02-24 16:44:17 -08003146 q->fl_size = 1024;
Divy Le Ray7385ecf2008-05-21 18:56:21 -07003147 q->jumbo_size = 512;
Divy Le Ray4d22de32007-01-18 22:04:14 -05003148 q->txq_size[TXQ_ETH] = 1024;
3149 q->txq_size[TXQ_OFLD] = 1024;
3150 q->txq_size[TXQ_CTRL] = 256;
3151 q->cong_thres = 0;
3152 }
3153
3154 spin_lock_init(&adap->sge.reg_lock);
3155}
3156
3157/**
3158 * t3_get_desc - dump an SGE descriptor for debugging purposes
3159 * @qs: the queue set
3160 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3161 * @idx: the descriptor index in the queue
3162 * @data: where to dump the descriptor contents
3163 *
3164 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3165 * size of the descriptor.
3166 */
3167int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3168 unsigned char *data)
3169{
3170 if (qnum >= 6)
3171 return -EINVAL;
3172
3173 if (qnum < 3) {
3174 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3175 return -EINVAL;
3176 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3177 return sizeof(struct tx_desc);
3178 }
3179
3180 if (qnum == 3) {
3181 if (!qs->rspq.desc || idx >= qs->rspq.size)
3182 return -EINVAL;
3183 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3184 return sizeof(struct rsp_desc);
3185 }
3186
3187 qnum -= 4;
3188 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3189 return -EINVAL;
3190 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3191 return sizeof(struct rx_desc);
3192}