blob: 90f6f82d3bf238472a9b062822ddb8e6f760a817 [file] [log] [blame]
/*
 * Copyright (c) 2005-2008 Chelsio, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
Divy Le Ray4d22de32007-01-18 22:04:14 -050032#include <linux/skbuff.h>
33#include <linux/netdevice.h>
34#include <linux/etherdevice.h>
35#include <linux/if_vlan.h>
36#include <linux/ip.h>
37#include <linux/tcp.h>
38#include <linux/dma-mapping.h>
Karen Xiea109a5b2008-12-18 22:56:20 -080039#include <net/arp.h>
Divy Le Ray4d22de32007-01-18 22:04:14 -050040#include "common.h"
41#include "regs.h"
42#include "sge_defs.h"
43#include "t3_cpl.h"
44#include "firmware_exports.h"
45
/* Compile-time switch; it is disabled in this build. */
#define USE_GTS 0

#define SGE_RX_SM_BUF_SIZE 1536

/* Rx packets no longer than this many bytes are copied into a fresh skb. */
#define SGE_RX_COPY_THRES  256
/* Bytes copied into the skb linear area when a page chunk is attached. */
#define SGE_RX_PULL_LEN    128

/*
 * Size of the page chunks used to populate FL0 when FL0 is backed by pages.
 * The value must divide PAGE_SIZE evenly.  A value of 0 makes FL0 use
 * sk_buffs directly.
 */
#define FL0_PG_CHUNK_SIZE  2048
#define FL0_PG_ORDER 0
/* FL1 chunk size and page order, both selected from PAGE_SIZE. */
#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192)
#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)

#define SGE_RX_DROP_THRES 16

/*
 * Upper bound on the number of Rx buffers replenished in one go.
 */
#define MAX_RX_REFILL 16U

/*
 * Interval of the Tx buffer reclaim timer.  It can be long because Tx
 * buffers are normally reclaimed when new Tx packets are sent.
 */
#define TX_RECLAIM_PERIOD (HZ / 4)

/* Size of a work request (WR) in bytes */
#define WR_LEN (WR_FLITS * 8)
77
/*
 * Kinds of Tx queue inside each queue set.  The numeric values are used as
 * array indices, so the order must not change.
 */
enum {
	TXQ_ETH = 0,
	TXQ_OFLD = 1,
	TXQ_CTRL = 2
};
82
/* Bit flags stored in sge_txq.flags */
enum {
	TXQ_RUNNING     = 0x1,	/* fetch engine is running */
	TXQ_LAST_PKT_DB = 0x2,	/* last packet rang the doorbell */
};
88
89struct tx_desc {
Al Virofb8e4442007-08-23 03:04:12 -040090 __be64 flit[TX_DESC_FLITS];
Divy Le Ray4d22de32007-01-18 22:04:14 -050091};
92
93struct rx_desc {
94 __be32 addr_lo;
95 __be32 len_gen;
96 __be32 gen2;
97 __be32 addr_hi;
98};
99
100struct tx_sw_desc { /* SW state per Tx descriptor */
101 struct sk_buff *skb;
Divy Le Ray23561c92007-11-16 11:22:05 -0800102 u8 eop; /* set if last descriptor for packet */
103 u8 addr_idx; /* buffer index of first SGL entry in descriptor */
104 u8 fragidx; /* first page fragment associated with descriptor */
105 s8 sflit; /* start flit of first SGL entry in descriptor */
Divy Le Ray4d22de32007-01-18 22:04:14 -0500106};
107
Divy Le Raycf992af2007-05-30 21:10:47 -0700108struct rx_sw_desc { /* SW state per Rx descriptor */
Divy Le Raye0994eb2007-02-24 16:44:17 -0800109 union {
110 struct sk_buff *skb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700111 struct fl_pg_chunk pg_chunk;
112 };
113 DECLARE_PCI_UNMAP_ADDR(dma_addr);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500114};
115
116struct rsp_desc { /* response queue descriptor */
117 struct rss_header rss_hdr;
118 __be32 flags;
119 __be32 len_cq;
120 u8 imm_data[47];
121 u8 intr_gen;
122};
123
Divy Le Ray4d22de32007-01-18 22:04:14 -0500124/*
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800125 * Holds unmapping information for Tx packets that need deferred unmapping.
126 * This structure lives at skb->head and must be allocated by callers.
127 */
128struct deferred_unmap_info {
129 struct pci_dev *pdev;
130 dma_addr_t addr[MAX_SKB_FRAGS + 1];
131};
132
133/*
Divy Le Ray4d22de32007-01-18 22:04:14 -0500134 * Maps a number of flits to the number of Tx descriptors that can hold them.
135 * The formula is
136 *
137 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
138 *
139 * HW allows up to 4 descriptors to be combined into a WR.
140 */
141static u8 flit_desc_map[] = {
142 0,
143#if SGE_NUM_GENBITS == 1
144 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
145 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
146 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
147 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
148#elif SGE_NUM_GENBITS == 2
149 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
150 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
151 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
152 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
153#else
154# error "SGE_NUM_GENBITS must be 1 or 2"
155#endif
156};
157
158static inline struct sge_qset *fl_to_qset(const struct sge_fl *q, int qidx)
159{
160 return container_of(q, struct sge_qset, fl[qidx]);
161}
162
163static inline struct sge_qset *rspq_to_qset(const struct sge_rspq *q)
164{
165 return container_of(q, struct sge_qset, rspq);
166}
167
168static inline struct sge_qset *txq_to_qset(const struct sge_txq *q, int qidx)
169{
170 return container_of(q, struct sge_qset, txq[qidx]);
171}
172
173/**
174 * refill_rspq - replenish an SGE response queue
175 * @adapter: the adapter
176 * @q: the response queue to replenish
177 * @credits: how many new responses to make available
178 *
179 * Replenishes a response queue by making the supplied number of responses
180 * available to HW.
181 */
182static inline void refill_rspq(struct adapter *adapter,
183 const struct sge_rspq *q, unsigned int credits)
184{
Divy Le Rayafefce62007-11-16 11:22:21 -0800185 rmb();
Divy Le Ray4d22de32007-01-18 22:04:14 -0500186 t3_write_reg(adapter, A_SG_RSPQ_CREDIT_RETURN,
187 V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
188}
189
190/**
191 * need_skb_unmap - does the platform need unmapping of sk_buffs?
192 *
193 * Returns true if the platfrom needs sk_buff unmapping. The compiler
194 * optimizes away unecessary code if this returns true.
195 */
196static inline int need_skb_unmap(void)
197{
198 /*
199 * This structure is used to tell if the platfrom needs buffer
200 * unmapping by checking if DECLARE_PCI_UNMAP_ADDR defines anything.
201 */
202 struct dummy {
203 DECLARE_PCI_UNMAP_ADDR(addr);
204 };
205
206 return sizeof(struct dummy) != 0;
207}
208
209/**
210 * unmap_skb - unmap a packet main body and its page fragments
211 * @skb: the packet
212 * @q: the Tx queue containing Tx descriptors for the packet
213 * @cidx: index of Tx descriptor
214 * @pdev: the PCI device
215 *
216 * Unmap the main body of an sk_buff and its page fragments, if any.
217 * Because of the fairly complicated structure of our SGLs and the desire
Divy Le Ray23561c92007-11-16 11:22:05 -0800218 * to conserve space for metadata, the information necessary to unmap an
219 * sk_buff is spread across the sk_buff itself (buffer lengths), the HW Tx
220 * descriptors (the physical addresses of the various data buffers), and
221 * the SW descriptor state (assorted indices). The send functions
222 * initialize the indices for the first packet descriptor so we can unmap
223 * the buffers held in the first Tx descriptor here, and we have enough
224 * information at this point to set the state for the next Tx descriptor.
225 *
226 * Note that it is possible to clean up the first descriptor of a packet
227 * before the send routines have written the next descriptors, but this
228 * race does not cause any problem. We just end up writing the unmapping
229 * info for the descriptor first.
Divy Le Ray4d22de32007-01-18 22:04:14 -0500230 */
231static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q,
232 unsigned int cidx, struct pci_dev *pdev)
233{
234 const struct sg_ent *sgp;
Divy Le Ray23561c92007-11-16 11:22:05 -0800235 struct tx_sw_desc *d = &q->sdesc[cidx];
236 int nfrags, frag_idx, curflit, j = d->addr_idx;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500237
Divy Le Ray23561c92007-11-16 11:22:05 -0800238 sgp = (struct sg_ent *)&q->desc[cidx].flit[d->sflit];
239 frag_idx = d->fragidx;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500240
Divy Le Ray23561c92007-11-16 11:22:05 -0800241 if (frag_idx == 0 && skb_headlen(skb)) {
242 pci_unmap_single(pdev, be64_to_cpu(sgp->addr[0]),
243 skb_headlen(skb), PCI_DMA_TODEVICE);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500244 j = 1;
245 }
246
Divy Le Ray23561c92007-11-16 11:22:05 -0800247 curflit = d->sflit + 1 + j;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500248 nfrags = skb_shinfo(skb)->nr_frags;
249
250 while (frag_idx < nfrags && curflit < WR_FLITS) {
251 pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]),
252 skb_shinfo(skb)->frags[frag_idx].size,
253 PCI_DMA_TODEVICE);
254 j ^= 1;
255 if (j == 0) {
256 sgp++;
257 curflit++;
258 }
259 curflit++;
260 frag_idx++;
261 }
262
Divy Le Ray23561c92007-11-16 11:22:05 -0800263 if (frag_idx < nfrags) { /* SGL continues into next Tx descriptor */
264 d = cidx + 1 == q->size ? q->sdesc : d + 1;
265 d->fragidx = frag_idx;
266 d->addr_idx = j;
267 d->sflit = curflit - WR_FLITS - j; /* sflit can be -1 */
Divy Le Ray4d22de32007-01-18 22:04:14 -0500268 }
269}
270
271/**
272 * free_tx_desc - reclaims Tx descriptors and their buffers
273 * @adapter: the adapter
274 * @q: the Tx queue to reclaim descriptors from
275 * @n: the number of descriptors to reclaim
276 *
277 * Reclaims Tx descriptors from an SGE Tx queue and frees the associated
278 * Tx buffers. Called with the Tx queue lock held.
279 */
280static void free_tx_desc(struct adapter *adapter, struct sge_txq *q,
281 unsigned int n)
282{
283 struct tx_sw_desc *d;
284 struct pci_dev *pdev = adapter->pdev;
285 unsigned int cidx = q->cidx;
286
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800287 const int need_unmap = need_skb_unmap() &&
288 q->cntxt_id >= FW_TUNNEL_SGEEC_START;
289
Divy Le Ray4d22de32007-01-18 22:04:14 -0500290 d = &q->sdesc[cidx];
291 while (n--) {
292 if (d->skb) { /* an SGL is present */
Divy Le Ray99d7cf32007-02-24 16:44:06 -0800293 if (need_unmap)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500294 unmap_skb(d->skb, q, cidx, pdev);
Divy Le Ray23561c92007-11-16 11:22:05 -0800295 if (d->eop)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500296 kfree_skb(d->skb);
297 }
298 ++d;
299 if (++cidx == q->size) {
300 cidx = 0;
301 d = q->sdesc;
302 }
303 }
304 q->cidx = cidx;
305}
306
307/**
308 * reclaim_completed_tx - reclaims completed Tx descriptors
309 * @adapter: the adapter
310 * @q: the Tx queue to reclaim completed descriptors from
311 *
312 * Reclaims Tx descriptors that the SGE has indicated it has processed,
313 * and frees the associated buffers if possible. Called with the Tx
314 * queue's lock held.
315 */
316static inline void reclaim_completed_tx(struct adapter *adapter,
317 struct sge_txq *q)
318{
319 unsigned int reclaim = q->processed - q->cleaned;
320
321 if (reclaim) {
322 free_tx_desc(adapter, q, reclaim);
323 q->cleaned += reclaim;
324 q->in_use -= reclaim;
325 }
326}
327
328/**
329 * should_restart_tx - are there enough resources to restart a Tx queue?
330 * @q: the Tx queue
331 *
332 * Checks if there are enough descriptors to restart a suspended Tx queue.
333 */
334static inline int should_restart_tx(const struct sge_txq *q)
335{
336 unsigned int r = q->processed - q->cleaned;
337
338 return q->in_use - r < (q->size >> 1);
339}
340
Divy Le Ray9bb2b312009-03-12 21:13:49 +0000341static void clear_rx_desc(const struct sge_fl *q, struct rx_sw_desc *d)
342{
343 if (q->use_pages) {
344 if (d->pg_chunk.page)
345 put_page(d->pg_chunk.page);
346 d->pg_chunk.page = NULL;
347 } else {
348 kfree_skb(d->skb);
349 d->skb = NULL;
350 }
351}
352
Divy Le Ray4d22de32007-01-18 22:04:14 -0500353/**
354 * free_rx_bufs - free the Rx buffers on an SGE free list
355 * @pdev: the PCI device associated with the adapter
356 * @rxq: the SGE free list to clean up
357 *
358 * Release the buffers on an SGE free-buffer Rx queue. HW fetching from
359 * this queue should be stopped before calling this function.
360 */
361static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q)
362{
363 unsigned int cidx = q->cidx;
364
365 while (q->credits--) {
366 struct rx_sw_desc *d = &q->sdesc[cidx];
367
368 pci_unmap_single(pdev, pci_unmap_addr(d, dma_addr),
369 q->buf_size, PCI_DMA_FROMDEVICE);
Divy Le Ray9bb2b312009-03-12 21:13:49 +0000370 clear_rx_desc(q, d);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500371 if (++cidx == q->size)
372 cidx = 0;
373 }
Divy Le Raye0994eb2007-02-24 16:44:17 -0800374
Divy Le Raycf992af2007-05-30 21:10:47 -0700375 if (q->pg_chunk.page) {
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700376 __free_pages(q->pg_chunk.page, q->order);
Divy Le Raycf992af2007-05-30 21:10:47 -0700377 q->pg_chunk.page = NULL;
378 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500379}
380
381/**
382 * add_one_rx_buf - add a packet buffer to a free-buffer list
Divy Le Raycf992af2007-05-30 21:10:47 -0700383 * @va: buffer start VA
Divy Le Ray4d22de32007-01-18 22:04:14 -0500384 * @len: the buffer length
385 * @d: the HW Rx descriptor to write
386 * @sd: the SW Rx descriptor to write
387 * @gen: the generation bit value
388 * @pdev: the PCI device associated with the adapter
389 *
390 * Add a buffer of the given length to the supplied HW and SW Rx
391 * descriptors.
392 */
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700393static inline int add_one_rx_buf(void *va, unsigned int len,
394 struct rx_desc *d, struct rx_sw_desc *sd,
395 unsigned int gen, struct pci_dev *pdev)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500396{
397 dma_addr_t mapping;
398
Divy Le Raye0994eb2007-02-24 16:44:17 -0800399 mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE);
FUJITA Tomonori8d8bb392008-07-25 19:44:49 -0700400 if (unlikely(pci_dma_mapping_error(pdev, mapping)))
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700401 return -ENOMEM;
402
Divy Le Ray4d22de32007-01-18 22:04:14 -0500403 pci_unmap_addr_set(sd, dma_addr, mapping);
404
405 d->addr_lo = cpu_to_be32(mapping);
406 d->addr_hi = cpu_to_be32((u64) mapping >> 32);
407 wmb();
408 d->len_gen = cpu_to_be32(V_FLD_GEN1(gen));
409 d->gen2 = cpu_to_be32(V_FLD_GEN2(gen));
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700410 return 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500411}
412
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700413static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp,
414 unsigned int order)
Divy Le Raycf992af2007-05-30 21:10:47 -0700415{
416 if (!q->pg_chunk.page) {
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700417 q->pg_chunk.page = alloc_pages(gfp, order);
Divy Le Raycf992af2007-05-30 21:10:47 -0700418 if (unlikely(!q->pg_chunk.page))
419 return -ENOMEM;
420 q->pg_chunk.va = page_address(q->pg_chunk.page);
421 q->pg_chunk.offset = 0;
422 }
423 sd->pg_chunk = q->pg_chunk;
424
425 q->pg_chunk.offset += q->buf_size;
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700426 if (q->pg_chunk.offset == (PAGE_SIZE << order))
Divy Le Raycf992af2007-05-30 21:10:47 -0700427 q->pg_chunk.page = NULL;
428 else {
429 q->pg_chunk.va += q->buf_size;
430 get_page(q->pg_chunk.page);
431 }
432 return 0;
433}
434
Divy Le Ray26b38712009-03-12 21:13:43 +0000435static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
436{
437 if (q->pend_cred >= q->credits / 4) {
438 q->pend_cred = 0;
439 t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
440 }
441}
442
Divy Le Ray4d22de32007-01-18 22:04:14 -0500443/**
444 * refill_fl - refill an SGE free-buffer list
445 * @adapter: the adapter
446 * @q: the free-list to refill
447 * @n: the number of new buffers to allocate
448 * @gfp: the gfp flags for allocating new buffers
449 *
450 * (Re)populate an SGE free-buffer list with up to @n new packet buffers,
451 * allocated with the supplied gfp flags. The caller must assure that
452 * @n does not exceed the queue's capacity.
453 */
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700454static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500455{
Divy Le Raycf992af2007-05-30 21:10:47 -0700456 void *buf_start;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500457 struct rx_sw_desc *sd = &q->sdesc[q->pidx];
458 struct rx_desc *d = &q->desc[q->pidx];
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700459 unsigned int count = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500460
461 while (n--) {
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700462 int err;
463
Divy Le Raycf992af2007-05-30 21:10:47 -0700464 if (q->use_pages) {
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700465 if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) {
Divy Le Raycf992af2007-05-30 21:10:47 -0700466nomem: q->alloc_failed++;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800467 break;
468 }
Divy Le Raycf992af2007-05-30 21:10:47 -0700469 buf_start = sd->pg_chunk.va;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800470 } else {
Divy Le Raycf992af2007-05-30 21:10:47 -0700471 struct sk_buff *skb = alloc_skb(q->buf_size, gfp);
Divy Le Raye0994eb2007-02-24 16:44:17 -0800472
Divy Le Raycf992af2007-05-30 21:10:47 -0700473 if (!skb)
474 goto nomem;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800475
Divy Le Raycf992af2007-05-30 21:10:47 -0700476 sd->skb = skb;
477 buf_start = skb->data;
Divy Le Raye0994eb2007-02-24 16:44:17 -0800478 }
479
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700480 err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen,
481 adap->pdev);
482 if (unlikely(err)) {
Divy Le Ray9bb2b312009-03-12 21:13:49 +0000483 clear_rx_desc(q, sd);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700484 break;
485 }
486
Divy Le Ray4d22de32007-01-18 22:04:14 -0500487 d++;
488 sd++;
489 if (++q->pidx == q->size) {
490 q->pidx = 0;
491 q->gen ^= 1;
492 sd = q->sdesc;
493 d = q->desc;
494 }
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700495 count++;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500496 }
Divy Le Ray26b38712009-03-12 21:13:43 +0000497
498 q->credits += count;
499 q->pend_cred += count;
500 ring_fl_db(adap, q);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -0700501
502 return count;
Divy Le Ray4d22de32007-01-18 22:04:14 -0500503}
504
505static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl)
506{
Divy Le Ray26b38712009-03-12 21:13:43 +0000507 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits),
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700508 GFP_ATOMIC | __GFP_COMP);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500509}
510
511/**
512 * recycle_rx_buf - recycle a receive buffer
513 * @adapter: the adapter
514 * @q: the SGE free list
515 * @idx: index of buffer to recycle
516 *
517 * Recycles the specified buffer on the given free list by adding it at
518 * the next available slot on the list.
519 */
520static void recycle_rx_buf(struct adapter *adap, struct sge_fl *q,
521 unsigned int idx)
522{
523 struct rx_desc *from = &q->desc[idx];
524 struct rx_desc *to = &q->desc[q->pidx];
525
Divy Le Raycf992af2007-05-30 21:10:47 -0700526 q->sdesc[q->pidx] = q->sdesc[idx];
Divy Le Ray4d22de32007-01-18 22:04:14 -0500527 to->addr_lo = from->addr_lo; /* already big endian */
528 to->addr_hi = from->addr_hi; /* likewise */
529 wmb();
530 to->len_gen = cpu_to_be32(V_FLD_GEN1(q->gen));
531 to->gen2 = cpu_to_be32(V_FLD_GEN2(q->gen));
Divy Le Ray4d22de32007-01-18 22:04:14 -0500532
533 if (++q->pidx == q->size) {
534 q->pidx = 0;
535 q->gen ^= 1;
536 }
Divy Le Ray26b38712009-03-12 21:13:43 +0000537
538 q->credits++;
539 q->pend_cred++;
540 ring_fl_db(adap, q);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500541}
542
543/**
544 * alloc_ring - allocate resources for an SGE descriptor ring
545 * @pdev: the PCI device
546 * @nelem: the number of descriptors
547 * @elem_size: the size of each descriptor
548 * @sw_size: the size of the SW state associated with each ring element
549 * @phys: the physical address of the allocated ring
550 * @metadata: address of the array holding the SW state for the ring
551 *
552 * Allocates resources for an SGE descriptor ring, such as Tx queues,
553 * free buffer lists, or response queues. Each SGE ring requires
554 * space for its HW descriptors plus, optionally, space for the SW state
555 * associated with each HW entry (the metadata). The function returns
556 * three values: the virtual address for the HW ring (the return value
557 * of the function), the physical address of the HW ring, and the address
558 * of the SW ring.
559 */
560static void *alloc_ring(struct pci_dev *pdev, size_t nelem, size_t elem_size,
Divy Le Raye0994eb2007-02-24 16:44:17 -0800561 size_t sw_size, dma_addr_t * phys, void *metadata)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500562{
563 size_t len = nelem * elem_size;
564 void *s = NULL;
565 void *p = dma_alloc_coherent(&pdev->dev, len, phys, GFP_KERNEL);
566
567 if (!p)
568 return NULL;
Divy Le Ray52565542008-11-26 15:35:59 -0800569 if (sw_size && metadata) {
Divy Le Ray4d22de32007-01-18 22:04:14 -0500570 s = kcalloc(nelem, sw_size, GFP_KERNEL);
571
572 if (!s) {
573 dma_free_coherent(&pdev->dev, len, p, *phys);
574 return NULL;
575 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500576 *(void **)metadata = s;
Divy Le Ray52565542008-11-26 15:35:59 -0800577 }
Divy Le Ray4d22de32007-01-18 22:04:14 -0500578 memset(p, 0, len);
579 return p;
580}
581
582/**
Divy Le Ray204e2f92008-05-06 19:26:01 -0700583 * t3_reset_qset - reset a sge qset
584 * @q: the queue set
585 *
586 * Reset the qset structure.
587 * the NAPI structure is preserved in the event of
588 * the qset's reincarnation, for example during EEH recovery.
589 */
590static void t3_reset_qset(struct sge_qset *q)
591{
592 if (q->adap &&
593 !(q->adap->flags & NAPI_INIT)) {
594 memset(q, 0, sizeof(*q));
595 return;
596 }
597
598 q->adap = NULL;
599 memset(&q->rspq, 0, sizeof(q->rspq));
600 memset(q->fl, 0, sizeof(struct sge_fl) * SGE_RXQ_PER_SET);
601 memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET);
602 q->txq_stopped = 0;
Divy Le Ray20d3fc12008-10-08 17:36:03 -0700603 q->tx_reclaim_timer.function = NULL; /* for t3_stop_sge_timers() */
Herbert Xu7be2df42009-01-21 14:39:13 -0800604 q->lro_frag_tbl.nr_frags = q->lro_frag_tbl.len = 0;
Divy Le Ray204e2f92008-05-06 19:26:01 -0700605}
606
607
608/**
Divy Le Ray4d22de32007-01-18 22:04:14 -0500609 * free_qset - free the resources of an SGE queue set
610 * @adapter: the adapter owning the queue set
611 * @q: the queue set
612 *
613 * Release the HW and SW resources associated with an SGE queue set, such
614 * as HW contexts, packet buffers, and descriptor rings. Traffic to the
615 * queue set must be quiesced prior to calling this.
616 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -0700617static void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500618{
619 int i;
620 struct pci_dev *pdev = adapter->pdev;
621
Divy Le Ray4d22de32007-01-18 22:04:14 -0500622 for (i = 0; i < SGE_RXQ_PER_SET; ++i)
623 if (q->fl[i].desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700624 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500625 t3_sge_disable_fl(adapter, q->fl[i].cntxt_id);
Roland Dreierb1186de2008-03-20 13:30:48 -0700626 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500627 free_rx_bufs(pdev, &q->fl[i]);
628 kfree(q->fl[i].sdesc);
629 dma_free_coherent(&pdev->dev,
630 q->fl[i].size *
631 sizeof(struct rx_desc), q->fl[i].desc,
632 q->fl[i].phys_addr);
633 }
634
635 for (i = 0; i < SGE_TXQ_PER_SET; ++i)
636 if (q->txq[i].desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700637 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500638 t3_sge_enable_ecntxt(adapter, q->txq[i].cntxt_id, 0);
Roland Dreierb1186de2008-03-20 13:30:48 -0700639 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500640 if (q->txq[i].sdesc) {
641 free_tx_desc(adapter, &q->txq[i],
642 q->txq[i].in_use);
643 kfree(q->txq[i].sdesc);
644 }
645 dma_free_coherent(&pdev->dev,
646 q->txq[i].size *
647 sizeof(struct tx_desc),
648 q->txq[i].desc, q->txq[i].phys_addr);
649 __skb_queue_purge(&q->txq[i].sendq);
650 }
651
652 if (q->rspq.desc) {
Roland Dreierb1186de2008-03-20 13:30:48 -0700653 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500654 t3_sge_disable_rspcntxt(adapter, q->rspq.cntxt_id);
Roland Dreierb1186de2008-03-20 13:30:48 -0700655 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500656 dma_free_coherent(&pdev->dev,
657 q->rspq.size * sizeof(struct rsp_desc),
658 q->rspq.desc, q->rspq.phys_addr);
659 }
660
Divy Le Ray204e2f92008-05-06 19:26:01 -0700661 t3_reset_qset(q);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500662}
663
664/**
665 * init_qset_cntxt - initialize an SGE queue set context info
666 * @qs: the queue set
667 * @id: the queue set id
668 *
669 * Initializes the TIDs and context ids for the queues of a queue set.
670 */
671static void init_qset_cntxt(struct sge_qset *qs, unsigned int id)
672{
673 qs->rspq.cntxt_id = id;
674 qs->fl[0].cntxt_id = 2 * id;
675 qs->fl[1].cntxt_id = 2 * id + 1;
676 qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
677 qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
678 qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
679 qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
680 qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
681}
682
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Returns the number of flits needed for a scatter/gather list that can
 *	hold @n entries: 3 flits per pair of entries plus 2 flits for an odd
 *	one left over.
 */
static inline unsigned int sgl_len(unsigned int n)
{
	/* same value as 3 * (n / 2) + 2 * (n & 1) */
	const unsigned int odd = n & 1;
	const unsigned int base = (3 * n) / 2;

	return base + odd;
}
695
696/**
697 * flits_to_desc - returns the num of Tx descriptors for the given flits
698 * @n: the number of flits
699 *
700 * Calculates the number of Tx descriptors needed for the supplied number
701 * of flits.
702 */
703static inline unsigned int flits_to_desc(unsigned int n)
704{
705 BUG_ON(n >= ARRAY_SIZE(flit_desc_map));
706 return flit_desc_map[n];
707}
708
709/**
Divy Le Raycf992af2007-05-30 21:10:47 -0700710 * get_packet - return the next ingress packet buffer from a free list
711 * @adap: the adapter that received the packet
712 * @fl: the SGE free list holding the packet
713 * @len: the packet length including any SGE padding
714 * @drop_thres: # of remaining buffers before we start dropping packets
715 *
716 * Get the next packet from a free list and complete setup of the
717 * sk_buff. If the packet is small we make a copy and recycle the
718 * original buffer, otherwise we use the original buffer itself. If a
719 * positive drop threshold is supplied packets are dropped and their
720 * buffers recycled if (a) the number of remaining buffers is under the
721 * threshold and the packet is too big to copy, or (b) the packet should
722 * be copied but there is no memory for the copy.
723 */
724static struct sk_buff *get_packet(struct adapter *adap, struct sge_fl *fl,
725 unsigned int len, unsigned int drop_thres)
726{
727 struct sk_buff *skb = NULL;
728 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
729
730 prefetch(sd->skb->data);
731 fl->credits--;
732
733 if (len <= SGE_RX_COPY_THRES) {
734 skb = alloc_skb(len, GFP_ATOMIC);
735 if (likely(skb != NULL)) {
736 __skb_put(skb, len);
737 pci_dma_sync_single_for_cpu(adap->pdev,
738 pci_unmap_addr(sd, dma_addr), len,
739 PCI_DMA_FROMDEVICE);
740 memcpy(skb->data, sd->skb->data, len);
741 pci_dma_sync_single_for_device(adap->pdev,
742 pci_unmap_addr(sd, dma_addr), len,
743 PCI_DMA_FROMDEVICE);
744 } else if (!drop_thres)
745 goto use_orig_buf;
746recycle:
747 recycle_rx_buf(adap, fl, fl->cidx);
748 return skb;
749 }
750
Divy Le Ray26b38712009-03-12 21:13:43 +0000751 if (unlikely(fl->credits < drop_thres) &&
752 refill_fl(adap, fl, min(MAX_RX_REFILL, fl->size - fl->credits - 1),
753 GFP_ATOMIC | __GFP_COMP) == 0)
Divy Le Raycf992af2007-05-30 21:10:47 -0700754 goto recycle;
755
756use_orig_buf:
757 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
758 fl->buf_size, PCI_DMA_FROMDEVICE);
759 skb = sd->skb;
760 skb_put(skb, len);
761 __refill_fl(adap, fl);
762 return skb;
763}
764
765/**
766 * get_packet_pg - return the next ingress packet buffer from a free list
767 * @adap: the adapter that received the packet
768 * @fl: the SGE free list holding the packet
769 * @len: the packet length including any SGE padding
770 * @drop_thres: # of remaining buffers before we start dropping packets
771 *
772 * Get the next packet from a free list populated with page chunks.
773 * If the packet is small we make a copy and recycle the original buffer,
774 * otherwise we attach the original buffer as a page fragment to a fresh
775 * sk_buff. If a positive drop threshold is supplied packets are dropped
776 * and their buffers recycled if (a) the number of remaining buffers is
777 * under the threshold and the packet is too big to copy, or (b) there's
778 * no system memory.
779 *
780 * Note: this function is similar to @get_packet but deals with Rx buffers
781 * that are page chunks rather than sk_buffs.
782 */
783static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl,
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700784 struct sge_rspq *q, unsigned int len,
785 unsigned int drop_thres)
Divy Le Raycf992af2007-05-30 21:10:47 -0700786{
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700787 struct sk_buff *newskb, *skb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700788 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
789
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700790 newskb = skb = q->pg_skb;
791
792 if (!skb && (len <= SGE_RX_COPY_THRES)) {
793 newskb = alloc_skb(len, GFP_ATOMIC);
794 if (likely(newskb != NULL)) {
795 __skb_put(newskb, len);
Divy Le Raycf992af2007-05-30 21:10:47 -0700796 pci_dma_sync_single_for_cpu(adap->pdev,
797 pci_unmap_addr(sd, dma_addr), len,
798 PCI_DMA_FROMDEVICE);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700799 memcpy(newskb->data, sd->pg_chunk.va, len);
Divy Le Raycf992af2007-05-30 21:10:47 -0700800 pci_dma_sync_single_for_device(adap->pdev,
801 pci_unmap_addr(sd, dma_addr), len,
802 PCI_DMA_FROMDEVICE);
803 } else if (!drop_thres)
804 return NULL;
805recycle:
806 fl->credits--;
807 recycle_rx_buf(adap, fl, fl->cidx);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700808 q->rx_recycle_buf++;
809 return newskb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700810 }
811
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700812 if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres)))
Divy Le Raycf992af2007-05-30 21:10:47 -0700813 goto recycle;
814
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700815 if (!skb)
Divy Le Rayb47385b2008-05-21 18:56:26 -0700816 newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700817 if (unlikely(!newskb)) {
Divy Le Raycf992af2007-05-30 21:10:47 -0700818 if (!drop_thres)
819 return NULL;
820 goto recycle;
821 }
822
823 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
824 fl->buf_size, PCI_DMA_FROMDEVICE);
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700825 if (!skb) {
826 __skb_put(newskb, SGE_RX_PULL_LEN);
827 memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN);
828 skb_fill_page_desc(newskb, 0, sd->pg_chunk.page,
829 sd->pg_chunk.offset + SGE_RX_PULL_LEN,
830 len - SGE_RX_PULL_LEN);
831 newskb->len = len;
832 newskb->data_len = len - SGE_RX_PULL_LEN;
Divy Le Ray8f435802009-03-12 21:13:54 +0000833 newskb->truesize += newskb->data_len;
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700834 } else {
835 skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags,
836 sd->pg_chunk.page,
837 sd->pg_chunk.offset, len);
838 newskb->len += len;
839 newskb->data_len += len;
Divy Le Ray8f435802009-03-12 21:13:54 +0000840 newskb->truesize += len;
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700841 }
Divy Le Raycf992af2007-05-30 21:10:47 -0700842
843 fl->credits--;
844 /*
845 * We do not refill FLs here, we let the caller do it to overlap a
846 * prefetch.
847 */
Divy Le Ray7385ecf2008-05-21 18:56:21 -0700848 return newskb;
Divy Le Raycf992af2007-05-30 21:10:47 -0700849}
850
851/**
Divy Le Ray4d22de32007-01-18 22:04:14 -0500852 * get_imm_packet - return the next ingress packet buffer from a response
853 * @resp: the response descriptor containing the packet data
854 *
855 * Return a packet containing the immediate data of the given response.
856 */
857static inline struct sk_buff *get_imm_packet(const struct rsp_desc *resp)
858{
859 struct sk_buff *skb = alloc_skb(IMMED_PKT_SIZE, GFP_ATOMIC);
860
861 if (skb) {
862 __skb_put(skb, IMMED_PKT_SIZE);
Arnaldo Carvalho de Melo27d7ff42007-03-31 11:55:19 -0300863 skb_copy_to_linear_data(skb, resp->imm_data, IMMED_PKT_SIZE);
Divy Le Ray4d22de32007-01-18 22:04:14 -0500864 }
865 return skb;
866}
867
868/**
869 * calc_tx_descs - calculate the number of Tx descriptors for a packet
870 * @skb: the packet
871 *
872 * Returns the number of Tx descriptors needed for the given Ethernet
873 * packet. Ethernet packets require addition of WR and CPL headers.
874 */
875static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
876{
877 unsigned int flits;
878
879 if (skb->len <= WR_LEN - sizeof(struct cpl_tx_pkt))
880 return 1;
881
882 flits = sgl_len(skb_shinfo(skb)->nr_frags + 1) + 2;
883 if (skb_shinfo(skb)->gso_size)
884 flits++;
885 return flits_to_desc(flits);
886}
887
888/**
889 * make_sgl - populate a scatter/gather list for a packet
890 * @skb: the packet
891 * @sgp: the SGL to populate
892 * @start: start address of skb main body data to include in the SGL
893 * @len: length of skb main body data to include in the SGL
894 * @pdev: the PCI device
895 *
896 * Generates a scatter/gather list for the buffers that make up a packet
897 * and returns the SGL size in 8-byte words. The caller must size the SGL
898 * appropriately.
899 */
900static inline unsigned int make_sgl(const struct sk_buff *skb,
901 struct sg_ent *sgp, unsigned char *start,
902 unsigned int len, struct pci_dev *pdev)
903{
904 dma_addr_t mapping;
905 unsigned int i, j = 0, nfrags;
906
907 if (len) {
908 mapping = pci_map_single(pdev, start, len, PCI_DMA_TODEVICE);
909 sgp->len[0] = cpu_to_be32(len);
910 sgp->addr[0] = cpu_to_be64(mapping);
911 j = 1;
912 }
913
914 nfrags = skb_shinfo(skb)->nr_frags;
915 for (i = 0; i < nfrags; i++) {
916 skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
917
918 mapping = pci_map_page(pdev, frag->page, frag->page_offset,
919 frag->size, PCI_DMA_TODEVICE);
920 sgp->len[j] = cpu_to_be32(frag->size);
921 sgp->addr[j] = cpu_to_be64(mapping);
922 j ^= 1;
923 if (j == 0)
924 ++sgp;
925 }
926 if (j)
927 sgp->len[j] = 0;
928 return ((nfrags + (len != 0)) * 3) / 2 + j;
929}
930
931/**
932 * check_ring_tx_db - check and potentially ring a Tx queue's doorbell
933 * @adap: the adapter
934 * @q: the Tx queue
935 *
936 * Ring the doorbel if a Tx queue is asleep. There is a natural race,
937 * where the HW is going to sleep just after we checked, however,
938 * then the interrupt handler will detect the outstanding TX packet
939 * and ring the doorbell for us.
940 *
941 * When GTS is disabled we unconditionally ring the doorbell.
942 */
943static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
944{
945#if USE_GTS
946 clear_bit(TXQ_LAST_PKT_DB, &q->flags);
947 if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
948 set_bit(TXQ_LAST_PKT_DB, &q->flags);
949 t3_write_reg(adap, A_SG_KDOORBELL,
950 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
951 }
952#else
953 wmb(); /* write descriptors before telling HW */
954 t3_write_reg(adap, A_SG_KDOORBELL,
955 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
956#endif
957}
958
/*
 * Store the generation value in the last flit of a Tx descriptor.
 * Compiles to nothing unless SGE_NUM_GENBITS is 2.
 */
static inline void wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	d->flit[TX_DESC_FLITS - 1] = cpu_to_be64(gen);
#endif
}
965
966/**
967 * write_wr_hdr_sgl - write a WR header and, optionally, SGL
968 * @ndesc: number of Tx descriptors spanned by the SGL
969 * @skb: the packet corresponding to the WR
970 * @d: first Tx descriptor to be written
971 * @pidx: index of above descriptors
972 * @q: the SGE Tx queue
973 * @sgl: the SGL
974 * @flits: number of flits to the start of the SGL in the first descriptor
975 * @sgl_flits: the SGL size in flits
976 * @gen: the Tx descriptor generation
977 * @wr_hi: top 32 bits of WR header based on WR type (big endian)
978 * @wr_lo: low 32 bits of WR header based on WR type (big endian)
979 *
980 * Write a work request header and an associated SGL. If the SGL is
981 * small enough to fit into one Tx descriptor it has already been written
982 * and we just need to write the WR header. Otherwise we distribute the
983 * SGL across the number of descriptors it spans.
984 */
985static void write_wr_hdr_sgl(unsigned int ndesc, struct sk_buff *skb,
986 struct tx_desc *d, unsigned int pidx,
987 const struct sge_txq *q,
988 const struct sg_ent *sgl,
989 unsigned int flits, unsigned int sgl_flits,
Al Virofb8e4442007-08-23 03:04:12 -0400990 unsigned int gen, __be32 wr_hi,
991 __be32 wr_lo)
Divy Le Ray4d22de32007-01-18 22:04:14 -0500992{
993 struct work_request_hdr *wrp = (struct work_request_hdr *)d;
994 struct tx_sw_desc *sd = &q->sdesc[pidx];
995
996 sd->skb = skb;
997 if (need_skb_unmap()) {
Divy Le Ray23561c92007-11-16 11:22:05 -0800998 sd->fragidx = 0;
999 sd->addr_idx = 0;
1000 sd->sflit = flits;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001001 }
1002
1003 if (likely(ndesc == 1)) {
Divy Le Ray23561c92007-11-16 11:22:05 -08001004 sd->eop = 1;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001005 wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1006 V_WR_SGLSFLT(flits)) | wr_hi;
1007 wmb();
1008 wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1009 V_WR_GEN(gen)) | wr_lo;
1010 wr_gen2(d, gen);
1011 } else {
1012 unsigned int ogen = gen;
1013 const u64 *fp = (const u64 *)sgl;
1014 struct work_request_hdr *wp = wrp;
1015
1016 wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1017 V_WR_SGLSFLT(flits)) | wr_hi;
1018
1019 while (sgl_flits) {
1020 unsigned int avail = WR_FLITS - flits;
1021
1022 if (avail > sgl_flits)
1023 avail = sgl_flits;
1024 memcpy(&d->flit[flits], fp, avail * sizeof(*fp));
1025 sgl_flits -= avail;
1026 ndesc--;
1027 if (!sgl_flits)
1028 break;
1029
1030 fp += avail;
1031 d++;
Divy Le Ray23561c92007-11-16 11:22:05 -08001032 sd->eop = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001033 sd++;
1034 if (++pidx == q->size) {
1035 pidx = 0;
1036 gen ^= 1;
1037 d = q->desc;
1038 sd = q->sdesc;
1039 }
1040
1041 sd->skb = skb;
1042 wrp = (struct work_request_hdr *)d;
1043 wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1044 V_WR_SGLSFLT(1)) | wr_hi;
1045 wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1046 sgl_flits + 1)) |
1047 V_WR_GEN(gen)) | wr_lo;
1048 wr_gen2(d, gen);
1049 flits = 1;
1050 }
Divy Le Ray23561c92007-11-16 11:22:05 -08001051 sd->eop = 1;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001052 wrp->wr_hi |= htonl(F_WR_EOP);
1053 wmb();
1054 wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1055 wr_gen2((struct tx_desc *)wp, ogen);
1056 WARN_ON(ndesc != 0);
1057 }
1058}
1059
1060/**
1061 * write_tx_pkt_wr - write a TX_PKT work request
1062 * @adap: the adapter
1063 * @skb: the packet to send
1064 * @pi: the egress interface
1065 * @pidx: index of the first Tx descriptor to write
1066 * @gen: the generation value to use
1067 * @q: the Tx queue
1068 * @ndesc: number of descriptors the packet will occupy
1069 * @compl: the value of the COMPL bit to use
1070 *
1071 * Generate a TX_PKT work request to send the supplied packet.
1072 */
1073static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb,
1074 const struct port_info *pi,
1075 unsigned int pidx, unsigned int gen,
1076 struct sge_txq *q, unsigned int ndesc,
1077 unsigned int compl)
1078{
1079 unsigned int flits, sgl_flits, cntrl, tso_info;
1080 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1081 struct tx_desc *d = &q->desc[pidx];
1082 struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)d;
1083
1084 cpl->len = htonl(skb->len | 0x80000000);
1085 cntrl = V_TXPKT_INTF(pi->port_id);
1086
1087 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1088 cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb));
1089
1090 tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size);
1091 if (tso_info) {
1092 int eth_type;
1093 struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)cpl;
1094
1095 d->flit[2] = 0;
1096 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1097 hdr->cntrl = htonl(cntrl);
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001098 eth_type = skb_network_offset(skb) == ETH_HLEN ?
Divy Le Ray4d22de32007-01-18 22:04:14 -05001099 CPL_ETH_II : CPL_ETH_II_VLAN;
1100 tso_info |= V_LSO_ETH_TYPE(eth_type) |
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07001101 V_LSO_IPHDR_WORDS(ip_hdr(skb)->ihl) |
Arnaldo Carvalho de Meloaa8223c2007-04-10 21:04:22 -07001102 V_LSO_TCPHDR_WORDS(tcp_hdr(skb)->doff);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001103 hdr->lso_info = htonl(tso_info);
1104 flits = 3;
1105 } else {
1106 cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1107 cntrl |= F_TXPKT_IPCSUM_DIS; /* SW calculates IP csum */
1108 cntrl |= V_TXPKT_L4CSUM_DIS(skb->ip_summed != CHECKSUM_PARTIAL);
1109 cpl->cntrl = htonl(cntrl);
1110
1111 if (skb->len <= WR_LEN - sizeof(*cpl)) {
1112 q->sdesc[pidx].skb = NULL;
1113 if (!skb->data_len)
Arnaldo Carvalho de Melod626f622007-03-27 18:55:52 -03001114 skb_copy_from_linear_data(skb, &d->flit[2],
1115 skb->len);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001116 else
1117 skb_copy_bits(skb, 0, &d->flit[2], skb->len);
1118
1119 flits = (skb->len + 7) / 8 + 2;
1120 cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(skb->len & 7) |
1121 V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT)
1122 | F_WR_SOP | F_WR_EOP | compl);
1123 wmb();
1124 cpl->wr.wr_lo = htonl(V_WR_LEN(flits) | V_WR_GEN(gen) |
1125 V_WR_TID(q->token));
1126 wr_gen2(d, gen);
1127 kfree_skb(skb);
1128 return;
1129 }
1130
1131 flits = 2;
1132 }
1133
1134 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
1135 sgl_flits = make_sgl(skb, sgp, skb->data, skb_headlen(skb), adap->pdev);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001136
1137 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits, gen,
1138 htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | compl),
1139 htonl(V_WR_TID(q->token)));
1140}
1141
Divy Le Ray82ad3322008-12-16 01:09:39 -08001142static inline void t3_stop_tx_queue(struct netdev_queue *txq,
1143 struct sge_qset *qs, struct sge_txq *q)
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301144{
Divy Le Ray82ad3322008-12-16 01:09:39 -08001145 netif_tx_stop_queue(txq);
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301146 set_bit(TXQ_ETH, &qs->txq_stopped);
1147 q->stops++;
1148}
1149
Divy Le Ray4d22de32007-01-18 22:04:14 -05001150/**
1151 * eth_xmit - add a packet to the Ethernet Tx queue
1152 * @skb: the packet
1153 * @dev: the egress net device
1154 *
1155 * Add a packet to an SGE Tx queue. Runs with softirqs disabled.
1156 */
1157int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
1158{
Divy Le Ray82ad3322008-12-16 01:09:39 -08001159 int qidx;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001160 unsigned int ndesc, pidx, credits, gen, compl;
1161 const struct port_info *pi = netdev_priv(dev);
Divy Le Ray5fbf8162007-08-29 19:15:47 -07001162 struct adapter *adap = pi->adapter;
Divy Le Ray82ad3322008-12-16 01:09:39 -08001163 struct netdev_queue *txq;
1164 struct sge_qset *qs;
1165 struct sge_txq *q;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001166
1167 /*
1168 * The chip min packet length is 9 octets but play safe and reject
1169 * anything shorter than an Ethernet header.
1170 */
1171 if (unlikely(skb->len < ETH_HLEN)) {
1172 dev_kfree_skb(skb);
1173 return NETDEV_TX_OK;
1174 }
1175
Divy Le Ray82ad3322008-12-16 01:09:39 -08001176 qidx = skb_get_queue_mapping(skb);
1177 qs = &pi->qs[qidx];
1178 q = &qs->txq[TXQ_ETH];
1179 txq = netdev_get_tx_queue(dev, qidx);
1180
Divy Le Ray4d22de32007-01-18 22:04:14 -05001181 spin_lock(&q->lock);
1182 reclaim_completed_tx(adap, q);
1183
1184 credits = q->size - q->in_use;
1185 ndesc = calc_tx_descs(skb);
1186
1187 if (unlikely(credits < ndesc)) {
Divy Le Ray82ad3322008-12-16 01:09:39 -08001188 t3_stop_tx_queue(txq, qs, q);
Krishna Kumara8cc21f2008-01-30 12:30:16 +05301189 dev_err(&adap->pdev->dev,
1190 "%s: Tx ring %u full while queue awake!\n",
1191 dev->name, q->cntxt_id & 7);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001192 spin_unlock(&q->lock);
1193 return NETDEV_TX_BUSY;
1194 }
1195
1196 q->in_use += ndesc;
Divy Le Raycd7e9032008-03-13 00:13:30 -07001197 if (unlikely(credits - ndesc < q->stop_thres)) {
Divy Le Ray82ad3322008-12-16 01:09:39 -08001198 t3_stop_tx_queue(txq, qs, q);
Divy Le Raycd7e9032008-03-13 00:13:30 -07001199
1200 if (should_restart_tx(q) &&
1201 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1202 q->restarts++;
Divy Le Ray82ad3322008-12-16 01:09:39 -08001203 netif_tx_wake_queue(txq);
Divy Le Raycd7e9032008-03-13 00:13:30 -07001204 }
1205 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001206
1207 gen = q->gen;
1208 q->unacked += ndesc;
1209 compl = (q->unacked & 8) << (S_WR_COMPL - 3);
1210 q->unacked &= 7;
1211 pidx = q->pidx;
1212 q->pidx += ndesc;
1213 if (q->pidx >= q->size) {
1214 q->pidx -= q->size;
1215 q->gen ^= 1;
1216 }
1217
1218 /* update port statistics */
1219 if (skb->ip_summed == CHECKSUM_COMPLETE)
1220 qs->port_stats[SGE_PSTAT_TX_CSUM]++;
1221 if (skb_shinfo(skb)->gso_size)
1222 qs->port_stats[SGE_PSTAT_TSO]++;
1223 if (vlan_tx_tag_present(skb) && pi->vlan_grp)
1224 qs->port_stats[SGE_PSTAT_VLANINS]++;
1225
1226 dev->trans_start = jiffies;
1227 spin_unlock(&q->lock);
1228
1229 /*
1230 * We do not use Tx completion interrupts to free DMAd Tx packets.
1231 * This is good for performamce but means that we rely on new Tx
1232 * packets arriving to run the destructors of completed packets,
1233 * which open up space in their sockets' send queues. Sometimes
1234 * we do not get such new packets causing Tx to stall. A single
1235 * UDP transmitter is a good example of this situation. We have
1236 * a clean up timer that periodically reclaims completed packets
1237 * but it doesn't run often enough (nor do we want it to) to prevent
1238 * lengthy stalls. A solution to this problem is to run the
1239 * destructor early, after the packet is queued but before it's DMAd.
1240 * A cons is that we lie to socket memory accounting, but the amount
1241 * of extra memory is reasonable (limited by the number of Tx
1242 * descriptors), the packets do actually get freed quickly by new
1243 * packets almost always, and for protocols like TCP that wait for
1244 * acks to really free up the data the extra memory is even less.
1245 * On the positive side we run the destructors on the sending CPU
1246 * rather than on a potentially different completing CPU, usually a
1247 * good thing. We also run them without holding our Tx queue lock,
1248 * unlike what reclaim_completed_tx() would otherwise do.
1249 *
1250 * Run the destructor before telling the DMA engine about the packet
1251 * to make sure it doesn't complete and get freed prematurely.
1252 */
1253 if (likely(!skb_shared(skb)))
1254 skb_orphan(skb);
1255
1256 write_tx_pkt_wr(adap, skb, pi, pidx, gen, q, ndesc, compl);
1257 check_ring_tx_db(adap, q);
1258 return NETDEV_TX_OK;
1259}
1260
1261/**
1262 * write_imm - write a packet into a Tx descriptor as immediate data
1263 * @d: the Tx descriptor to write
1264 * @skb: the packet
1265 * @len: the length of packet data to write as immediate data
1266 * @gen: the generation bit value to write
1267 *
1268 * Writes a packet as immediate data into a Tx descriptor. The packet
1269 * contains a work request at its beginning. We must write the packet
Divy Le Ray27186dc2007-08-21 20:49:15 -07001270 * carefully so the SGE doesn't read it accidentally before it's written
1271 * in its entirety.
Divy Le Ray4d22de32007-01-18 22:04:14 -05001272 */
1273static inline void write_imm(struct tx_desc *d, struct sk_buff *skb,
1274 unsigned int len, unsigned int gen)
1275{
1276 struct work_request_hdr *from = (struct work_request_hdr *)skb->data;
1277 struct work_request_hdr *to = (struct work_request_hdr *)d;
1278
Divy Le Ray27186dc2007-08-21 20:49:15 -07001279 if (likely(!skb->data_len))
1280 memcpy(&to[1], &from[1], len - sizeof(*from));
1281 else
1282 skb_copy_bits(skb, sizeof(*from), &to[1], len - sizeof(*from));
1283
Divy Le Ray4d22de32007-01-18 22:04:14 -05001284 to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1285 V_WR_BCNTLFLT(len & 7));
1286 wmb();
1287 to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1288 V_WR_LEN((len + 7) / 8));
1289 wr_gen2(d, gen);
1290 kfree_skb(skb);
1291}
1292
1293/**
1294 * check_desc_avail - check descriptor availability on a send queue
1295 * @adap: the adapter
1296 * @q: the send queue
1297 * @skb: the packet needing the descriptors
1298 * @ndesc: the number of Tx descriptors needed
1299 * @qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1300 *
1301 * Checks if the requested number of Tx descriptors is available on an
1302 * SGE send queue. If the queue is already suspended or not enough
1303 * descriptors are available the packet is queued for later transmission.
1304 * Must be called with the Tx queue locked.
1305 *
1306 * Returns 0 if enough descriptors are available, 1 if there aren't
1307 * enough descriptors and the packet has been queued, and 2 if the caller
1308 * needs to retry because there weren't enough descriptors at the
1309 * beginning of the call but some freed up in the mean time.
1310 */
1311static inline int check_desc_avail(struct adapter *adap, struct sge_txq *q,
1312 struct sk_buff *skb, unsigned int ndesc,
1313 unsigned int qid)
1314{
1315 if (unlikely(!skb_queue_empty(&q->sendq))) {
1316 addq_exit:__skb_queue_tail(&q->sendq, skb);
1317 return 1;
1318 }
1319 if (unlikely(q->size - q->in_use < ndesc)) {
1320 struct sge_qset *qs = txq_to_qset(q, qid);
1321
1322 set_bit(qid, &qs->txq_stopped);
1323 smp_mb__after_clear_bit();
1324
1325 if (should_restart_tx(q) &&
1326 test_and_clear_bit(qid, &qs->txq_stopped))
1327 return 2;
1328
1329 q->stops++;
1330 goto addq_exit;
1331 }
1332 return 0;
1333}
1334
1335/**
1336 * reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1337 * @q: the SGE control Tx queue
1338 *
1339 * This is a variant of reclaim_completed_tx() that is used for Tx queues
1340 * that send only immediate data (presently just the control queues) and
1341 * thus do not have any sk_buffs to release.
1342 */
1343static inline void reclaim_completed_tx_imm(struct sge_txq *q)
1344{
1345 unsigned int reclaim = q->processed - q->cleaned;
1346
1347 q->in_use -= reclaim;
1348 q->cleaned += reclaim;
1349}
1350
1351static inline int immediate(const struct sk_buff *skb)
1352{
Divy Le Ray27186dc2007-08-21 20:49:15 -07001353 return skb->len <= WR_LEN;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001354}
1355
1356/**
1357 * ctrl_xmit - send a packet through an SGE control Tx queue
1358 * @adap: the adapter
1359 * @q: the control queue
1360 * @skb: the packet
1361 *
1362 * Send a packet through an SGE control Tx queue. Packets sent through
1363 * a control queue must fit entirely as immediate data in a single Tx
1364 * descriptor and have no page fragments.
1365 */
1366static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
1367 struct sk_buff *skb)
1368{
1369 int ret;
1370 struct work_request_hdr *wrp = (struct work_request_hdr *)skb->data;
1371
1372 if (unlikely(!immediate(skb))) {
1373 WARN_ON(1);
1374 dev_kfree_skb(skb);
1375 return NET_XMIT_SUCCESS;
1376 }
1377
1378 wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1379 wrp->wr_lo = htonl(V_WR_TID(q->token));
1380
1381 spin_lock(&q->lock);
1382 again:reclaim_completed_tx_imm(q);
1383
1384 ret = check_desc_avail(adap, q, skb, 1, TXQ_CTRL);
1385 if (unlikely(ret)) {
1386 if (ret == 1) {
1387 spin_unlock(&q->lock);
1388 return NET_XMIT_CN;
1389 }
1390 goto again;
1391 }
1392
1393 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1394
1395 q->in_use++;
1396 if (++q->pidx >= q->size) {
1397 q->pidx = 0;
1398 q->gen ^= 1;
1399 }
1400 spin_unlock(&q->lock);
1401 wmb();
1402 t3_write_reg(adap, A_SG_KDOORBELL,
1403 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1404 return NET_XMIT_SUCCESS;
1405}
1406
1407/**
1408 * restart_ctrlq - restart a suspended control queue
1409 * @qs: the queue set cotaining the control queue
1410 *
1411 * Resumes transmission on a suspended Tx control queue.
1412 */
1413static void restart_ctrlq(unsigned long data)
1414{
1415 struct sk_buff *skb;
1416 struct sge_qset *qs = (struct sge_qset *)data;
1417 struct sge_txq *q = &qs->txq[TXQ_CTRL];
Divy Le Ray4d22de32007-01-18 22:04:14 -05001418
1419 spin_lock(&q->lock);
1420 again:reclaim_completed_tx_imm(q);
1421
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001422 while (q->in_use < q->size &&
1423 (skb = __skb_dequeue(&q->sendq)) != NULL) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05001424
1425 write_imm(&q->desc[q->pidx], skb, skb->len, q->gen);
1426
1427 if (++q->pidx >= q->size) {
1428 q->pidx = 0;
1429 q->gen ^= 1;
1430 }
1431 q->in_use++;
1432 }
1433
1434 if (!skb_queue_empty(&q->sendq)) {
1435 set_bit(TXQ_CTRL, &qs->txq_stopped);
1436 smp_mb__after_clear_bit();
1437
1438 if (should_restart_tx(q) &&
1439 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1440 goto again;
1441 q->stops++;
1442 }
1443
1444 spin_unlock(&q->lock);
Divy Le Rayafefce62007-11-16 11:22:21 -08001445 wmb();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001446 t3_write_reg(qs->adap, A_SG_KDOORBELL,
Divy Le Ray4d22de32007-01-18 22:04:14 -05001447 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1448}
1449
Divy Le Ray14ab9892007-01-30 19:43:50 -08001450/*
1451 * Send a management message through control queue 0
1452 */
1453int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb)
1454{
Divy Le Ray204e2f92008-05-06 19:26:01 -07001455 int ret;
Divy Le Raybc4b6b52007-12-17 18:47:41 -08001456 local_bh_disable();
1457 ret = ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], skb);
1458 local_bh_enable();
1459
1460 return ret;
Divy Le Ray14ab9892007-01-30 19:43:50 -08001461}
1462
Divy Le Ray4d22de32007-01-18 22:04:14 -05001463/**
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001464 * deferred_unmap_destructor - unmap a packet when it is freed
1465 * @skb: the packet
1466 *
1467 * This is the packet destructor used for Tx packets that need to remain
1468 * mapped until they are freed rather than until their Tx descriptors are
1469 * freed.
1470 */
1471static void deferred_unmap_destructor(struct sk_buff *skb)
1472{
1473 int i;
1474 const dma_addr_t *p;
1475 const struct skb_shared_info *si;
1476 const struct deferred_unmap_info *dui;
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001477
1478 dui = (struct deferred_unmap_info *)skb->head;
1479 p = dui->addr;
1480
Divy Le Ray23561c92007-11-16 11:22:05 -08001481 if (skb->tail - skb->transport_header)
1482 pci_unmap_single(dui->pdev, *p++,
1483 skb->tail - skb->transport_header,
1484 PCI_DMA_TODEVICE);
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001485
1486 si = skb_shinfo(skb);
1487 for (i = 0; i < si->nr_frags; i++)
1488 pci_unmap_page(dui->pdev, *p++, si->frags[i].size,
1489 PCI_DMA_TODEVICE);
1490}
1491
1492static void setup_deferred_unmapping(struct sk_buff *skb, struct pci_dev *pdev,
1493 const struct sg_ent *sgl, int sgl_flits)
1494{
1495 dma_addr_t *p;
1496 struct deferred_unmap_info *dui;
1497
1498 dui = (struct deferred_unmap_info *)skb->head;
1499 dui->pdev = pdev;
1500 for (p = dui->addr; sgl_flits >= 3; sgl++, sgl_flits -= 3) {
1501 *p++ = be64_to_cpu(sgl->addr[0]);
1502 *p++ = be64_to_cpu(sgl->addr[1]);
1503 }
1504 if (sgl_flits)
1505 *p = be64_to_cpu(sgl->addr[0]);
1506}
1507
1508/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001509 * write_ofld_wr - write an offload work request
1510 * @adap: the adapter
1511 * @skb: the packet to send
1512 * @q: the Tx queue
1513 * @pidx: index of the first Tx descriptor to write
1514 * @gen: the generation value to use
1515 * @ndesc: number of descriptors the packet will occupy
1516 *
1517 * Write an offload work request to send the supplied packet. The packet
1518 * data already carry the work request with most fields populated.
1519 */
1520static void write_ofld_wr(struct adapter *adap, struct sk_buff *skb,
1521 struct sge_txq *q, unsigned int pidx,
1522 unsigned int gen, unsigned int ndesc)
1523{
1524 unsigned int sgl_flits, flits;
1525 struct work_request_hdr *from;
1526 struct sg_ent *sgp, sgl[MAX_SKB_FRAGS / 2 + 1];
1527 struct tx_desc *d = &q->desc[pidx];
1528
1529 if (immediate(skb)) {
1530 q->sdesc[pidx].skb = NULL;
1531 write_imm(d, skb, skb->len, gen);
1532 return;
1533 }
1534
1535 /* Only TX_DATA builds SGLs */
1536
1537 from = (struct work_request_hdr *)skb->data;
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001538 memcpy(&d->flit[1], &from[1],
1539 skb_transport_offset(skb) - sizeof(*from));
Divy Le Ray4d22de32007-01-18 22:04:14 -05001540
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001541 flits = skb_transport_offset(skb) / 8;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001542 sgp = ndesc == 1 ? (struct sg_ent *)&d->flit[flits] : sgl;
Arnaldo Carvalho de Melo9c702202007-04-25 18:04:18 -07001543 sgl_flits = make_sgl(skb, sgp, skb_transport_header(skb),
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001544 skb->tail - skb->transport_header,
Divy Le Ray4d22de32007-01-18 22:04:14 -05001545 adap->pdev);
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001546 if (need_skb_unmap()) {
1547 setup_deferred_unmapping(skb, adap->pdev, sgp, sgl_flits);
1548 skb->destructor = deferred_unmap_destructor;
Divy Le Ray99d7cf32007-02-24 16:44:06 -08001549 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001550
1551 write_wr_hdr_sgl(ndesc, skb, d, pidx, q, sgl, flits, sgl_flits,
1552 gen, from->wr_hi, from->wr_lo);
1553}
1554
1555/**
1556 * calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1557 * @skb: the packet
1558 *
1559 * Returns the number of Tx descriptors needed for the given offload
1560 * packet. These packets are already fully constructed.
1561 */
1562static inline unsigned int calc_tx_descs_ofld(const struct sk_buff *skb)
1563{
Divy Le Ray27186dc2007-08-21 20:49:15 -07001564 unsigned int flits, cnt;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001565
Divy Le Ray27186dc2007-08-21 20:49:15 -07001566 if (skb->len <= WR_LEN)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001567 return 1; /* packet fits as immediate data */
1568
Arnaldo Carvalho de Meloea2ae172007-04-25 17:55:53 -07001569 flits = skb_transport_offset(skb) / 8; /* headers */
Divy Le Ray27186dc2007-08-21 20:49:15 -07001570 cnt = skb_shinfo(skb)->nr_frags;
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001571 if (skb->tail != skb->transport_header)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001572 cnt++;
1573 return flits_to_desc(flits + sgl_len(cnt));
1574}
1575
1576/**
1577 * ofld_xmit - send a packet through an offload queue
1578 * @adap: the adapter
1579 * @q: the Tx offload queue
1580 * @skb: the packet
1581 *
1582 * Send an offload packet through an SGE offload queue.
1583 */
1584static int ofld_xmit(struct adapter *adap, struct sge_txq *q,
1585 struct sk_buff *skb)
1586{
1587 int ret;
1588 unsigned int ndesc = calc_tx_descs_ofld(skb), pidx, gen;
1589
1590 spin_lock(&q->lock);
1591 again:reclaim_completed_tx(adap, q);
1592
1593 ret = check_desc_avail(adap, q, skb, ndesc, TXQ_OFLD);
1594 if (unlikely(ret)) {
1595 if (ret == 1) {
1596 skb->priority = ndesc; /* save for restart */
1597 spin_unlock(&q->lock);
1598 return NET_XMIT_CN;
1599 }
1600 goto again;
1601 }
1602
1603 gen = q->gen;
1604 q->in_use += ndesc;
1605 pidx = q->pidx;
1606 q->pidx += ndesc;
1607 if (q->pidx >= q->size) {
1608 q->pidx -= q->size;
1609 q->gen ^= 1;
1610 }
1611 spin_unlock(&q->lock);
1612
1613 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1614 check_ring_tx_db(adap, q);
1615 return NET_XMIT_SUCCESS;
1616}
1617
1618/**
1619 * restart_offloadq - restart a suspended offload queue
1620 * @qs: the queue set cotaining the offload queue
1621 *
1622 * Resumes transmission on a suspended Tx offload queue.
1623 */
1624static void restart_offloadq(unsigned long data)
1625{
1626 struct sk_buff *skb;
1627 struct sge_qset *qs = (struct sge_qset *)data;
1628 struct sge_txq *q = &qs->txq[TXQ_OFLD];
Divy Le Ray5fbf8162007-08-29 19:15:47 -07001629 const struct port_info *pi = netdev_priv(qs->netdev);
1630 struct adapter *adap = pi->adapter;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001631
1632 spin_lock(&q->lock);
1633 again:reclaim_completed_tx(adap, q);
1634
1635 while ((skb = skb_peek(&q->sendq)) != NULL) {
1636 unsigned int gen, pidx;
1637 unsigned int ndesc = skb->priority;
1638
1639 if (unlikely(q->size - q->in_use < ndesc)) {
1640 set_bit(TXQ_OFLD, &qs->txq_stopped);
1641 smp_mb__after_clear_bit();
1642
1643 if (should_restart_tx(q) &&
1644 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
1645 goto again;
1646 q->stops++;
1647 break;
1648 }
1649
1650 gen = q->gen;
1651 q->in_use += ndesc;
1652 pidx = q->pidx;
1653 q->pidx += ndesc;
1654 if (q->pidx >= q->size) {
1655 q->pidx -= q->size;
1656 q->gen ^= 1;
1657 }
1658 __skb_unlink(skb, &q->sendq);
1659 spin_unlock(&q->lock);
1660
1661 write_ofld_wr(adap, skb, q, pidx, gen, ndesc);
1662 spin_lock(&q->lock);
1663 }
1664 spin_unlock(&q->lock);
1665
1666#if USE_GTS
1667 set_bit(TXQ_RUNNING, &q->flags);
1668 set_bit(TXQ_LAST_PKT_DB, &q->flags);
1669#endif
Divy Le Rayafefce62007-11-16 11:22:21 -08001670 wmb();
Divy Le Ray4d22de32007-01-18 22:04:14 -05001671 t3_write_reg(adap, A_SG_KDOORBELL,
1672 F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1673}
1674
1675/**
1676 * queue_set - return the queue set a packet should use
1677 * @skb: the packet
1678 *
1679 * Maps a packet to the SGE queue set it should use. The desired queue
1680 * set is carried in bits 1-3 in the packet's priority.
1681 */
1682static inline int queue_set(const struct sk_buff *skb)
1683{
1684 return skb->priority >> 1;
1685}
1686
1687/**
1688 * is_ctrl_pkt - return whether an offload packet is a control packet
1689 * @skb: the packet
1690 *
1691 * Determines whether an offload packet should use an OFLD or a CTRL
1692 * Tx queue. This is indicated by bit 0 in the packet's priority.
1693 */
1694static inline int is_ctrl_pkt(const struct sk_buff *skb)
1695{
1696 return skb->priority & 1;
1697}
1698
1699/**
1700 * t3_offload_tx - send an offload packet
1701 * @tdev: the offload device to send to
1702 * @skb: the packet
1703 *
1704 * Sends an offload packet. We use the packet priority to select the
1705 * appropriate Tx queue as follows: bit 0 indicates whether the packet
1706 * should be sent as regular or control, bits 1-3 select the queue set.
1707 */
1708int t3_offload_tx(struct t3cdev *tdev, struct sk_buff *skb)
1709{
1710 struct adapter *adap = tdev2adap(tdev);
1711 struct sge_qset *qs = &adap->sge.qs[queue_set(skb)];
1712
1713 if (unlikely(is_ctrl_pkt(skb)))
1714 return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], skb);
1715
1716 return ofld_xmit(adap, &qs->txq[TXQ_OFLD], skb);
1717}
1718
1719/**
1720 * offload_enqueue - add an offload packet to an SGE offload receive queue
1721 * @q: the SGE response queue
1722 * @skb: the packet
1723 *
1724 * Add a new offload packet to an SGE response queue's offload packet
1725 * queue. If the packet is the first on the queue it schedules the RX
1726 * softirq to process the queue.
1727 */
1728static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
1729{
David S. Miller147e70e2008-09-22 01:29:52 -07001730 int was_empty = skb_queue_empty(&q->rx_queue);
1731
1732 __skb_queue_tail(&q->rx_queue, skb);
1733
1734 if (was_empty) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05001735 struct sge_qset *qs = rspq_to_qset(q);
1736
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001737 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001738 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001739}
1740
1741/**
1742 * deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
1743 * @tdev: the offload device that will be receiving the packets
1744 * @q: the SGE response queue that assembled the bundle
1745 * @skbs: the partial bundle
1746 * @n: the number of packets in the bundle
1747 *
1748 * Delivers a (partial) bundle of Rx offload packets to an offload device.
1749 */
1750static inline void deliver_partial_bundle(struct t3cdev *tdev,
1751 struct sge_rspq *q,
1752 struct sk_buff *skbs[], int n)
1753{
1754 if (n) {
1755 q->offload_bundles++;
1756 tdev->recv(tdev, skbs, n);
1757 }
1758}
1759
1760/**
1761 * ofld_poll - NAPI handler for offload packets in interrupt mode
1762 * @dev: the network device doing the polling
1763 * @budget: polling budget
1764 *
1765 * The NAPI handler for offload packets when a response queue is serviced
1766 * by the hard interrupt handler, i.e., when it's operating in non-polling
1767 * mode. Creates small packet batches and sends them through the offload
1768 * receive handler. Batches need to be of modest size as we do prefetches
1769 * on the packets in each.
1770 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001771static int ofld_poll(struct napi_struct *napi, int budget)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001772{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001773 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001774 struct sge_rspq *q = &qs->rspq;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001775 struct adapter *adapter = qs->adap;
1776 int work_done = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001777
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001778 while (work_done < budget) {
David S. Miller147e70e2008-09-22 01:29:52 -07001779 struct sk_buff *skb, *tmp, *skbs[RX_BUNDLE_SIZE];
1780 struct sk_buff_head queue;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001781 int ngathered;
1782
1783 spin_lock_irq(&q->lock);
David S. Miller147e70e2008-09-22 01:29:52 -07001784 __skb_queue_head_init(&queue);
1785 skb_queue_splice_init(&q->rx_queue, &queue);
1786 if (skb_queue_empty(&queue)) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001787 napi_complete(napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001788 spin_unlock_irq(&q->lock);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001789 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001790 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001791 spin_unlock_irq(&q->lock);
1792
David S. Miller147e70e2008-09-22 01:29:52 -07001793 ngathered = 0;
1794 skb_queue_walk_safe(&queue, skb, tmp) {
1795 if (work_done >= budget)
1796 break;
1797 work_done++;
1798
1799 __skb_unlink(skb, &queue);
1800 prefetch(skb->data);
1801 skbs[ngathered] = skb;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001802 if (++ngathered == RX_BUNDLE_SIZE) {
1803 q->offload_bundles++;
1804 adapter->tdev.recv(&adapter->tdev, skbs,
1805 ngathered);
1806 ngathered = 0;
1807 }
1808 }
David S. Miller147e70e2008-09-22 01:29:52 -07001809 if (!skb_queue_empty(&queue)) {
1810 /* splice remaining packets back onto Rx queue */
Divy Le Ray4d22de32007-01-18 22:04:14 -05001811 spin_lock_irq(&q->lock);
David S. Miller147e70e2008-09-22 01:29:52 -07001812 skb_queue_splice(&queue, &q->rx_queue);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001813 spin_unlock_irq(&q->lock);
1814 }
1815 deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
1816 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001817
1818 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001819}
1820
1821/**
1822 * rx_offload - process a received offload packet
1823 * @tdev: the offload device receiving the packet
1824 * @rq: the response queue that received the packet
1825 * @skb: the packet
1826 * @rx_gather: a gather list of packets if we are building a bundle
1827 * @gather_idx: index of the next available slot in the bundle
1828 *
1829 * Process an ingress offload pakcet and add it to the offload ingress
1830 * queue. Returns the index of the next available slot in the bundle.
1831 */
1832static inline int rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
1833 struct sk_buff *skb, struct sk_buff *rx_gather[],
1834 unsigned int gather_idx)
1835{
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001836 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001837 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03001838 skb_reset_transport_header(skb);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001839
1840 if (rq->polling) {
1841 rx_gather[gather_idx++] = skb;
1842 if (gather_idx == RX_BUNDLE_SIZE) {
1843 tdev->recv(tdev, rx_gather, RX_BUNDLE_SIZE);
1844 gather_idx = 0;
1845 rq->offload_bundles++;
1846 }
1847 } else
1848 offload_enqueue(rq, skb);
1849
1850 return gather_idx;
1851}
1852
1853/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001854 * restart_tx - check whether to restart suspended Tx queues
1855 * @qs: the queue set to resume
1856 *
1857 * Restarts suspended Tx queues of an SGE queue set if they have enough
1858 * free resources to resume operation.
1859 */
1860static void restart_tx(struct sge_qset *qs)
1861{
1862 if (test_bit(TXQ_ETH, &qs->txq_stopped) &&
1863 should_restart_tx(&qs->txq[TXQ_ETH]) &&
1864 test_and_clear_bit(TXQ_ETH, &qs->txq_stopped)) {
1865 qs->txq[TXQ_ETH].restarts++;
1866 if (netif_running(qs->netdev))
Divy Le Ray82ad3322008-12-16 01:09:39 -08001867 netif_tx_wake_queue(qs->tx_q);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001868 }
1869
1870 if (test_bit(TXQ_OFLD, &qs->txq_stopped) &&
1871 should_restart_tx(&qs->txq[TXQ_OFLD]) &&
1872 test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
1873 qs->txq[TXQ_OFLD].restarts++;
1874 tasklet_schedule(&qs->txq[TXQ_OFLD].qresume_tsk);
1875 }
1876 if (test_bit(TXQ_CTRL, &qs->txq_stopped) &&
1877 should_restart_tx(&qs->txq[TXQ_CTRL]) &&
1878 test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
1879 qs->txq[TXQ_CTRL].restarts++;
1880 tasklet_schedule(&qs->txq[TXQ_CTRL].qresume_tsk);
1881 }
1882}
1883
1884/**
Karen Xiea109a5b2008-12-18 22:56:20 -08001885 * cxgb3_arp_process - process an ARP request probing a private IP address
1886 * @adapter: the adapter
1887 * @skb: the skbuff containing the ARP request
1888 *
1889 * Check if the ARP request is probing the private IP address
1890 * dedicated to iSCSI, generate an ARP reply if so.
1891 */
1892static void cxgb3_arp_process(struct adapter *adapter, struct sk_buff *skb)
1893{
1894 struct net_device *dev = skb->dev;
1895 struct port_info *pi;
1896 struct arphdr *arp;
1897 unsigned char *arp_ptr;
1898 unsigned char *sha;
1899 __be32 sip, tip;
1900
1901 if (!dev)
1902 return;
1903
1904 skb_reset_network_header(skb);
1905 arp = arp_hdr(skb);
1906
1907 if (arp->ar_op != htons(ARPOP_REQUEST))
1908 return;
1909
1910 arp_ptr = (unsigned char *)(arp + 1);
1911 sha = arp_ptr;
1912 arp_ptr += dev->addr_len;
1913 memcpy(&sip, arp_ptr, sizeof(sip));
1914 arp_ptr += sizeof(sip);
1915 arp_ptr += dev->addr_len;
1916 memcpy(&tip, arp_ptr, sizeof(tip));
1917
1918 pi = netdev_priv(dev);
1919 if (tip != pi->iscsi_ipv4addr)
1920 return;
1921
1922 arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
1923 dev->dev_addr, sha);
1924
1925}
1926
1927static inline int is_arp(struct sk_buff *skb)
1928{
1929 return skb->protocol == htons(ETH_P_ARP);
1930}
1931
1932/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05001933 * rx_eth - process an ingress ethernet packet
1934 * @adap: the adapter
1935 * @rq: the response queue that received the packet
1936 * @skb: the packet
1937 * @pad: amount of padding at the start of the buffer
1938 *
1939 * Process an ingress ethernet pakcet and deliver it to the stack.
1940 * The padding is 2 if the packet was delivered in an Rx buffer and 0
1941 * if it was immediate data in a response.
1942 */
1943static void rx_eth(struct adapter *adap, struct sge_rspq *rq,
Divy Le Rayb47385b2008-05-21 18:56:26 -07001944 struct sk_buff *skb, int pad, int lro)
Divy Le Ray4d22de32007-01-18 22:04:14 -05001945{
1946 struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad);
Divy Le Rayb47385b2008-05-21 18:56:26 -07001947 struct sge_qset *qs = rspq_to_qset(rq);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001948 struct port_info *pi;
1949
Divy Le Ray4d22de32007-01-18 22:04:14 -05001950 skb_pull(skb, sizeof(*p) + pad);
Arnaldo Carvalho de Melo4c13eb62007-04-25 17:40:23 -07001951 skb->protocol = eth_type_trans(skb, adap->port[p->iff]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001952 pi = netdev_priv(skb->dev);
Roland Dreier47fd23f2009-01-11 00:19:36 -08001953 if ((pi->rx_offload & T3_RX_CSUM) && p->csum_valid && p->csum == htons(0xffff) &&
Divy Le Ray4d22de32007-01-18 22:04:14 -05001954 !p->fragment) {
Karen Xiea109a5b2008-12-18 22:56:20 -08001955 qs->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001956 skb->ip_summed = CHECKSUM_UNNECESSARY;
1957 } else
1958 skb->ip_summed = CHECKSUM_NONE;
David S. Miller0c8dfc82009-01-27 16:22:32 -08001959 skb_record_rx_queue(skb, qs - &adap->sge.qs[0]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05001960
1961 if (unlikely(p->vlan_valid)) {
1962 struct vlan_group *grp = pi->vlan_grp;
1963
Divy Le Rayb47385b2008-05-21 18:56:26 -07001964 qs->port_stats[SGE_PSTAT_VLANEX]++;
Divy Le Ray4d22de32007-01-18 22:04:14 -05001965 if (likely(grp))
Divy Le Rayb47385b2008-05-21 18:56:26 -07001966 if (lro)
Herbert Xu7be2df42009-01-21 14:39:13 -08001967 vlan_gro_receive(&qs->napi, grp,
1968 ntohs(p->vlan), skb);
Karen Xiea109a5b2008-12-18 22:56:20 -08001969 else {
1970 if (unlikely(pi->iscsi_ipv4addr &&
1971 is_arp(skb))) {
1972 unsigned short vtag = ntohs(p->vlan) &
1973 VLAN_VID_MASK;
1974 skb->dev = vlan_group_get_device(grp,
1975 vtag);
1976 cxgb3_arp_process(adap, skb);
1977 }
Divy Le Rayb47385b2008-05-21 18:56:26 -07001978 __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan),
1979 rq->polling);
Karen Xiea109a5b2008-12-18 22:56:20 -08001980 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05001981 else
1982 dev_kfree_skb_any(skb);
Divy Le Rayb47385b2008-05-21 18:56:26 -07001983 } else if (rq->polling) {
1984 if (lro)
Herbert Xu7be2df42009-01-21 14:39:13 -08001985 napi_gro_receive(&qs->napi, skb);
Karen Xiea109a5b2008-12-18 22:56:20 -08001986 else {
1987 if (unlikely(pi->iscsi_ipv4addr && is_arp(skb)))
1988 cxgb3_arp_process(adap, skb);
Divy Le Rayb47385b2008-05-21 18:56:26 -07001989 netif_receive_skb(skb);
Karen Xiea109a5b2008-12-18 22:56:20 -08001990 }
Divy Le Rayb47385b2008-05-21 18:56:26 -07001991 } else
Divy Le Ray4d22de32007-01-18 22:04:14 -05001992 netif_rx(skb);
1993}
1994
Divy Le Rayb47385b2008-05-21 18:56:26 -07001995static inline int is_eth_tcp(u32 rss)
1996{
1997 return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE;
1998}
1999
2000/**
Divy Le Rayb47385b2008-05-21 18:56:26 -07002001 * lro_add_page - add a page chunk to an LRO session
2002 * @adap: the adapter
2003 * @qs: the associated queue set
2004 * @fl: the free list containing the page chunk to add
2005 * @len: packet length
2006 * @complete: Indicates the last fragment of a frame
2007 *
2008 * Add a received packet contained in a page chunk to an existing LRO
2009 * session.
2010 */
2011static void lro_add_page(struct adapter *adap, struct sge_qset *qs,
2012 struct sge_fl *fl, int len, int complete)
2013{
2014 struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2015 struct cpl_rx_pkt *cpl;
Herbert Xu7be2df42009-01-21 14:39:13 -08002016 struct skb_frag_struct *rx_frag = qs->lro_frag_tbl.frags;
2017 int nr_frags = qs->lro_frag_tbl.nr_frags;
2018 int frag_len = qs->lro_frag_tbl.len;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002019 int offset = 0;
2020
2021 if (!nr_frags) {
2022 offset = 2 + sizeof(struct cpl_rx_pkt);
2023 qs->lro_va = cpl = sd->pg_chunk.va + 2;
2024 }
2025
2026 fl->credits--;
2027
2028 len -= offset;
2029 pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr),
2030 fl->buf_size, PCI_DMA_FROMDEVICE);
2031
2032 rx_frag += nr_frags;
2033 rx_frag->page = sd->pg_chunk.page;
2034 rx_frag->page_offset = sd->pg_chunk.offset + offset;
2035 rx_frag->size = len;
2036 frag_len += len;
Herbert Xu7be2df42009-01-21 14:39:13 -08002037 qs->lro_frag_tbl.nr_frags++;
2038 qs->lro_frag_tbl.len = frag_len;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002039
2040 if (!complete)
2041 return;
2042
Herbert Xu7be2df42009-01-21 14:39:13 -08002043 qs->lro_frag_tbl.ip_summed = CHECKSUM_UNNECESSARY;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002044 cpl = qs->lro_va;
2045
2046 if (unlikely(cpl->vlan_valid)) {
2047 struct net_device *dev = qs->netdev;
2048 struct port_info *pi = netdev_priv(dev);
2049 struct vlan_group *grp = pi->vlan_grp;
2050
2051 if (likely(grp != NULL)) {
Herbert Xu7be2df42009-01-21 14:39:13 -08002052 vlan_gro_frags(&qs->napi, grp, ntohs(cpl->vlan),
2053 &qs->lro_frag_tbl);
2054 goto out;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002055 }
2056 }
Herbert Xu7be2df42009-01-21 14:39:13 -08002057 napi_gro_frags(&qs->napi, &qs->lro_frag_tbl);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002058
Herbert Xu7be2df42009-01-21 14:39:13 -08002059out:
2060 qs->lro_frag_tbl.nr_frags = qs->lro_frag_tbl.len = 0;
Divy Le Rayb47385b2008-05-21 18:56:26 -07002061}
2062
Divy Le Ray4d22de32007-01-18 22:04:14 -05002063/**
2064 * handle_rsp_cntrl_info - handles control information in a response
2065 * @qs: the queue set corresponding to the response
2066 * @flags: the response control flags
Divy Le Ray4d22de32007-01-18 22:04:14 -05002067 *
2068 * Handles the control information of an SGE response, such as GTS
2069 * indications and completion credits for the queue set's Tx queues.
Divy Le Ray6195c712007-01-30 19:43:56 -08002070 * HW coalesces credits, we don't do any extra SW coalescing.
Divy Le Ray4d22de32007-01-18 22:04:14 -05002071 */
Divy Le Ray6195c712007-01-30 19:43:56 -08002072static inline void handle_rsp_cntrl_info(struct sge_qset *qs, u32 flags)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002073{
2074 unsigned int credits;
2075
2076#if USE_GTS
2077 if (flags & F_RSPD_TXQ0_GTS)
2078 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2079#endif
2080
Divy Le Ray4d22de32007-01-18 22:04:14 -05002081 credits = G_RSPD_TXQ0_CR(flags);
2082 if (credits)
2083 qs->txq[TXQ_ETH].processed += credits;
2084
Divy Le Ray6195c712007-01-30 19:43:56 -08002085 credits = G_RSPD_TXQ2_CR(flags);
2086 if (credits)
2087 qs->txq[TXQ_CTRL].processed += credits;
2088
Divy Le Ray4d22de32007-01-18 22:04:14 -05002089# if USE_GTS
2090 if (flags & F_RSPD_TXQ1_GTS)
2091 clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2092# endif
Divy Le Ray6195c712007-01-30 19:43:56 -08002093 credits = G_RSPD_TXQ1_CR(flags);
2094 if (credits)
2095 qs->txq[TXQ_OFLD].processed += credits;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002096}
2097
2098/**
2099 * check_ring_db - check if we need to ring any doorbells
2100 * @adapter: the adapter
2101 * @qs: the queue set whose Tx queues are to be examined
2102 * @sleeping: indicates which Tx queue sent GTS
2103 *
2104 * Checks if some of a queue set's Tx queues need to ring their doorbells
2105 * to resume transmission after idling while they still have unprocessed
2106 * descriptors.
2107 */
2108static void check_ring_db(struct adapter *adap, struct sge_qset *qs,
2109 unsigned int sleeping)
2110{
2111 if (sleeping & F_RSPD_TXQ0_GTS) {
2112 struct sge_txq *txq = &qs->txq[TXQ_ETH];
2113
2114 if (txq->cleaned + txq->in_use != txq->processed &&
2115 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2116 set_bit(TXQ_RUNNING, &txq->flags);
2117 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2118 V_EGRCNTX(txq->cntxt_id));
2119 }
2120 }
2121
2122 if (sleeping & F_RSPD_TXQ1_GTS) {
2123 struct sge_txq *txq = &qs->txq[TXQ_OFLD];
2124
2125 if (txq->cleaned + txq->in_use != txq->processed &&
2126 !test_and_set_bit(TXQ_LAST_PKT_DB, &txq->flags)) {
2127 set_bit(TXQ_RUNNING, &txq->flags);
2128 t3_write_reg(adap, A_SG_KDOORBELL, F_SELEGRCNTX |
2129 V_EGRCNTX(txq->cntxt_id));
2130 }
2131 }
2132}
2133
2134/**
2135 * is_new_response - check if a response is newly written
2136 * @r: the response descriptor
2137 * @q: the response queue
2138 *
2139 * Returns true if a response descriptor contains a yet unprocessed
2140 * response.
2141 */
2142static inline int is_new_response(const struct rsp_desc *r,
2143 const struct sge_rspq *q)
2144{
2145 return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2146}
2147
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002148static inline void clear_rspq_bufstate(struct sge_rspq * const q)
2149{
2150 q->pg_skb = NULL;
2151 q->rx_recycle_buf = 0;
2152}
2153
/* Response flag bits through which the ETH (TXQ0) and OFLD (TXQ1) Tx queues signal GTS. */
Divy Le Ray4d22de32007-01-18 22:04:14 -05002154#define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/*
 * All response flag bits that carry Tx control information: the GTS bits
 * plus the completion credit fields of the three Tx queues.  A response
 * with any of these set is passed to handle_rsp_cntrl_info().
 */
2155#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2156 V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2157 V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2158 V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2159
2160/* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2161#define NOMEM_INTR_DELAY 2500
2162
2163/**
2164 * process_responses - process responses from an SGE response queue
2165 * @adap: the adapter
2166 * @qs: the queue set to which the response queue belongs
2167 * @budget: how many responses can be processed in this round
2168 *
2169 * Process responses from an SGE response queue up to the supplied budget.
2170 * Responses include received packets as well as credits and other events
2171 * for the queues that belong to the response queue's queue set.
2172 * A negative budget is effectively unlimited.
2173 *
2174 * Additionally choose the interrupt holdoff time for the next interrupt
2175 * on this queue. If the system is under memory shortage use a fairly
2176 * long delay to help recovery.
2177 */
2178static int process_responses(struct adapter *adap, struct sge_qset *qs,
2179 int budget)
2180{
2181 struct sge_rspq *q = &qs->rspq;
2182 struct rsp_desc *r = &q->desc[q->cidx];
2183 int budget_left = budget;
Divy Le Ray6195c712007-01-30 19:43:56 -08002184 unsigned int sleeping = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002185 struct sk_buff *offload_skbs[RX_BUNDLE_SIZE];
2186 int ngathered = 0;
2187
/* Default to the configured holdoff; the no_mem path below overrides it. */
2188 q->next_holdoff = q->holdoff_tmr;
2189
/*
 * budget_left is only tested against zero, so a negative budget keeps the
 * loop going until the ring holds no more new responses.
 */
2190 while (likely(budget_left && is_new_response(r, q))) {
Divy Le Rayb47385b2008-05-21 18:56:26 -07002191 int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002192 struct sk_buff *skb = NULL;
2193 u32 len, flags = ntohl(r->flags);
/* rss_hi is the first 32-bit word of the descriptor, kept in wire order. */
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002194 __be32 rss_hi = *(const __be32 *)r,
2195 rss_lo = r->rss_hdr.rss_hash_val;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002196
2197 eth = r->rss_hdr.opcode == CPL_RX_PKT;
2198
/*
 * Case 1: async notification.  The first AN_PKT_SIZE bytes of the
 * descriptor are copied into a fresh skb that is tagged CPL_ASYNC_NOTIF.
 */
2199 if (unlikely(flags & F_RSPD_ASYNC_NOTIF)) {
2200 skb = alloc_skb(AN_PKT_SIZE, GFP_ATOMIC);
2201 if (!skb)
2202 goto no_mem;
2203
2204 memcpy(__skb_put(skb, AN_PKT_SIZE), r, AN_PKT_SIZE);
2205 skb->data[0] = CPL_ASYNC_NOTIF;
2206 rss_hi = htonl(CPL_ASYNC_NOTIF << 24);
2207 q->async_notif++;
/* Case 2: the packet is carried as immediate data inside the response. */
2208 } else if (flags & F_RSPD_IMM_DATA_VALID) {
2209 skb = get_imm_packet(r);
2210 if (unlikely(!skb)) {
/*
 * Shared out-of-memory exit, also reached by goto from the other
 * allocation failures.  The loop is left before cidx is advanced, so
 * the current response stays unconsumed in the ring.
 */
Divy Le Raycf992af2007-05-30 21:10:47 -07002211no_mem:
Divy Le Ray4d22de32007-01-18 22:04:14 -05002212 q->next_holdoff = NOMEM_INTR_DELAY;
2213 q->nomem++;
2214 /* consume one credit since we tried */
2215 budget_left--;
2216 break;
2217 }
2218 q->imm_data++;
/* Immediate data carries no leading pad (see rx_eth()). */
Divy Le Raye0994eb2007-02-24 16:44:17 -08002219 ethpad = 0;
/* Case 3: non-zero len_cq, the data sits in a free-list buffer. */
Divy Le Ray4d22de32007-01-18 22:04:14 -05002220 } else if ((len = ntohl(r->len_cq)) != 0) {
Divy Le Raycf992af2007-05-30 21:10:47 -07002221 struct sge_fl *fl;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002222
/* LRO stays on only for CPL_RX_PKT responses that is_eth_tcp() accepts. */
Divy Le Ray65ab8382009-02-04 16:31:39 -08002223 lro &= eth && is_eth_tcp(rss_hi);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002224
/* F_RSPD_FLQ selects the second free list. */
Divy Le Raycf992af2007-05-30 21:10:47 -07002225 fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2226 if (fl->use_pages) {
2227 void *addr = fl->sdesc[fl->cidx].pg_chunk.va;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002228
Divy Le Raycf992af2007-05-30 21:10:47 -07002229 prefetch(addr);
2230#if L1_CACHE_BYTES < 128
2231 prefetch(addr + L1_CACHE_BYTES);
2232#endif
Divy Le Raye0994eb2007-02-24 16:44:17 -08002233 __refill_fl(adap, fl);
/*
 * LRO path: the page chunk goes straight into the fragment table and no
 * skb is built here, so skb stays NULL for this response.
 */
Divy Le Rayb47385b2008-05-21 18:56:26 -07002234 if (lro > 0) {
2235 lro_add_page(adap, qs, fl,
2236 G_RSPD_LEN(len),
2237 flags & F_RSPD_EOP);
2238 goto next_fl;
2239 }
Divy Le Raye0994eb2007-02-24 16:44:17 -08002240
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002241 skb = get_packet_pg(adap, fl, q,
2242 G_RSPD_LEN(len),
2243 eth ?
2244 SGE_RX_DROP_THRES : 0);
2245 q->pg_skb = skb;
Divy Le Raycf992af2007-05-30 21:10:47 -07002246 } else
Divy Le Raye0994eb2007-02-24 16:44:17 -08002247 skb = get_packet(adap, fl, G_RSPD_LEN(len),
2248 eth ? SGE_RX_DROP_THRES : 0);
/*
 * No skb: an Ethernet packet is counted as dropped and processing goes
 * on; anything else takes the out-of-memory exit.
 */
Divy Le Raycf992af2007-05-30 21:10:47 -07002249 if (unlikely(!skb)) {
2250 if (!eth)
2251 goto no_mem;
2252 q->rx_drops++;
2253 } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT))
2254 __skb_pull(skb, 2);
/* Advance the free list's consumer index, wrapping at the end. */
Divy Le Rayb47385b2008-05-21 18:56:26 -07002255next_fl:
Divy Le Ray4d22de32007-01-18 22:04:14 -05002256 if (++fl->cidx == fl->size)
2257 fl->cidx = 0;
/* Case 4: no data at all, a pure response. */
2258 } else
2259 q->pure_rsps++;
2260
/* Record GTS indications for check_ring_db() and apply Tx credits. */
2261 if (flags & RSPD_CTRL_MASK) {
2262 sleeping |= flags & RSPD_GTS_MASK;
Divy Le Ray6195c712007-01-30 19:43:56 -08002263 handle_rsp_cntrl_info(qs, flags);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002264 }
2265
/*
 * Consume the descriptor.  Wrapping around the ring flips the generation
 * value that is_new_response() compares against.
 */
2266 r++;
2267 if (unlikely(++q->cidx == q->size)) {
2268 q->cidx = 0;
2269 q->gen ^= 1;
2270 r = q->desc;
2271 }
2272 prefetch(r);
2273
/* Hand credits to refill_rspq() in batches of a quarter of the ring. */
2274 if (++q->credits >= (q->size / 4)) {
2275 refill_rspq(adap, q, q->credits);
2276 q->credits = 0;
2277 }
2278
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002279 packet_complete = flags &
2280 (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID |
2281 F_RSPD_ASYNC_NOTIF);
2282
/* Deliver only once a whole packet has been assembled. */
2283 if (skb != NULL && packet_complete) {
Divy Le Ray4d22de32007-01-18 22:04:14 -05002284 if (eth)
Divy Le Rayb47385b2008-05-21 18:56:26 -07002285 rx_eth(adap, q, skb, ethpad, lro);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002286 else {
Divy Le Rayafefce62007-11-16 11:22:21 -08002287 q->offload_pkts++;
Divy Le Raycf992af2007-05-30 21:10:47 -07002288 /* Preserve the RSS info in csum & priority */
2289 skb->csum = rss_hi;
2290 skb->priority = rss_lo;
2291 ngathered = rx_offload(&adap->tdev, q, skb,
2292 offload_skbs,
Divy Le Raye0994eb2007-02-24 16:44:17 -08002293 ngathered);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002294 }
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002295
2296 if (flags & F_RSPD_EOP)
Divy Le Rayb47385b2008-05-21 18:56:26 -07002297 clear_rspq_bufstate(q);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002298 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05002299 --budget_left;
2300 }
2301
/* Flush whatever offload packets are still sitting in the bundle. */
Divy Le Ray4d22de32007-01-18 22:04:14 -05002302 deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002303
Divy Le Ray4d22de32007-01-18 22:04:14 -05002304 if (sleeping)
2305 check_ring_db(adap, qs, sleeping);
2306
2307 smp_mb(); /* commit Tx queue .processed updates */
2308 if (unlikely(qs->txq_stopped != 0))
2309 restart_tx(qs);
2310
/*
 * Return the amount of budget used: one per response consumed, plus one
 * if the loop ended on the out-of-memory path.
 */
2311 budget -= budget_left;
2312 return budget;
2313}
2314
2315static inline int is_pure_response(const struct rsp_desc *r)
2316{
Roland Dreierc5419e62008-11-28 21:55:42 -08002317 __be32 n = r->flags & htonl(F_RSPD_ASYNC_NOTIF | F_RSPD_IMM_DATA_VALID);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002318
2319 return (n | r->len_cq) == 0;
2320}
2321
2322/**
2323 * napi_rx_handler - the NAPI handler for Rx processing
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002324 * @napi: the napi instance
Divy Le Ray4d22de32007-01-18 22:04:14 -05002325 * @budget: how many packets we can process in this round
2326 *
2327 * Handler for new data events when using NAPI.
2328 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002329static int napi_rx_handler(struct napi_struct *napi, int budget)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002330{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002331 struct sge_qset *qs = container_of(napi, struct sge_qset, napi);
2332 struct adapter *adap = qs->adap;
2333 int work_done = process_responses(adap, qs, budget);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002334
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002335 if (likely(work_done < budget)) {
2336 napi_complete(napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002337
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002338 /*
2339 * Because we don't atomically flush the following
2340 * write it is possible that in very rare cases it can
2341 * reach the device in a way that races with a new
2342 * response being written plus an error interrupt
2343 * causing the NAPI interrupt handler below to return
2344 * unhandled status to the OS. To protect against
2345 * this would require flushing the write and doing
2346 * both the write and the flush with interrupts off.
2347 * Way too expensive and unjustifiable given the
2348 * rarity of the race.
2349 *
2350 * The race cannot happen at all with MSI-X.
2351 */
2352 t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
2353 V_NEWTIMER(qs->rspq.next_holdoff) |
2354 V_NEWINDEX(qs->rspq.cidx));
2355 }
2356 return work_done;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002357}
2358
2359/*
2360 * Returns true if the device is already scheduled for polling.
2361 */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002362static inline int napi_is_scheduled(struct napi_struct *napi)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002363{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002364 return test_bit(NAPI_STATE_SCHED, &napi->state);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002365}
2366
2367/**
2368 * process_pure_responses - process pure responses from a response queue
2369 * @adap: the adapter
2370 * @qs: the queue set owning the response queue
2371 * @r: the first pure response to process
2372 *
2373 * A simpler version of process_responses() that handles only pure (i.e.,
2374 * non data-carrying) responses. Such respones are too light-weight to
2375 * justify calling a softirq under NAPI, so we handle them specially in
2376 * the interrupt handler. The function is called with a pointer to a
2377 * response, which the caller must ensure is a valid pure response.
2378 *
2379 * Returns 1 if it encounters a valid data-carrying response, 0 otherwise.
2380 */
2381static int process_pure_responses(struct adapter *adap, struct sge_qset *qs,
2382 struct rsp_desc *r)
2383{
2384 struct sge_rspq *q = &qs->rspq;
Divy Le Ray6195c712007-01-30 19:43:56 -08002385 unsigned int sleeping = 0;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002386
2387 do {
2388 u32 flags = ntohl(r->flags);
2389
2390 r++;
2391 if (unlikely(++q->cidx == q->size)) {
2392 q->cidx = 0;
2393 q->gen ^= 1;
2394 r = q->desc;
2395 }
2396 prefetch(r);
2397
2398 if (flags & RSPD_CTRL_MASK) {
2399 sleeping |= flags & RSPD_GTS_MASK;
Divy Le Ray6195c712007-01-30 19:43:56 -08002400 handle_rsp_cntrl_info(qs, flags);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002401 }
2402
2403 q->pure_rsps++;
2404 if (++q->credits >= (q->size / 4)) {
2405 refill_rspq(adap, q, q->credits);
2406 q->credits = 0;
2407 }
2408 } while (is_new_response(r, q) && is_pure_response(r));
2409
Divy Le Ray4d22de32007-01-18 22:04:14 -05002410 if (sleeping)
2411 check_ring_db(adap, qs, sleeping);
2412
2413 smp_mb(); /* commit Tx queue .processed updates */
2414 if (unlikely(qs->txq_stopped != 0))
2415 restart_tx(qs);
2416
2417 return is_new_response(r, q);
2418}
2419
2420/**
2421 * handle_responses - decide what to do with new responses in NAPI mode
2422 * @adap: the adapter
2423 * @q: the response queue
2424 *
2425 * This is used by the NAPI interrupt handlers to decide what to do with
2426 * new SGE responses. If there are no new responses it returns -1. If
2427 * there are new responses and they are pure (i.e., non-data carrying)
2428 * it handles them straight in hard interrupt context as they are very
2429 * cheap and don't deliver any packets. Finally, if there are any data
2430 * signaling responses it schedules the NAPI handler. Returns 1 if it
2431 * schedules NAPI, 0 if all new responses were pure.
2432 *
2433 * The caller must ascertain NAPI is not already running.
2434 */
2435static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
2436{
2437 struct sge_qset *qs = rspq_to_qset(q);
2438 struct rsp_desc *r = &q->desc[q->cidx];
2439
2440 if (!is_new_response(r, q))
2441 return -1;
2442 if (is_pure_response(r) && process_pure_responses(adap, qs, r) == 0) {
2443 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2444 V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
2445 return 0;
2446 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002447 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002448 return 1;
2449}
2450
2451/*
2452 * The MSI-X interrupt handler for an SGE response queue for the non-NAPI case
2453 * (i.e., response queue serviced in hard interrupt).
2454 */
2455irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
2456{
2457 struct sge_qset *qs = cookie;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002458 struct adapter *adap = qs->adap;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002459 struct sge_rspq *q = &qs->rspq;
2460
2461 spin_lock(&q->lock);
2462 if (process_responses(adap, qs, -1) == 0)
2463 q->unhandled_irqs++;
2464 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2465 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2466 spin_unlock(&q->lock);
2467 return IRQ_HANDLED;
2468}
2469
2470/*
2471 * The MSI-X interrupt handler for an SGE response queue for the NAPI case
2472 * (i.e., response queue serviced by NAPI polling).
2473 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -07002474static irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002475{
2476 struct sge_qset *qs = cookie;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002477 struct sge_rspq *q = &qs->rspq;
2478
2479 spin_lock(&q->lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002480
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002481 if (handle_responses(qs->adap, q) < 0)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002482 q->unhandled_irqs++;
2483 spin_unlock(&q->lock);
2484 return IRQ_HANDLED;
2485}
2486
2487/*
2488 * The non-NAPI MSI interrupt handler. This needs to handle data events from
2489 * SGE response queues as well as error and other async events as they all use
2490 * the same MSI vector. We use one SGE response queue per port in this mode
2491 * and protect all response queues with queue 0's lock.
2492 */
2493static irqreturn_t t3_intr_msi(int irq, void *cookie)
2494{
2495 int new_packets = 0;
2496 struct adapter *adap = cookie;
2497 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2498
2499 spin_lock(&q->lock);
2500
2501 if (process_responses(adap, &adap->sge.qs[0], -1)) {
2502 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q->cntxt_id) |
2503 V_NEWTIMER(q->next_holdoff) | V_NEWINDEX(q->cidx));
2504 new_packets = 1;
2505 }
2506
2507 if (adap->params.nports == 2 &&
2508 process_responses(adap, &adap->sge.qs[1], -1)) {
2509 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2510
2511 t3_write_reg(adap, A_SG_GTS, V_RSPQ(q1->cntxt_id) |
2512 V_NEWTIMER(q1->next_holdoff) |
2513 V_NEWINDEX(q1->cidx));
2514 new_packets = 1;
2515 }
2516
2517 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2518 q->unhandled_irqs++;
2519
2520 spin_unlock(&q->lock);
2521 return IRQ_HANDLED;
2522}
2523
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002524static int rspq_check_napi(struct sge_qset *qs)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002525{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002526 struct sge_rspq *q = &qs->rspq;
2527
2528 if (!napi_is_scheduled(&qs->napi) &&
2529 is_new_response(&q->desc[q->cidx], q)) {
2530 napi_schedule(&qs->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002531 return 1;
2532 }
2533 return 0;
2534}
2535
2536/*
2537 * The MSI interrupt handler for the NAPI case (i.e., response queues serviced
2538 * by NAPI polling). Handles data events from SGE response queues as well as
2539 * error and other async events as they all use the same MSI vector. We use
2540 * one SGE response queue per port in this mode and protect all response
2541 * queues with queue 0's lock.
2542 */
Stephen Hemminger9265fab2007-10-08 16:22:29 -07002543static irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002544{
2545 int new_packets;
2546 struct adapter *adap = cookie;
2547 struct sge_rspq *q = &adap->sge.qs[0].rspq;
2548
2549 spin_lock(&q->lock);
2550
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002551 new_packets = rspq_check_napi(&adap->sge.qs[0]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002552 if (adap->params.nports == 2)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002553 new_packets += rspq_check_napi(&adap->sge.qs[1]);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002554 if (!new_packets && t3_slow_intr_handler(adap) == 0)
2555 q->unhandled_irqs++;
2556
2557 spin_unlock(&q->lock);
2558 return IRQ_HANDLED;
2559}
2560
2561/*
2562 * A helper function that processes responses and issues GTS.
2563 */
2564static inline int process_responses_gts(struct adapter *adap,
2565 struct sge_rspq *rq)
2566{
2567 int work;
2568
2569 work = process_responses(adap, rspq_to_qset(rq), -1);
2570 t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2571 V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2572 return work;
2573}
2574
2575/*
2576 * The legacy INTx interrupt handler. This needs to handle data events from
2577 * SGE response queues as well as error and other async events as they all use
2578 * the same interrupt pin. We use one SGE response queue per port in this mode
2579 * and protect all response queues with queue 0's lock.
2580 */
2581static irqreturn_t t3_intr(int irq, void *cookie)
2582{
2583 int work_done, w0, w1;
2584 struct adapter *adap = cookie;
2585 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2586 struct sge_rspq *q1 = &adap->sge.qs[1].rspq;
2587
2588 spin_lock(&q0->lock);
2589
2590 w0 = is_new_response(&q0->desc[q0->cidx], q0);
2591 w1 = adap->params.nports == 2 &&
2592 is_new_response(&q1->desc[q1->cidx], q1);
2593
2594 if (likely(w0 | w1)) {
2595 t3_write_reg(adap, A_PL_CLI, 0);
2596 t3_read_reg(adap, A_PL_CLI); /* flush */
2597
2598 if (likely(w0))
2599 process_responses_gts(adap, q0);
2600
2601 if (w1)
2602 process_responses_gts(adap, q1);
2603
2604 work_done = w0 | w1;
2605 } else
2606 work_done = t3_slow_intr_handler(adap);
2607
2608 spin_unlock(&q0->lock);
2609 return IRQ_RETVAL(work_done != 0);
2610}
2611
2612/*
2613 * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2614 * Handles data events from SGE response queues as well as error and other
2615 * async events as they all use the same interrupt pin. We use one SGE
2616 * response queue per port in this mode and protect all response queues with
2617 * queue 0's lock.
2618 */
2619static irqreturn_t t3b_intr(int irq, void *cookie)
2620{
2621 u32 map;
2622 struct adapter *adap = cookie;
2623 struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2624
2625 t3_write_reg(adap, A_PL_CLI, 0);
2626 map = t3_read_reg(adap, A_SG_DATA_INTR);
2627
2628 if (unlikely(!map)) /* shared interrupt, most likely */
2629 return IRQ_NONE;
2630
2631 spin_lock(&q0->lock);
2632
2633 if (unlikely(map & F_ERRINTR))
2634 t3_slow_intr_handler(adap);
2635
2636 if (likely(map & 1))
2637 process_responses_gts(adap, q0);
2638
2639 if (map & 2)
2640 process_responses_gts(adap, &adap->sge.qs[1].rspq);
2641
2642 spin_unlock(&q0->lock);
2643 return IRQ_HANDLED;
2644}
2645
2646/*
2647 * NAPI interrupt handler for legacy INTx interrupts for T3B-based cards.
2648 * Handles data events from SGE response queues as well as error and other
2649 * async events as they all use the same interrupt pin. We use one SGE
2650 * response queue per port in this mode and protect all response queues with
2651 * queue 0's lock.
2652 */
2653static irqreturn_t t3b_intr_napi(int irq, void *cookie)
2654{
2655 u32 map;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002656 struct adapter *adap = cookie;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002657 struct sge_qset *qs0 = &adap->sge.qs[0];
2658 struct sge_rspq *q0 = &qs0->rspq;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002659
2660 t3_write_reg(adap, A_PL_CLI, 0);
2661 map = t3_read_reg(adap, A_SG_DATA_INTR);
2662
2663 if (unlikely(!map)) /* shared interrupt, most likely */
2664 return IRQ_NONE;
2665
2666 spin_lock(&q0->lock);
2667
2668 if (unlikely(map & F_ERRINTR))
2669 t3_slow_intr_handler(adap);
2670
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002671 if (likely(map & 1))
2672 napi_schedule(&qs0->napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002673
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002674 if (map & 2)
2675 napi_schedule(&adap->sge.qs[1].napi);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002676
2677 spin_unlock(&q0->lock);
2678 return IRQ_HANDLED;
2679}
2680
2681/**
2682 * t3_intr_handler - select the top-level interrupt handler
2683 * @adap: the adapter
2684 * @polling: whether using NAPI to service response queues
2685 *
2686 * Selects the top-level interrupt handler based on the type of interrupts
2687 * (MSI-X, MSI, or legacy) and whether NAPI will be used to service the
2688 * response queues.
2689 */
Jeff Garzik7c239972007-10-19 03:12:20 -04002690irq_handler_t t3_intr_handler(struct adapter *adap, int polling)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002691{
2692 if (adap->flags & USING_MSIX)
2693 return polling ? t3_sge_intr_msix_napi : t3_sge_intr_msix;
2694 if (adap->flags & USING_MSI)
2695 return polling ? t3_intr_msi_napi : t3_intr_msi;
2696 if (adap->params.rev > 0)
2697 return polling ? t3b_intr_napi : t3b_intr;
2698 return t3_intr;
2699}
2700
/* Bits of the SGE interrupt cause that report parity errors. */
#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | \
		    F_RCPARITYERROR | F_IRPARITYERROR | \
		    V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | \
		    F_LODRBPARITYERROR | F_HIDRBPARITYERROR | \
		    F_LORCQPARITYERROR | F_HIRCQPARITYERROR)

/* Bits of the SGE interrupt cause that report request framing errors. */
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)

/* Cause bits that are treated as fatal by the SGE error handler. */
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | \
		      F_RSPQCREDITOVERFOW | F_RSPQDISABLED)
2709
Divy Le Ray4d22de32007-01-18 22:04:14 -05002710/**
2711 * t3_sge_err_intr_handler - SGE async event interrupt handler
2712 * @adapter: the adapter
2713 *
2714 * Interrupt handler for SGE asynchronous (non-data) events.
2715 */
2716void t3_sge_err_intr_handler(struct adapter *adapter)
2717{
2718 unsigned int v, status = t3_read_reg(adapter, A_SG_INT_CAUSE);
2719
Divy Le Rayb8819552007-12-17 18:47:31 -08002720 if (status & SGE_PARERR)
2721 CH_ALERT(adapter, "SGE parity error (0x%x)\n",
2722 status & SGE_PARERR);
2723 if (status & SGE_FRAMINGERR)
2724 CH_ALERT(adapter, "SGE framing error (0x%x)\n",
2725 status & SGE_FRAMINGERR);
2726
Divy Le Ray4d22de32007-01-18 22:04:14 -05002727 if (status & F_RSPQCREDITOVERFOW)
2728 CH_ALERT(adapter, "SGE response queue credit overflow\n");
2729
2730 if (status & F_RSPQDISABLED) {
2731 v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
2732
2733 CH_ALERT(adapter,
2734 "packet delivered to disabled response queue "
2735 "(0x%x)\n", (v >> S_RSPQ0DISABLED) & 0xff);
2736 }
2737
Divy Le Ray6e3f03b2007-08-21 20:49:10 -07002738 if (status & (F_HIPIODRBDROPERR | F_LOPIODRBDROPERR))
2739 CH_ALERT(adapter, "SGE dropped %s priority doorbell\n",
2740 status & F_HIPIODRBDROPERR ? "high" : "lo");
2741
Divy Le Ray4d22de32007-01-18 22:04:14 -05002742 t3_write_reg(adapter, A_SG_INT_CAUSE, status);
Divy Le Rayb8819552007-12-17 18:47:31 -08002743 if (status & SGE_FATALERR)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002744 t3_fatal_err(adapter);
2745}
2746
2747/**
2748 * sge_timer_cb - perform periodic maintenance of an SGE qset
2749 * @data: the SGE queue set to maintain
2750 *
2751 * Runs periodically from a timer to perform maintenance of an SGE queue
2752 * set. It performs two tasks:
2753 *
2754 * a) Cleans up any completed Tx descriptors that may still be pending.
2755 * Normal descriptor cleanup happens when new packets are added to a Tx
2756 * queue so this timer is relatively infrequent and does any cleanup only
2757 * if the Tx queue has not seen any new packets in a while. We make a
2758 * best effort attempt to reclaim descriptors, in that we don't wait
2759 * around if we cannot get a queue's lock (which most likely is because
2760 * someone else is queueing new packets and so will also handle the clean
2761 * up). Since control queues use immediate data exclusively we don't
2762 * bother cleaning them up here.
2763 *
2764 * b) Replenishes Rx queues that have run out due to memory shortage.
2765 * Normally new Rx buffers are added when existing ones are consumed but
2766 * when out of memory a queue can become empty. We try to add only a few
2767 * buffers here, the queue will be replenished fully as these new buffers
2768 * are used up if memory shortage has subsided.
2769 */
2770static void sge_timer_cb(unsigned long data)
2771{
2772 spinlock_t *lock;
2773 struct sge_qset *qs = (struct sge_qset *)data;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002774 struct adapter *adap = qs->adap;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002775
2776 if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
2777 reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
2778 spin_unlock(&qs->txq[TXQ_ETH].lock);
2779 }
2780 if (spin_trylock(&qs->txq[TXQ_OFLD].lock)) {
2781 reclaim_completed_tx(adap, &qs->txq[TXQ_OFLD]);
2782 spin_unlock(&qs->txq[TXQ_OFLD].lock);
2783 }
2784 lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002785 &adap->sge.qs[0].rspq.lock;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002786 if (spin_trylock_irq(lock)) {
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002787 if (!napi_is_scheduled(&qs->napi)) {
Divy Le Raybae73f42007-02-24 16:44:12 -08002788 u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
2789
Divy Le Ray4d22de32007-01-18 22:04:14 -05002790 if (qs->fl[0].credits < qs->fl[0].size)
2791 __refill_fl(adap, &qs->fl[0]);
2792 if (qs->fl[1].credits < qs->fl[1].size)
2793 __refill_fl(adap, &qs->fl[1]);
Divy Le Raybae73f42007-02-24 16:44:12 -08002794
2795 if (status & (1 << qs->rspq.cntxt_id)) {
2796 qs->rspq.starved++;
2797 if (qs->rspq.credits) {
2798 refill_rspq(adap, &qs->rspq, 1);
2799 qs->rspq.credits--;
2800 qs->rspq.restarted++;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002801 t3_write_reg(adap, A_SG_RSPQ_FL_STATUS,
Divy Le Raybae73f42007-02-24 16:44:12 -08002802 1 << qs->rspq.cntxt_id);
2803 }
2804 }
Divy Le Ray4d22de32007-01-18 22:04:14 -05002805 }
2806 spin_unlock_irq(lock);
2807 }
2808 mod_timer(&qs->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
2809}
2810
2811/**
2812 * t3_update_qset_coalesce - update coalescing settings for a queue set
2813 * @qs: the SGE queue set
2814 * @p: new queue set parameters
2815 *
2816 * Update the coalescing settings for an SGE queue set. Nothing is done
2817 * if the queue set is not initialized yet.
2818 */
2819void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
2820{
Divy Le Ray4d22de32007-01-18 22:04:14 -05002821 qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
2822 qs->rspq.polling = p->polling;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002823 qs->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002824}
2825
2826/**
2827 * t3_sge_alloc_qset - initialize an SGE queue set
2828 * @adapter: the adapter
2829 * @id: the queue set id
2830 * @nports: how many Ethernet ports will be using this queue set
2831 * @irq_vec_idx: the IRQ vector index for response queue interrupts
2832 * @p: configuration parameters for this queue set
2833 * @ntxq: number of Tx queues for the queue set
2834 * @netdev: net device associated with this queue set
Divy Le Ray82ad3322008-12-16 01:09:39 -08002835 * @netdevq: net device TX queue associated with this queue set
Divy Le Ray4d22de32007-01-18 22:04:14 -05002836 *
2837 * Allocate resources and initialize an SGE queue set. A queue set
2838 * comprises a response queue, two Rx free-buffer queues, and up to 3
2839 * Tx queues. The Tx queues are assigned roles in the order Ethernet
2840 * queue, offload queue, and control queue.
2841 */
2842int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
2843 int irq_vec_idx, const struct qset_params *p,
Divy Le Ray82ad3322008-12-16 01:09:39 -08002844 int ntxq, struct net_device *dev,
2845 struct netdev_queue *netdevq)
Divy Le Ray4d22de32007-01-18 22:04:14 -05002846{
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002847 int i, avail, ret = -ENOMEM;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002848 struct sge_qset *q = &adapter->sge.qs[id];
2849
2850 init_qset_cntxt(q, id);
Divy Le Ray20d3fc12008-10-08 17:36:03 -07002851 setup_timer(&q->tx_reclaim_timer, sge_timer_cb, (unsigned long)q);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002852
2853 q->fl[0].desc = alloc_ring(adapter->pdev, p->fl_size,
2854 sizeof(struct rx_desc),
2855 sizeof(struct rx_sw_desc),
2856 &q->fl[0].phys_addr, &q->fl[0].sdesc);
2857 if (!q->fl[0].desc)
2858 goto err;
2859
2860 q->fl[1].desc = alloc_ring(adapter->pdev, p->jumbo_size,
2861 sizeof(struct rx_desc),
2862 sizeof(struct rx_sw_desc),
2863 &q->fl[1].phys_addr, &q->fl[1].sdesc);
2864 if (!q->fl[1].desc)
2865 goto err;
2866
2867 q->rspq.desc = alloc_ring(adapter->pdev, p->rspq_size,
2868 sizeof(struct rsp_desc), 0,
2869 &q->rspq.phys_addr, NULL);
2870 if (!q->rspq.desc)
2871 goto err;
2872
2873 for (i = 0; i < ntxq; ++i) {
2874 /*
2875 * The control queue always uses immediate data so does not
2876 * need to keep track of any sk_buffs.
2877 */
2878 size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2879
2880 q->txq[i].desc = alloc_ring(adapter->pdev, p->txq_size[i],
2881 sizeof(struct tx_desc), sz,
2882 &q->txq[i].phys_addr,
2883 &q->txq[i].sdesc);
2884 if (!q->txq[i].desc)
2885 goto err;
2886
2887 q->txq[i].gen = 1;
2888 q->txq[i].size = p->txq_size[i];
2889 spin_lock_init(&q->txq[i].lock);
2890 skb_queue_head_init(&q->txq[i].sendq);
2891 }
2892
2893 tasklet_init(&q->txq[TXQ_OFLD].qresume_tsk, restart_offloadq,
2894 (unsigned long)q);
2895 tasklet_init(&q->txq[TXQ_CTRL].qresume_tsk, restart_ctrlq,
2896 (unsigned long)q);
2897
2898 q->fl[0].gen = q->fl[1].gen = 1;
2899 q->fl[0].size = p->fl_size;
2900 q->fl[1].size = p->jumbo_size;
2901
2902 q->rspq.gen = 1;
2903 q->rspq.size = p->rspq_size;
2904 spin_lock_init(&q->rspq.lock);
David S. Miller147e70e2008-09-22 01:29:52 -07002905 skb_queue_head_init(&q->rspq.rx_queue);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002906
2907 q->txq[TXQ_ETH].stop_thres = nports *
2908 flits_to_desc(sgl_len(MAX_SKB_FRAGS + 1) + 3);
2909
Divy Le Raycf992af2007-05-30 21:10:47 -07002910#if FL0_PG_CHUNK_SIZE > 0
2911 q->fl[0].buf_size = FL0_PG_CHUNK_SIZE;
Divy Le Raye0994eb2007-02-24 16:44:17 -08002912#else
Divy Le Raycf992af2007-05-30 21:10:47 -07002913 q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data);
Divy Le Raye0994eb2007-02-24 16:44:17 -08002914#endif
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002915#if FL1_PG_CHUNK_SIZE > 0
2916 q->fl[1].buf_size = FL1_PG_CHUNK_SIZE;
2917#else
Divy Le Raycf992af2007-05-30 21:10:47 -07002918 q->fl[1].buf_size = is_offload(adapter) ?
2919 (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
2920 MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt);
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002921#endif
2922
2923 q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0;
2924 q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0;
2925 q->fl[0].order = FL0_PG_ORDER;
2926 q->fl[1].order = FL1_PG_ORDER;
Divy Le Ray4d22de32007-01-18 22:04:14 -05002927
Roland Dreierb1186de2008-03-20 13:30:48 -07002928 spin_lock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002929
2930 /* FL threshold comparison uses < */
2931 ret = t3_sge_init_rspcntxt(adapter, q->rspq.cntxt_id, irq_vec_idx,
2932 q->rspq.phys_addr, q->rspq.size,
2933 q->fl[0].buf_size, 1, 0);
2934 if (ret)
2935 goto err_unlock;
2936
2937 for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2938 ret = t3_sge_init_flcntxt(adapter, q->fl[i].cntxt_id, 0,
2939 q->fl[i].phys_addr, q->fl[i].size,
2940 q->fl[i].buf_size, p->cong_thres, 1,
2941 0);
2942 if (ret)
2943 goto err_unlock;
2944 }
2945
2946 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2947 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2948 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2949 1, 0);
2950 if (ret)
2951 goto err_unlock;
2952
2953 if (ntxq > 1) {
2954 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_OFLD].cntxt_id,
2955 USE_GTS, SGE_CNTXT_OFLD, id,
2956 q->txq[TXQ_OFLD].phys_addr,
2957 q->txq[TXQ_OFLD].size, 0, 1, 0);
2958 if (ret)
2959 goto err_unlock;
2960 }
2961
2962 if (ntxq > 2) {
2963 ret = t3_sge_init_ecntxt(adapter, q->txq[TXQ_CTRL].cntxt_id, 0,
2964 SGE_CNTXT_CTRL, id,
2965 q->txq[TXQ_CTRL].phys_addr,
2966 q->txq[TXQ_CTRL].size,
2967 q->txq[TXQ_CTRL].token, 1, 0);
2968 if (ret)
2969 goto err_unlock;
2970 }
2971
Roland Dreierb1186de2008-03-20 13:30:48 -07002972 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002973
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002974 q->adap = adapter;
2975 q->netdev = dev;
Divy Le Ray82ad3322008-12-16 01:09:39 -08002976 q->tx_q = netdevq;
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002977 t3_update_qset_coalesce(q, p);
Divy Le Rayb47385b2008-05-21 18:56:26 -07002978
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002979 avail = refill_fl(adapter, &q->fl[0], q->fl[0].size,
2980 GFP_KERNEL | __GFP_COMP);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002981 if (!avail) {
2982 CH_ALERT(adapter, "free list queue 0 initialization failed\n");
2983 goto err;
2984 }
2985 if (avail < q->fl[0].size)
2986 CH_WARN(adapter, "free list queue 0 enabled with %d credits\n",
2987 avail);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002988
Divy Le Ray7385ecf2008-05-21 18:56:21 -07002989 avail = refill_fl(adapter, &q->fl[1], q->fl[1].size,
2990 GFP_KERNEL | __GFP_COMP);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07002991 if (avail < q->fl[1].size)
2992 CH_WARN(adapter, "free list queue 1 enabled with %d credits\n",
2993 avail);
Divy Le Ray4d22de32007-01-18 22:04:14 -05002994 refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
2995
2996 t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2997 V_NEWTIMER(q->rspq.holdoff_tmr));
2998
2999 mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD);
3000 return 0;
3001
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07003002err_unlock:
Roland Dreierb1186de2008-03-20 13:30:48 -07003003 spin_unlock_irq(&adapter->sge.reg_lock);
Divy Le Rayb1fb1f22008-05-21 18:56:16 -07003004err:
Divy Le Ray4d22de32007-01-18 22:04:14 -05003005 t3_free_qset(adapter, q);
3006 return ret;
3007}
3008
3009/**
Divy Le Ray0ca41c02008-09-25 14:05:28 +00003010 * t3_stop_sge_timers - stop SGE timer call backs
3011 * @adap: the adapter
3012 *
3013 * Stops each SGE queue set's timer call back
3014 */
3015void t3_stop_sge_timers(struct adapter *adap)
3016{
3017 int i;
3018
3019 for (i = 0; i < SGE_QSETS; ++i) {
3020 struct sge_qset *q = &adap->sge.qs[i];
3021
3022 if (q->tx_reclaim_timer.function)
3023 del_timer_sync(&q->tx_reclaim_timer);
3024 }
3025}
3026
3027/**
Divy Le Ray4d22de32007-01-18 22:04:14 -05003028 * t3_free_sge_resources - free SGE resources
3029 * @adap: the adapter
3030 *
3031 * Frees resources used by the SGE queue sets.
3032 */
3033void t3_free_sge_resources(struct adapter *adap)
3034{
3035 int i;
3036
3037 for (i = 0; i < SGE_QSETS; ++i)
3038 t3_free_qset(adap, &adap->sge.qs[i]);
3039}
3040
3041/**
3042 * t3_sge_start - enable SGE
3043 * @adap: the adapter
3044 *
3045 * Enables the SGE for DMAs. This is the last step in starting packet
3046 * transfers.
3047 */
3048void t3_sge_start(struct adapter *adap)
3049{
3050 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
3051}
3052
3053/**
3054 * t3_sge_stop - disable SGE operation
3055 * @adap: the adapter
3056 *
3057 * Disables the DMA engine. This can be called in emeregencies (e.g.,
3058 * from error interrupts) or from normal process context. In the latter
3059 * case it also disables any pending queue restart tasklets. Note that
3060 * if it is called in interrupt context it cannot disable the restart
3061 * tasklets as it cannot wait, however the tasklets will have no effect
3062 * since the doorbells are disabled and the driver will call this again
3063 * later from process context, at which time the tasklets will be stopped
3064 * if they are still running.
3065 */
3066void t3_sge_stop(struct adapter *adap)
3067{
3068 t3_set_reg_field(adap, A_SG_CONTROL, F_GLOBALENABLE, 0);
3069 if (!in_interrupt()) {
3070 int i;
3071
3072 for (i = 0; i < SGE_QSETS; ++i) {
3073 struct sge_qset *qs = &adap->sge.qs[i];
3074
3075 tasklet_kill(&qs->txq[TXQ_OFLD].qresume_tsk);
3076 tasklet_kill(&qs->txq[TXQ_CTRL].qresume_tsk);
3077 }
3078 }
3079}
3080
3081/**
3082 * t3_sge_init - initialize SGE
3083 * @adap: the adapter
3084 * @p: the SGE parameters
3085 *
3086 * Performs SGE initialization needed every time after a chip reset.
3087 * We do not initialize any of the queue sets here, instead the driver
3088 * top-level must request those individually. We also do not enable DMA
3089 * here, that should be done after the queues have been set up.
3090 */
3091void t3_sge_init(struct adapter *adap, struct sge_params *p)
3092{
3093 unsigned int ctrl, ups = ffs(pci_resource_len(adap->pdev, 2) >> 12);
3094
3095 ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
Divy Le Rayb8819552007-12-17 18:47:31 -08003096 F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
Divy Le Ray4d22de32007-01-18 22:04:14 -05003097 V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
3098 V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
3099#if SGE_NUM_GENBITS == 1
3100 ctrl |= F_EGRGENCTRL;
3101#endif
3102 if (adap->params.rev > 0) {
3103 if (!(adap->flags & (USING_MSIX | USING_MSI)))
3104 ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
Divy Le Ray4d22de32007-01-18 22:04:14 -05003105 }
3106 t3_write_reg(adap, A_SG_CONTROL, ctrl);
3107 t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
3108 V_LORCQDRBTHRSH(512));
3109 t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
3110 t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
Divy Le Ray6195c712007-01-30 19:43:56 -08003111 V_TIMEOUT(200 * core_ticks_per_usec(adap)));
Divy Le Rayb8819552007-12-17 18:47:31 -08003112 t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
3113 adap->params.rev < T3_REV_C ? 1000 : 500);
Divy Le Ray4d22de32007-01-18 22:04:14 -05003114 t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
3115 t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
3116 t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
3117 t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
3118 t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
3119}
3120
3121/**
3122 * t3_sge_prep - one-time SGE initialization
3123 * @adap: the associated adapter
3124 * @p: SGE parameters
3125 *
3126 * Performs one-time initialization of SGE SW state. Includes determining
3127 * defaults for the assorted SGE parameters, which admins can change until
3128 * they are used to initialize the SGE.
3129 */
Roland Dreier7b9b0942008-01-29 14:45:11 -08003130void t3_sge_prep(struct adapter *adap, struct sge_params *p)
Divy Le Ray4d22de32007-01-18 22:04:14 -05003131{
3132 int i;
3133
3134 p->max_pkt_size = (16 * 1024) - sizeof(struct cpl_rx_data) -
3135 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
3136
3137 for (i = 0; i < SGE_QSETS; ++i) {
3138 struct qset_params *q = p->qset + i;
3139
3140 q->polling = adap->params.rev > 0;
3141 q->coalesce_usecs = 5;
3142 q->rspq_size = 1024;
Divy Le Raye0994eb2007-02-24 16:44:17 -08003143 q->fl_size = 1024;
Divy Le Ray7385ecf2008-05-21 18:56:21 -07003144 q->jumbo_size = 512;
Divy Le Ray4d22de32007-01-18 22:04:14 -05003145 q->txq_size[TXQ_ETH] = 1024;
3146 q->txq_size[TXQ_OFLD] = 1024;
3147 q->txq_size[TXQ_CTRL] = 256;
3148 q->cong_thres = 0;
3149 }
3150
3151 spin_lock_init(&adap->sge.reg_lock);
3152}
3153
3154/**
3155 * t3_get_desc - dump an SGE descriptor for debugging purposes
3156 * @qs: the queue set
3157 * @qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3158 * @idx: the descriptor index in the queue
3159 * @data: where to dump the descriptor contents
3160 *
3161 * Dumps the contents of a HW descriptor of an SGE queue. Returns the
3162 * size of the descriptor.
3163 */
3164int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3165 unsigned char *data)
3166{
3167 if (qnum >= 6)
3168 return -EINVAL;
3169
3170 if (qnum < 3) {
3171 if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3172 return -EINVAL;
3173 memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3174 return sizeof(struct tx_desc);
3175 }
3176
3177 if (qnum == 3) {
3178 if (!qs->rspq.desc || idx >= qs->rspq.size)
3179 return -EINVAL;
3180 memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3181 return sizeof(struct rsp_desc);
3182 }
3183
3184 qnum -= 4;
3185 if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3186 return -EINVAL;
3187 memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3188 return sizeof(struct rx_desc);
3189}