blob: fbb866b2185ebdd5e37317f9f18341e23ed684e6 [file] [log] [blame]
Ben Hutchings8ceee662008-04-27 12:55:59 +01001/****************************************************************************
2 * Driver for Solarflare Solarstorm network controllers and boards
3 * Copyright 2005-2006 Fen Systems Ltd.
4 * Copyright 2005-2008 Solarflare Communications Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
10
11#include <linux/pci.h>
12#include <linux/tcp.h>
13#include <linux/ip.h>
14#include <linux/in.h>
15#include <linux/if_ether.h>
16#include <linux/highmem.h>
17#include "net_driver.h"
18#include "tx.h"
19#include "efx.h"
20#include "falcon.h"
21#include "workarounds.h"
22
23/*
24 * TX descriptor ring full threshold
25 *
26 * The tx_queue descriptor ring fill-level must fall below this value
27 * before we restart the netif queue
28 */
29#define EFX_NETDEV_TX_THRESHOLD(_tx_queue) \
30 (_tx_queue->efx->type->txd_ring_mask / 2u)
31
32/* We want to be able to nest calls to netif_stop_queue(), since each
33 * channel can have an individual stop on the queue.
34 */
35void efx_stop_queue(struct efx_nic *efx)
36{
37 spin_lock_bh(&efx->netif_stop_lock);
38 EFX_TRACE(efx, "stop TX queue\n");
39
40 atomic_inc(&efx->netif_stop_count);
41 netif_stop_queue(efx->net_dev);
42
43 spin_unlock_bh(&efx->netif_stop_lock);
44}
45
46/* Wake netif's TX queue
47 * We want to be able to nest calls to netif_stop_queue(), since each
48 * channel can have an individual stop on the queue.
49 */
50inline void efx_wake_queue(struct efx_nic *efx)
51{
52 local_bh_disable();
53 if (atomic_dec_and_lock(&efx->netif_stop_count,
54 &efx->netif_stop_lock)) {
55 EFX_TRACE(efx, "waking TX queue\n");
56 netif_wake_queue(efx->net_dev);
57 spin_unlock(&efx->netif_stop_lock);
58 }
59 local_bh_enable();
60}
61
62static inline void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
63 struct efx_tx_buffer *buffer)
64{
65 if (buffer->unmap_len) {
66 struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
67 if (buffer->unmap_single)
68 pci_unmap_single(pci_dev, buffer->unmap_addr,
69 buffer->unmap_len, PCI_DMA_TODEVICE);
70 else
71 pci_unmap_page(pci_dev, buffer->unmap_addr,
72 buffer->unmap_len, PCI_DMA_TODEVICE);
73 buffer->unmap_len = 0;
74 buffer->unmap_single = 0;
75 }
76
77 if (buffer->skb) {
78 dev_kfree_skb_any((struct sk_buff *) buffer->skb);
79 buffer->skb = NULL;
80 EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
81 "complete\n", tx_queue->queue, read_ptr);
82 }
83}
84
85
86/*
87 * Add a socket buffer to a TX queue
88 *
89 * This maps all fragments of a socket buffer for DMA and adds them to
90 * the TX queue. The queue's insert pointer will be incremented by
91 * the number of fragments in the socket buffer.
92 *
93 * If any DMA mapping fails, any mapped fragments will be unmapped,
94 * the queue's insert pointer will be restored to its original value.
95 *
96 * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
97 * You must hold netif_tx_lock() to call this function.
98 */
99static inline int efx_enqueue_skb(struct efx_tx_queue *tx_queue,
100 const struct sk_buff *skb)
101{
102 struct efx_nic *efx = tx_queue->efx;
103 struct pci_dev *pci_dev = efx->pci_dev;
104 struct efx_tx_buffer *buffer;
105 skb_frag_t *fragment;
106 struct page *page;
107 int page_offset;
108 unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
109 dma_addr_t dma_addr, unmap_addr = 0;
110 unsigned int dma_len;
111 unsigned unmap_single;
112 int q_space, i = 0;
113 int rc = NETDEV_TX_OK;
114
115 EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
116
117 /* Get size of the initial fragment */
118 len = skb_headlen(skb);
119
120 fill_level = tx_queue->insert_count - tx_queue->old_read_count;
121 q_space = efx->type->txd_ring_mask - 1 - fill_level;
122
123 /* Map for DMA. Use pci_map_single rather than pci_map_page
124 * since this is more efficient on machines with sparse
125 * memory.
126 */
127 unmap_single = 1;
128 dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
129
130 /* Process all fragments */
131 while (1) {
132 if (unlikely(pci_dma_mapping_error(dma_addr)))
133 goto pci_err;
134
135 /* Store fields for marking in the per-fragment final
136 * descriptor */
137 unmap_len = len;
138 unmap_addr = dma_addr;
139
140 /* Add to TX queue, splitting across DMA boundaries */
141 do {
142 if (unlikely(q_space-- <= 0)) {
143 /* It might be that completions have
144 * happened since the xmit path last
145 * checked. Update the xmit path's
146 * copy of read_count.
147 */
148 ++tx_queue->stopped;
149 /* This memory barrier protects the
150 * change of stopped from the access
151 * of read_count. */
152 smp_mb();
153 tx_queue->old_read_count =
154 *(volatile unsigned *)
155 &tx_queue->read_count;
156 fill_level = (tx_queue->insert_count
157 - tx_queue->old_read_count);
158 q_space = (efx->type->txd_ring_mask - 1 -
159 fill_level);
160 if (unlikely(q_space-- <= 0))
161 goto stop;
162 smp_mb();
163 --tx_queue->stopped;
164 }
165
166 insert_ptr = (tx_queue->insert_count &
167 efx->type->txd_ring_mask);
168 buffer = &tx_queue->buffer[insert_ptr];
169 EFX_BUG_ON_PARANOID(buffer->skb);
170 EFX_BUG_ON_PARANOID(buffer->len);
171 EFX_BUG_ON_PARANOID(buffer->continuation != 1);
172 EFX_BUG_ON_PARANOID(buffer->unmap_len);
173
174 dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
175 if (likely(dma_len > len))
176 dma_len = len;
177
178 misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
179 if (misalign && dma_len + misalign > 512)
180 dma_len = 512 - misalign;
181
182 /* Fill out per descriptor fields */
183 buffer->len = dma_len;
184 buffer->dma_addr = dma_addr;
185 len -= dma_len;
186 dma_addr += dma_len;
187 ++tx_queue->insert_count;
188 } while (len);
189
190 /* Transfer ownership of the unmapping to the final buffer */
191 buffer->unmap_addr = unmap_addr;
192 buffer->unmap_single = unmap_single;
193 buffer->unmap_len = unmap_len;
194 unmap_len = 0;
195
196 /* Get address and size of next fragment */
197 if (i >= skb_shinfo(skb)->nr_frags)
198 break;
199 fragment = &skb_shinfo(skb)->frags[i];
200 len = fragment->size;
201 page = fragment->page;
202 page_offset = fragment->page_offset;
203 i++;
204 /* Map for DMA */
205 unmap_single = 0;
206 dma_addr = pci_map_page(pci_dev, page, page_offset, len,
207 PCI_DMA_TODEVICE);
208 }
209
210 /* Transfer ownership of the skb to the final buffer */
211 buffer->skb = skb;
212 buffer->continuation = 0;
213
214 /* Pass off to hardware */
215 falcon_push_buffers(tx_queue);
216
217 return NETDEV_TX_OK;
218
219 pci_err:
220 EFX_ERR_RL(efx, " TX queue %d could not map skb with %d bytes %d "
221 "fragments for DMA\n", tx_queue->queue, skb->len,
222 skb_shinfo(skb)->nr_frags + 1);
223
224 /* Mark the packet as transmitted, and free the SKB ourselves */
225 dev_kfree_skb_any((struct sk_buff *)skb);
226 goto unwind;
227
228 stop:
229 rc = NETDEV_TX_BUSY;
230
231 if (tx_queue->stopped == 1)
232 efx_stop_queue(efx);
233
234 unwind:
235 /* Work backwards until we hit the original insert pointer value */
236 while (tx_queue->insert_count != tx_queue->write_count) {
237 --tx_queue->insert_count;
238 insert_ptr = tx_queue->insert_count & efx->type->txd_ring_mask;
239 buffer = &tx_queue->buffer[insert_ptr];
240 efx_dequeue_buffer(tx_queue, buffer);
241 buffer->len = 0;
242 }
243
244 /* Free the fragment we were mid-way through pushing */
245 if (unmap_len)
246 pci_unmap_page(pci_dev, unmap_addr, unmap_len,
247 PCI_DMA_TODEVICE);
248
249 return rc;
250}
251
252/* Remove packets from the TX queue
253 *
254 * This removes packets from the TX queue, up to and including the
255 * specified index.
256 */
257static inline void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
258 unsigned int index)
259{
260 struct efx_nic *efx = tx_queue->efx;
261 unsigned int stop_index, read_ptr;
262 unsigned int mask = tx_queue->efx->type->txd_ring_mask;
263
264 stop_index = (index + 1) & mask;
265 read_ptr = tx_queue->read_count & mask;
266
267 while (read_ptr != stop_index) {
268 struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
269 if (unlikely(buffer->len == 0)) {
270 EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
271 "completion id %x\n", tx_queue->queue,
272 read_ptr);
273 efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
274 return;
275 }
276
277 efx_dequeue_buffer(tx_queue, buffer);
278 buffer->continuation = 1;
279 buffer->len = 0;
280
281 ++tx_queue->read_count;
282 read_ptr = tx_queue->read_count & mask;
283 }
284}
285
286/* Initiate a packet transmission on the specified TX queue.
287 * Note that returning anything other than NETDEV_TX_OK will cause the
288 * OS to free the skb.
289 *
290 * This function is split out from efx_hard_start_xmit to allow the
291 * loopback test to direct packets via specific TX queues. It is
292 * therefore a non-static inline, so as not to penalise performance
293 * for non-loopback transmissions.
294 *
295 * Context: netif_tx_lock held
296 */
297inline int efx_xmit(struct efx_nic *efx,
298 struct efx_tx_queue *tx_queue, struct sk_buff *skb)
299{
300 int rc;
301
302 /* Map fragments for DMA and add to TX queue */
303 rc = efx_enqueue_skb(tx_queue, skb);
304 if (unlikely(rc != NETDEV_TX_OK))
305 goto out;
306
307 /* Update last TX timer */
308 efx->net_dev->trans_start = jiffies;
309
310 out:
311 return rc;
312}
313
314/* Initiate a packet transmission. We use one channel per CPU
315 * (sharing when we have more CPUs than channels). On Falcon, the TX
316 * completion events will be directed back to the CPU that transmitted
317 * the packet, which should be cache-efficient.
318 *
319 * Context: non-blocking.
320 * Note that returning anything other than NETDEV_TX_OK will cause the
321 * OS to free the skb.
322 */
323int efx_hard_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
324{
325 struct efx_nic *efx = net_dev->priv;
326 return efx_xmit(efx, &efx->tx_queue[0], skb);
327}
328
329void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
330{
331 unsigned fill_level;
332 struct efx_nic *efx = tx_queue->efx;
333
334 EFX_BUG_ON_PARANOID(index > efx->type->txd_ring_mask);
335
336 efx_dequeue_buffers(tx_queue, index);
337
338 /* See if we need to restart the netif queue. This barrier
339 * separates the update of read_count from the test of
340 * stopped. */
341 smp_mb();
342 if (unlikely(tx_queue->stopped)) {
343 fill_level = tx_queue->insert_count - tx_queue->read_count;
344 if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) {
345 EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx));
346
347 /* Do this under netif_tx_lock(), to avoid racing
348 * with efx_xmit(). */
349 netif_tx_lock(efx->net_dev);
350 if (tx_queue->stopped) {
351 tx_queue->stopped = 0;
352 efx_wake_queue(efx);
353 }
354 netif_tx_unlock(efx->net_dev);
355 }
356 }
357}
358
359int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
360{
361 struct efx_nic *efx = tx_queue->efx;
362 unsigned int txq_size;
363 int i, rc;
364
365 EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
366
367 /* Allocate software ring */
368 txq_size = (efx->type->txd_ring_mask + 1) * sizeof(*tx_queue->buffer);
369 tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
370 if (!tx_queue->buffer) {
371 rc = -ENOMEM;
372 goto fail1;
373 }
374 for (i = 0; i <= efx->type->txd_ring_mask; ++i)
375 tx_queue->buffer[i].continuation = 1;
376
377 /* Allocate hardware ring */
378 rc = falcon_probe_tx(tx_queue);
379 if (rc)
380 goto fail2;
381
382 return 0;
383
384 fail2:
385 kfree(tx_queue->buffer);
386 tx_queue->buffer = NULL;
387 fail1:
388 tx_queue->used = 0;
389
390 return rc;
391}
392
393int efx_init_tx_queue(struct efx_tx_queue *tx_queue)
394{
395 EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
396
397 tx_queue->insert_count = 0;
398 tx_queue->write_count = 0;
399 tx_queue->read_count = 0;
400 tx_queue->old_read_count = 0;
401 BUG_ON(tx_queue->stopped);
402
403 /* Set up TX descriptor ring */
404 return falcon_init_tx(tx_queue);
405}
406
407void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
408{
409 struct efx_tx_buffer *buffer;
410
411 if (!tx_queue->buffer)
412 return;
413
414 /* Free any buffers left in the ring */
415 while (tx_queue->read_count != tx_queue->write_count) {
416 buffer = &tx_queue->buffer[tx_queue->read_count &
417 tx_queue->efx->type->txd_ring_mask];
418 efx_dequeue_buffer(tx_queue, buffer);
419 buffer->continuation = 1;
420 buffer->len = 0;
421
422 ++tx_queue->read_count;
423 }
424}
425
426void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
427{
428 EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
429
430 /* Flush TX queue, remove descriptor ring */
431 falcon_fini_tx(tx_queue);
432
433 efx_release_tx_buffers(tx_queue);
434
435 /* Release queue's stop on port, if any */
436 if (tx_queue->stopped) {
437 tx_queue->stopped = 0;
438 efx_wake_queue(tx_queue->efx);
439 }
440}
441
442void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
443{
444 EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
445 falcon_remove_tx(tx_queue);
446
447 kfree(tx_queue->buffer);
448 tx_queue->buffer = NULL;
449 tx_queue->used = 0;
450}
451
452