Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2007 Mellanox Technologies. All rights reserved. |
| 3 | * |
| 4 | * This software is available to you under a choice of one of two |
| 5 | * licenses. You may choose to be licensed under the terms of the GNU |
| 6 | * General Public License (GPL) Version 2, available from the file |
| 7 | * COPYING in the main directory of this source tree, or the |
| 8 | * OpenIB.org BSD license below: |
| 9 | * |
| 10 | * Redistribution and use in source and binary forms, with or |
| 11 | * without modification, are permitted provided that the following |
| 12 | * conditions are met: |
| 13 | * |
| 14 | * - Redistributions of source code must retain the above |
| 15 | * copyright notice, this list of conditions and the following |
| 16 | * disclaimer. |
| 17 | * |
| 18 | * - Redistributions in binary form must reproduce the above |
| 19 | * copyright notice, this list of conditions and the following |
| 20 | * disclaimer in the documentation and/or other materials |
| 21 | * provided with the distribution. |
| 22 | * |
| 23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 30 | * SOFTWARE. |
| 31 | * |
| 32 | */ |
| 33 | |
| 34 | #include <asm/page.h> |
| 35 | #include <linux/mlx4/cq.h> |
| 36 | #include <linux/mlx4/qp.h> |
| 37 | #include <linux/skbuff.h> |
| 38 | #include <linux/if_vlan.h> |
| 39 | #include <linux/vmalloc.h> |
| 40 | |
| 41 | #include "mlx4_en.h" |
| 42 | |
| 43 | enum { |
| 44 | MAX_INLINE = 104, /* 128 - 16 - 4 - 4 */ |
| 45 | }; |
| 46 | |
| 47 | static int inline_thold __read_mostly = MAX_INLINE; |
| 48 | |
| 49 | module_param_named(inline_thold, inline_thold, int, 0444); |
| 50 | MODULE_PARM_DESC(inline_thold, "treshold for using inline data"); |
| 51 | |
| 52 | int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, |
| 53 | struct mlx4_en_tx_ring *ring, u32 size, |
| 54 | u16 stride) |
| 55 | { |
| 56 | struct mlx4_en_dev *mdev = priv->mdev; |
| 57 | int tmp; |
| 58 | int err; |
| 59 | |
| 60 | ring->size = size; |
| 61 | ring->size_mask = size - 1; |
| 62 | ring->stride = stride; |
| 63 | |
| 64 | inline_thold = min(inline_thold, MAX_INLINE); |
| 65 | |
| 66 | spin_lock_init(&ring->comp_lock); |
| 67 | |
| 68 | tmp = size * sizeof(struct mlx4_en_tx_info); |
| 69 | ring->tx_info = vmalloc(tmp); |
| 70 | if (!ring->tx_info) { |
| 71 | mlx4_err(mdev, "Failed allocating tx_info ring\n"); |
| 72 | return -ENOMEM; |
| 73 | } |
| 74 | mlx4_dbg(DRV, priv, "Allocated tx_info ring at addr:%p size:%d\n", |
| 75 | ring->tx_info, tmp); |
| 76 | |
| 77 | ring->bounce_buf = kmalloc(MAX_DESC_SIZE, GFP_KERNEL); |
| 78 | if (!ring->bounce_buf) { |
| 79 | mlx4_err(mdev, "Failed allocating bounce buffer\n"); |
| 80 | err = -ENOMEM; |
| 81 | goto err_tx; |
| 82 | } |
| 83 | ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); |
| 84 | |
| 85 | err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, |
| 86 | 2 * PAGE_SIZE); |
| 87 | if (err) { |
| 88 | mlx4_err(mdev, "Failed allocating hwq resources\n"); |
| 89 | goto err_bounce; |
| 90 | } |
| 91 | |
| 92 | err = mlx4_en_map_buffer(&ring->wqres.buf); |
| 93 | if (err) { |
| 94 | mlx4_err(mdev, "Failed to map TX buffer\n"); |
| 95 | goto err_hwq_res; |
| 96 | } |
| 97 | |
| 98 | ring->buf = ring->wqres.buf.direct.buf; |
| 99 | |
| 100 | mlx4_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d " |
| 101 | "buf_size:%d dma:%llx\n", ring, ring->buf, ring->size, |
| 102 | ring->buf_size, (unsigned long long) ring->wqres.buf.direct.map); |
| 103 | |
| 104 | err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn); |
| 105 | if (err) { |
| 106 | mlx4_err(mdev, "Failed reserving qp for tx ring.\n"); |
| 107 | goto err_map; |
| 108 | } |
| 109 | |
| 110 | err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp); |
| 111 | if (err) { |
| 112 | mlx4_err(mdev, "Failed allocating qp %d\n", ring->qpn); |
| 113 | goto err_reserve; |
| 114 | } |
Yevgeny Petrilin | 966508f | 2009-04-20 04:30:03 +0000 | [diff] [blame] | 115 | ring->qp.event = mlx4_en_sqp_event; |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 116 | |
| 117 | return 0; |
| 118 | |
| 119 | err_reserve: |
| 120 | mlx4_qp_release_range(mdev->dev, ring->qpn, 1); |
| 121 | err_map: |
| 122 | mlx4_en_unmap_buffer(&ring->wqres.buf); |
| 123 | err_hwq_res: |
| 124 | mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); |
| 125 | err_bounce: |
| 126 | kfree(ring->bounce_buf); |
| 127 | ring->bounce_buf = NULL; |
| 128 | err_tx: |
| 129 | vfree(ring->tx_info); |
| 130 | ring->tx_info = NULL; |
| 131 | return err; |
| 132 | } |
| 133 | |
| 134 | void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, |
| 135 | struct mlx4_en_tx_ring *ring) |
| 136 | { |
| 137 | struct mlx4_en_dev *mdev = priv->mdev; |
| 138 | mlx4_dbg(DRV, priv, "Destroying tx ring, qpn: %d\n", ring->qpn); |
| 139 | |
| 140 | mlx4_qp_remove(mdev->dev, &ring->qp); |
| 141 | mlx4_qp_free(mdev->dev, &ring->qp); |
| 142 | mlx4_qp_release_range(mdev->dev, ring->qpn, 1); |
| 143 | mlx4_en_unmap_buffer(&ring->wqres.buf); |
| 144 | mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size); |
| 145 | kfree(ring->bounce_buf); |
| 146 | ring->bounce_buf = NULL; |
| 147 | vfree(ring->tx_info); |
| 148 | ring->tx_info = NULL; |
| 149 | } |
| 150 | |
| 151 | int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, |
| 152 | struct mlx4_en_tx_ring *ring, |
| 153 | int cq, int srqn) |
| 154 | { |
| 155 | struct mlx4_en_dev *mdev = priv->mdev; |
| 156 | int err; |
| 157 | |
| 158 | ring->cqn = cq; |
| 159 | ring->prod = 0; |
| 160 | ring->cons = 0xffffffff; |
| 161 | ring->last_nr_txbb = 1; |
| 162 | ring->poll_cnt = 0; |
| 163 | ring->blocked = 0; |
| 164 | memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); |
| 165 | memset(ring->buf, 0, ring->buf_size); |
| 166 | |
| 167 | ring->qp_state = MLX4_QP_STATE_RST; |
| 168 | ring->doorbell_qpn = swab32(ring->qp.qpn << 8); |
| 169 | |
| 170 | mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn, |
| 171 | ring->cqn, srqn, &ring->context); |
| 172 | |
| 173 | err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context, |
| 174 | &ring->qp, &ring->qp_state); |
| 175 | |
| 176 | return err; |
| 177 | } |
| 178 | |
| 179 | void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv, |
| 180 | struct mlx4_en_tx_ring *ring) |
| 181 | { |
| 182 | struct mlx4_en_dev *mdev = priv->mdev; |
| 183 | |
| 184 | mlx4_qp_modify(mdev->dev, NULL, ring->qp_state, |
| 185 | MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp); |
| 186 | } |
| 187 | |
| 188 | |
| 189 | static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, |
| 190 | struct mlx4_en_tx_ring *ring, |
| 191 | int index, u8 owner) |
| 192 | { |
| 193 | struct mlx4_en_dev *mdev = priv->mdev; |
| 194 | struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; |
| 195 | struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; |
| 196 | struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset; |
| 197 | struct sk_buff *skb = tx_info->skb; |
| 198 | struct skb_frag_struct *frag; |
| 199 | void *end = ring->buf + ring->buf_size; |
| 200 | int frags = skb_shinfo(skb)->nr_frags; |
| 201 | int i; |
| 202 | __be32 *ptr = (__be32 *)tx_desc; |
| 203 | __be32 stamp = cpu_to_be32(STAMP_VAL | (!!owner << STAMP_SHIFT)); |
| 204 | |
| 205 | /* Optimize the common case when there are no wraparounds */ |
| 206 | if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) { |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 207 | if (!tx_info->inl) { |
| 208 | if (tx_info->linear) { |
| 209 | pci_unmap_single(mdev->pdev, |
| 210 | (dma_addr_t) be64_to_cpu(data->addr), |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 211 | be32_to_cpu(data->byte_count), |
| 212 | PCI_DMA_TODEVICE); |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 213 | ++data; |
| 214 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 215 | |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 216 | for (i = 0; i < frags; i++) { |
| 217 | frag = &skb_shinfo(skb)->frags[i]; |
| 218 | pci_unmap_page(mdev->pdev, |
| 219 | (dma_addr_t) be64_to_cpu(data[i].addr), |
| 220 | frag->size, PCI_DMA_TODEVICE); |
| 221 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 222 | } |
| 223 | /* Stamp the freed descriptor */ |
| 224 | for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { |
| 225 | *ptr = stamp; |
| 226 | ptr += STAMP_DWORDS; |
| 227 | } |
| 228 | |
| 229 | } else { |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 230 | if (!tx_info->inl) { |
| 231 | if ((void *) data >= end) { |
| 232 | data = (struct mlx4_wqe_data_seg *) |
| 233 | (ring->buf + ((void *) data - end)); |
| 234 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 235 | |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 236 | if (tx_info->linear) { |
| 237 | pci_unmap_single(mdev->pdev, |
| 238 | (dma_addr_t) be64_to_cpu(data->addr), |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 239 | be32_to_cpu(data->byte_count), |
| 240 | PCI_DMA_TODEVICE); |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 241 | ++data; |
| 242 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 243 | |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 244 | for (i = 0; i < frags; i++) { |
| 245 | /* Check for wraparound before unmapping */ |
| 246 | if ((void *) data >= end) |
| 247 | data = (struct mlx4_wqe_data_seg *) ring->buf; |
| 248 | frag = &skb_shinfo(skb)->frags[i]; |
| 249 | pci_unmap_page(mdev->pdev, |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 250 | (dma_addr_t) be64_to_cpu(data->addr), |
| 251 | frag->size, PCI_DMA_TODEVICE); |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 252 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 253 | } |
| 254 | /* Stamp the freed descriptor */ |
| 255 | for (i = 0; i < tx_info->nr_txbb * TXBB_SIZE; i += STAMP_STRIDE) { |
| 256 | *ptr = stamp; |
| 257 | ptr += STAMP_DWORDS; |
| 258 | if ((void *) ptr >= end) { |
| 259 | ptr = ring->buf; |
| 260 | stamp ^= cpu_to_be32(0x80000000); |
| 261 | } |
| 262 | } |
| 263 | |
| 264 | } |
| 265 | dev_kfree_skb_any(skb); |
| 266 | return tx_info->nr_txbb; |
| 267 | } |
| 268 | |
| 269 | |
| 270 | int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) |
| 271 | { |
| 272 | struct mlx4_en_priv *priv = netdev_priv(dev); |
| 273 | int cnt = 0; |
| 274 | |
| 275 | /* Skip last polled descriptor */ |
| 276 | ring->cons += ring->last_nr_txbb; |
| 277 | mlx4_dbg(DRV, priv, "Freeing Tx buf - cons:0x%x prod:0x%x\n", |
| 278 | ring->cons, ring->prod); |
| 279 | |
| 280 | if ((u32) (ring->prod - ring->cons) > ring->size) { |
| 281 | if (netif_msg_tx_err(priv)) |
| 282 | mlx4_warn(priv->mdev, "Tx consumer passed producer!\n"); |
| 283 | return 0; |
| 284 | } |
| 285 | |
| 286 | while (ring->cons != ring->prod) { |
| 287 | ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, |
| 288 | ring->cons & ring->size_mask, |
| 289 | !!(ring->cons & ring->size)); |
| 290 | ring->cons += ring->last_nr_txbb; |
| 291 | cnt++; |
| 292 | } |
| 293 | |
| 294 | if (cnt) |
| 295 | mlx4_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt); |
| 296 | |
| 297 | return cnt; |
| 298 | } |
| 299 | |
| 300 | void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num) |
| 301 | { |
| 302 | int block = 8 / ring_num; |
| 303 | int extra = 8 - (block * ring_num); |
| 304 | int num = 0; |
| 305 | u16 ring = 1; |
| 306 | int prio; |
| 307 | |
| 308 | if (ring_num == 1) { |
| 309 | for (prio = 0; prio < 8; prio++) |
| 310 | prio_map[prio] = 0; |
| 311 | return; |
| 312 | } |
| 313 | |
| 314 | for (prio = 0; prio < 8; prio++) { |
| 315 | if (extra && (num == block + 1)) { |
| 316 | ring++; |
| 317 | num = 0; |
| 318 | extra--; |
| 319 | } else if (!extra && (num == block)) { |
| 320 | ring++; |
| 321 | num = 0; |
| 322 | } |
| 323 | prio_map[prio] = ring; |
| 324 | mlx4_dbg(DRV, priv, " prio:%d --> ring:%d\n", prio, ring); |
| 325 | num++; |
| 326 | } |
| 327 | } |
| 328 | |
| 329 | static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq) |
| 330 | { |
| 331 | struct mlx4_en_priv *priv = netdev_priv(dev); |
| 332 | struct mlx4_cq *mcq = &cq->mcq; |
| 333 | struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; |
| 334 | struct mlx4_cqe *cqe = cq->buf; |
| 335 | u16 index; |
| 336 | u16 new_index; |
| 337 | u32 txbbs_skipped = 0; |
| 338 | u32 cq_last_sav; |
| 339 | |
| 340 | /* index always points to the first TXBB of the last polled descriptor */ |
| 341 | index = ring->cons & ring->size_mask; |
| 342 | new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; |
| 343 | if (index == new_index) |
| 344 | return; |
| 345 | |
| 346 | if (!priv->port_up) |
| 347 | return; |
| 348 | |
| 349 | /* |
| 350 | * We use a two-stage loop: |
| 351 | * - the first samples the HW-updated CQE |
| 352 | * - the second frees TXBBs until the last sample |
| 353 | * This lets us amortize CQE cache misses, while still polling the CQ |
| 354 | * until is quiescent. |
| 355 | */ |
| 356 | cq_last_sav = mcq->cons_index; |
| 357 | do { |
| 358 | do { |
| 359 | /* Skip over last polled CQE */ |
| 360 | index = (index + ring->last_nr_txbb) & ring->size_mask; |
| 361 | txbbs_skipped += ring->last_nr_txbb; |
| 362 | |
| 363 | /* Poll next CQE */ |
| 364 | ring->last_nr_txbb = mlx4_en_free_tx_desc( |
| 365 | priv, ring, index, |
| 366 | !!((ring->cons + txbbs_skipped) & |
| 367 | ring->size)); |
| 368 | ++mcq->cons_index; |
| 369 | |
| 370 | } while (index != new_index); |
| 371 | |
| 372 | new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask; |
| 373 | } while (index != new_index); |
| 374 | AVG_PERF_COUNTER(priv->pstats.tx_coal_avg, |
| 375 | (u32) (mcq->cons_index - cq_last_sav)); |
| 376 | |
| 377 | /* |
| 378 | * To prevent CQ overflow we first update CQ consumer and only then |
| 379 | * the ring consumer. |
| 380 | */ |
| 381 | mlx4_cq_set_ci(mcq); |
| 382 | wmb(); |
| 383 | ring->cons += txbbs_skipped; |
| 384 | |
| 385 | /* Wakeup Tx queue if this ring stopped it */ |
| 386 | if (unlikely(ring->blocked)) { |
Yevgeny Petrilin | c03ea21 | 2008-12-25 18:14:04 -0800 | [diff] [blame] | 387 | if ((u32) (ring->prod - ring->cons) <= |
| 388 | ring->size - HEADROOM - MAX_DESC_TXBBS) { |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 389 | |
| 390 | /* TODO: support multiqueue netdevs. Currently, we block |
| 391 | * when *any* ring is full. Note that: |
| 392 | * - 2 Tx rings can unblock at the same time and call |
| 393 | * netif_wake_queue(), which is OK since this |
| 394 | * operation is idempotent. |
| 395 | * - We might wake the queue just after another ring |
| 396 | * stopped it. This is no big deal because the next |
| 397 | * transmission on that ring would stop the queue. |
| 398 | */ |
| 399 | ring->blocked = 0; |
| 400 | netif_wake_queue(dev); |
| 401 | priv->port_stats.wake_queue++; |
| 402 | } |
| 403 | } |
| 404 | } |
| 405 | |
| 406 | void mlx4_en_tx_irq(struct mlx4_cq *mcq) |
| 407 | { |
| 408 | struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq); |
| 409 | struct mlx4_en_priv *priv = netdev_priv(cq->dev); |
| 410 | struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; |
| 411 | |
Yevgeny Petrilin | 48374dd | 2008-12-25 18:13:45 -0800 | [diff] [blame] | 412 | if (!spin_trylock(&ring->comp_lock)) |
| 413 | return; |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 414 | mlx4_en_process_tx_cq(cq->dev, cq); |
Yevgeny Petrilin | 48374dd | 2008-12-25 18:13:45 -0800 | [diff] [blame] | 415 | mod_timer(&cq->timer, jiffies + 1); |
| 416 | spin_unlock(&ring->comp_lock); |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 417 | } |
| 418 | |
| 419 | |
| 420 | void mlx4_en_poll_tx_cq(unsigned long data) |
| 421 | { |
| 422 | struct mlx4_en_cq *cq = (struct mlx4_en_cq *) data; |
| 423 | struct mlx4_en_priv *priv = netdev_priv(cq->dev); |
| 424 | struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring]; |
| 425 | u32 inflight; |
| 426 | |
| 427 | INC_PERF_COUNTER(priv->pstats.tx_poll); |
| 428 | |
Yevgeny Petrilin | 465440d | 2009-05-25 20:57:21 +0000 | [diff] [blame^] | 429 | if (!spin_trylock_irq(&ring->comp_lock)) { |
Yevgeny Petrilin | 48374dd | 2008-12-25 18:13:45 -0800 | [diff] [blame] | 430 | mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); |
| 431 | return; |
| 432 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 433 | mlx4_en_process_tx_cq(cq->dev, cq); |
| 434 | inflight = (u32) (ring->prod - ring->cons - ring->last_nr_txbb); |
| 435 | |
| 436 | /* If there are still packets in flight and the timer has not already |
| 437 | * been scheduled by the Tx routine then schedule it here to guarantee |
| 438 | * completion processing of these packets */ |
| 439 | if (inflight && priv->port_up) |
| 440 | mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); |
| 441 | |
Yevgeny Petrilin | 465440d | 2009-05-25 20:57:21 +0000 | [diff] [blame^] | 442 | spin_unlock_irq(&ring->comp_lock); |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 443 | } |
| 444 | |
| 445 | static struct mlx4_en_tx_desc *mlx4_en_bounce_to_desc(struct mlx4_en_priv *priv, |
| 446 | struct mlx4_en_tx_ring *ring, |
| 447 | u32 index, |
| 448 | unsigned int desc_size) |
| 449 | { |
| 450 | u32 copy = (ring->size - index) * TXBB_SIZE; |
| 451 | int i; |
| 452 | |
| 453 | for (i = desc_size - copy - 4; i >= 0; i -= 4) { |
| 454 | if ((i & (TXBB_SIZE - 1)) == 0) |
| 455 | wmb(); |
| 456 | |
| 457 | *((u32 *) (ring->buf + i)) = |
| 458 | *((u32 *) (ring->bounce_buf + copy + i)); |
| 459 | } |
| 460 | |
| 461 | for (i = copy - 4; i >= 4 ; i -= 4) { |
| 462 | if ((i & (TXBB_SIZE - 1)) == 0) |
| 463 | wmb(); |
| 464 | |
| 465 | *((u32 *) (ring->buf + index * TXBB_SIZE + i)) = |
| 466 | *((u32 *) (ring->bounce_buf + i)); |
| 467 | } |
| 468 | |
| 469 | /* Return real descriptor location */ |
| 470 | return ring->buf + index * TXBB_SIZE; |
| 471 | } |
| 472 | |
| 473 | static inline void mlx4_en_xmit_poll(struct mlx4_en_priv *priv, int tx_ind) |
| 474 | { |
| 475 | struct mlx4_en_cq *cq = &priv->tx_cq[tx_ind]; |
| 476 | struct mlx4_en_tx_ring *ring = &priv->tx_ring[tx_ind]; |
| 477 | |
| 478 | /* If we don't have a pending timer, set one up to catch our recent |
| 479 | post in case the interface becomes idle */ |
| 480 | if (!timer_pending(&cq->timer)) |
| 481 | mod_timer(&cq->timer, jiffies + MLX4_EN_TX_POLL_TIMEOUT); |
| 482 | |
| 483 | /* Poll the CQ every mlx4_en_TX_MODER_POLL packets */ |
| 484 | if ((++ring->poll_cnt & (MLX4_EN_TX_POLL_MODER - 1)) == 0) |
Yevgeny Petrilin | 465440d | 2009-05-25 20:57:21 +0000 | [diff] [blame^] | 485 | if (spin_trylock_irq(&ring->comp_lock)) { |
Yevgeny Petrilin | 48374dd | 2008-12-25 18:13:45 -0800 | [diff] [blame] | 486 | mlx4_en_process_tx_cq(priv->dev, cq); |
Yevgeny Petrilin | 465440d | 2009-05-25 20:57:21 +0000 | [diff] [blame^] | 487 | spin_unlock_irq(&ring->comp_lock); |
Yevgeny Petrilin | 48374dd | 2008-12-25 18:13:45 -0800 | [diff] [blame] | 488 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 489 | } |
| 490 | |
| 491 | static void *get_frag_ptr(struct sk_buff *skb) |
| 492 | { |
| 493 | struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; |
| 494 | struct page *page = frag->page; |
| 495 | void *ptr; |
| 496 | |
| 497 | ptr = page_address(page); |
| 498 | if (unlikely(!ptr)) |
| 499 | return NULL; |
| 500 | |
| 501 | return ptr + frag->page_offset; |
| 502 | } |
| 503 | |
| 504 | static int is_inline(struct sk_buff *skb, void **pfrag) |
| 505 | { |
| 506 | void *ptr; |
| 507 | |
| 508 | if (inline_thold && !skb_is_gso(skb) && skb->len <= inline_thold) { |
| 509 | if (skb_shinfo(skb)->nr_frags == 1) { |
| 510 | ptr = get_frag_ptr(skb); |
| 511 | if (unlikely(!ptr)) |
| 512 | return 0; |
| 513 | |
| 514 | if (pfrag) |
| 515 | *pfrag = ptr; |
| 516 | |
| 517 | return 1; |
| 518 | } else if (unlikely(skb_shinfo(skb)->nr_frags)) |
| 519 | return 0; |
| 520 | else |
| 521 | return 1; |
| 522 | } |
| 523 | |
| 524 | return 0; |
| 525 | } |
| 526 | |
| 527 | static int inline_size(struct sk_buff *skb) |
| 528 | { |
| 529 | if (skb->len + CTRL_SIZE + sizeof(struct mlx4_wqe_inline_seg) |
| 530 | <= MLX4_INLINE_ALIGN) |
| 531 | return ALIGN(skb->len + CTRL_SIZE + |
| 532 | sizeof(struct mlx4_wqe_inline_seg), 16); |
| 533 | else |
| 534 | return ALIGN(skb->len + CTRL_SIZE + 2 * |
| 535 | sizeof(struct mlx4_wqe_inline_seg), 16); |
| 536 | } |
| 537 | |
| 538 | static int get_real_size(struct sk_buff *skb, struct net_device *dev, |
| 539 | int *lso_header_size) |
| 540 | { |
| 541 | struct mlx4_en_priv *priv = netdev_priv(dev); |
| 542 | struct mlx4_en_dev *mdev = priv->mdev; |
| 543 | int real_size; |
| 544 | |
| 545 | if (skb_is_gso(skb)) { |
| 546 | *lso_header_size = skb_transport_offset(skb) + tcp_hdrlen(skb); |
| 547 | real_size = CTRL_SIZE + skb_shinfo(skb)->nr_frags * DS_SIZE + |
| 548 | ALIGN(*lso_header_size + 4, DS_SIZE); |
| 549 | if (unlikely(*lso_header_size != skb_headlen(skb))) { |
| 550 | /* We add a segment for the skb linear buffer only if |
| 551 | * it contains data */ |
| 552 | if (*lso_header_size < skb_headlen(skb)) |
| 553 | real_size += DS_SIZE; |
| 554 | else { |
| 555 | if (netif_msg_tx_err(priv)) |
| 556 | mlx4_warn(mdev, "Non-linear headers\n"); |
| 557 | dev_kfree_skb_any(skb); |
| 558 | return 0; |
| 559 | } |
| 560 | } |
| 561 | if (unlikely(*lso_header_size > MAX_LSO_HDR_SIZE)) { |
| 562 | if (netif_msg_tx_err(priv)) |
| 563 | mlx4_warn(mdev, "LSO header size too big\n"); |
| 564 | dev_kfree_skb_any(skb); |
| 565 | return 0; |
| 566 | } |
| 567 | } else { |
| 568 | *lso_header_size = 0; |
| 569 | if (!is_inline(skb, NULL)) |
| 570 | real_size = CTRL_SIZE + (skb_shinfo(skb)->nr_frags + 1) * DS_SIZE; |
| 571 | else |
| 572 | real_size = inline_size(skb); |
| 573 | } |
| 574 | |
| 575 | return real_size; |
| 576 | } |
| 577 | |
| 578 | static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *skb, |
| 579 | int real_size, u16 *vlan_tag, int tx_ind, void *fragptr) |
| 580 | { |
| 581 | struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; |
| 582 | int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; |
| 583 | |
| 584 | if (skb->len <= spc) { |
| 585 | inl->byte_count = cpu_to_be32(1 << 31 | skb->len); |
| 586 | skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb)); |
| 587 | if (skb_shinfo(skb)->nr_frags) |
| 588 | memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr, |
| 589 | skb_shinfo(skb)->frags[0].size); |
| 590 | |
| 591 | } else { |
| 592 | inl->byte_count = cpu_to_be32(1 << 31 | spc); |
| 593 | if (skb_headlen(skb) <= spc) { |
| 594 | skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb)); |
| 595 | if (skb_headlen(skb) < spc) { |
| 596 | memcpy(((void *)(inl + 1)) + skb_headlen(skb), |
| 597 | fragptr, spc - skb_headlen(skb)); |
| 598 | fragptr += spc - skb_headlen(skb); |
| 599 | } |
| 600 | inl = (void *) (inl + 1) + spc; |
| 601 | memcpy(((void *)(inl + 1)), fragptr, skb->len - spc); |
| 602 | } else { |
| 603 | skb_copy_from_linear_data(skb, inl + 1, spc); |
| 604 | inl = (void *) (inl + 1) + spc; |
| 605 | skb_copy_from_linear_data_offset(skb, spc, inl + 1, |
| 606 | skb_headlen(skb) - spc); |
| 607 | if (skb_shinfo(skb)->nr_frags) |
| 608 | memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc, |
| 609 | fragptr, skb_shinfo(skb)->frags[0].size); |
| 610 | } |
| 611 | |
| 612 | wmb(); |
| 613 | inl->byte_count = cpu_to_be32(1 << 31 | (skb->len - spc)); |
| 614 | } |
| 615 | tx_desc->ctrl.vlan_tag = cpu_to_be16(*vlan_tag); |
| 616 | tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!(*vlan_tag); |
| 617 | tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; |
| 618 | } |
| 619 | |
| 620 | static int get_vlan_info(struct mlx4_en_priv *priv, struct sk_buff *skb, |
| 621 | u16 *vlan_tag) |
| 622 | { |
| 623 | int tx_ind; |
| 624 | |
| 625 | /* Obtain VLAN information if present */ |
| 626 | if (priv->vlgrp && vlan_tx_tag_present(skb)) { |
| 627 | *vlan_tag = vlan_tx_tag_get(skb); |
| 628 | /* Set the Tx ring to use according to vlan priority */ |
| 629 | tx_ind = priv->tx_prio_map[*vlan_tag >> 13]; |
| 630 | } else { |
| 631 | *vlan_tag = 0; |
| 632 | tx_ind = 0; |
| 633 | } |
| 634 | return tx_ind; |
| 635 | } |
| 636 | |
| 637 | int mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) |
| 638 | { |
| 639 | struct mlx4_en_priv *priv = netdev_priv(dev); |
| 640 | struct mlx4_en_dev *mdev = priv->mdev; |
| 641 | struct mlx4_en_tx_ring *ring; |
| 642 | struct mlx4_en_cq *cq; |
| 643 | struct mlx4_en_tx_desc *tx_desc; |
| 644 | struct mlx4_wqe_data_seg *data; |
| 645 | struct skb_frag_struct *frag; |
| 646 | struct mlx4_en_tx_info *tx_info; |
| 647 | int tx_ind = 0; |
| 648 | int nr_txbb; |
| 649 | int desc_size; |
| 650 | int real_size; |
| 651 | dma_addr_t dma; |
| 652 | u32 index; |
| 653 | __be32 op_own; |
| 654 | u16 vlan_tag; |
| 655 | int i; |
| 656 | int lso_header_size; |
| 657 | void *fragptr; |
| 658 | |
| 659 | if (unlikely(!skb->len)) { |
| 660 | dev_kfree_skb_any(skb); |
| 661 | return NETDEV_TX_OK; |
| 662 | } |
| 663 | real_size = get_real_size(skb, dev, &lso_header_size); |
| 664 | if (unlikely(!real_size)) |
| 665 | return NETDEV_TX_OK; |
| 666 | |
| 667 | /* Allign descriptor to TXBB size */ |
| 668 | desc_size = ALIGN(real_size, TXBB_SIZE); |
| 669 | nr_txbb = desc_size / TXBB_SIZE; |
| 670 | if (unlikely(nr_txbb > MAX_DESC_TXBBS)) { |
| 671 | if (netif_msg_tx_err(priv)) |
| 672 | mlx4_warn(mdev, "Oversized header or SG list\n"); |
| 673 | dev_kfree_skb_any(skb); |
| 674 | return NETDEV_TX_OK; |
| 675 | } |
| 676 | |
| 677 | tx_ind = get_vlan_info(priv, skb, &vlan_tag); |
| 678 | ring = &priv->tx_ring[tx_ind]; |
| 679 | |
| 680 | /* Check available TXBBs And 2K spare for prefetch */ |
| 681 | if (unlikely(((int)(ring->prod - ring->cons)) > |
| 682 | ring->size - HEADROOM - MAX_DESC_TXBBS)) { |
| 683 | /* every full Tx ring stops queue. |
| 684 | * TODO: implement multi-queue support (per-queue stop) */ |
| 685 | netif_stop_queue(dev); |
| 686 | ring->blocked = 1; |
| 687 | priv->port_stats.queue_stopped++; |
| 688 | |
| 689 | /* Use interrupts to find out when queue opened */ |
| 690 | cq = &priv->tx_cq[tx_ind]; |
| 691 | mlx4_en_arm_cq(priv, cq); |
| 692 | return NETDEV_TX_BUSY; |
| 693 | } |
| 694 | |
| 695 | /* Now that we know what Tx ring to use */ |
| 696 | if (unlikely(!priv->port_up)) { |
| 697 | if (netif_msg_tx_err(priv)) |
| 698 | mlx4_warn(mdev, "xmit: port down!\n"); |
| 699 | dev_kfree_skb_any(skb); |
| 700 | return NETDEV_TX_OK; |
| 701 | } |
| 702 | |
| 703 | /* Track current inflight packets for performance analysis */ |
| 704 | AVG_PERF_COUNTER(priv->pstats.inflight_avg, |
| 705 | (u32) (ring->prod - ring->cons - 1)); |
| 706 | |
| 707 | /* Packet is good - grab an index and transmit it */ |
| 708 | index = ring->prod & ring->size_mask; |
| 709 | |
| 710 | /* See if we have enough space for whole descriptor TXBB for setting |
| 711 | * SW ownership on next descriptor; if not, use a bounce buffer. */ |
| 712 | if (likely(index + nr_txbb <= ring->size)) |
| 713 | tx_desc = ring->buf + index * TXBB_SIZE; |
| 714 | else |
| 715 | tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; |
| 716 | |
| 717 | /* Save skb in tx_info ring */ |
| 718 | tx_info = &ring->tx_info[index]; |
| 719 | tx_info->skb = skb; |
| 720 | tx_info->nr_txbb = nr_txbb; |
| 721 | |
| 722 | /* Prepare ctrl segement apart opcode+ownership, which depends on |
| 723 | * whether LSO is used */ |
| 724 | tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); |
| 725 | tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag; |
| 726 | tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f; |
| 727 | tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE | |
| 728 | MLX4_WQE_CTRL_SOLICITED); |
| 729 | if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { |
| 730 | tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM | |
| 731 | MLX4_WQE_CTRL_TCP_UDP_CSUM); |
| 732 | priv->port_stats.tx_chksum_offload++; |
| 733 | } |
| 734 | |
| 735 | /* Handle LSO (TSO) packets */ |
| 736 | if (lso_header_size) { |
| 737 | /* Mark opcode as LSO */ |
| 738 | op_own = cpu_to_be32(MLX4_OPCODE_LSO | (1 << 6)) | |
| 739 | ((ring->prod & ring->size) ? |
| 740 | cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); |
| 741 | |
| 742 | /* Fill in the LSO prefix */ |
| 743 | tx_desc->lso.mss_hdr_size = cpu_to_be32( |
| 744 | skb_shinfo(skb)->gso_size << 16 | lso_header_size); |
| 745 | |
| 746 | /* Copy headers; |
| 747 | * note that we already verified that it is linear */ |
| 748 | memcpy(tx_desc->lso.header, skb->data, lso_header_size); |
| 749 | data = ((void *) &tx_desc->lso + |
| 750 | ALIGN(lso_header_size + 4, DS_SIZE)); |
| 751 | |
| 752 | priv->port_stats.tso_packets++; |
| 753 | i = ((skb->len - lso_header_size) / skb_shinfo(skb)->gso_size) + |
| 754 | !!((skb->len - lso_header_size) % skb_shinfo(skb)->gso_size); |
| 755 | ring->bytes += skb->len + (i - 1) * lso_header_size; |
| 756 | ring->packets += i; |
| 757 | } else { |
| 758 | /* Normal (Non LSO) packet */ |
| 759 | op_own = cpu_to_be32(MLX4_OPCODE_SEND) | |
| 760 | ((ring->prod & ring->size) ? |
| 761 | cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); |
| 762 | data = &tx_desc->data; |
| 763 | ring->bytes += max(skb->len, (unsigned int) ETH_ZLEN); |
| 764 | ring->packets++; |
| 765 | |
| 766 | } |
| 767 | AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); |
| 768 | |
| 769 | |
| 770 | /* valid only for none inline segments */ |
| 771 | tx_info->data_offset = (void *) data - (void *) tx_desc; |
| 772 | |
| 773 | tx_info->linear = (lso_header_size < skb_headlen(skb) && !is_inline(skb, NULL)) ? 1 : 0; |
| 774 | data += skb_shinfo(skb)->nr_frags + tx_info->linear - 1; |
| 775 | |
| 776 | if (!is_inline(skb, &fragptr)) { |
| 777 | /* Map fragments */ |
| 778 | for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) { |
| 779 | frag = &skb_shinfo(skb)->frags[i]; |
| 780 | dma = pci_map_page(mdev->dev->pdev, frag->page, frag->page_offset, |
| 781 | frag->size, PCI_DMA_TODEVICE); |
| 782 | data->addr = cpu_to_be64(dma); |
| 783 | data->lkey = cpu_to_be32(mdev->mr.key); |
| 784 | wmb(); |
| 785 | data->byte_count = cpu_to_be32(frag->size); |
| 786 | --data; |
| 787 | } |
| 788 | |
| 789 | /* Map linear part */ |
| 790 | if (tx_info->linear) { |
| 791 | dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size, |
| 792 | skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE); |
| 793 | data->addr = cpu_to_be64(dma); |
| 794 | data->lkey = cpu_to_be32(mdev->mr.key); |
| 795 | wmb(); |
| 796 | data->byte_count = cpu_to_be32(skb_headlen(skb) - lso_header_size); |
| 797 | } |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 798 | tx_info->inl = 0; |
| 799 | } else { |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 800 | build_inline_wqe(tx_desc, skb, real_size, &vlan_tag, tx_ind, fragptr); |
Yevgeny Petrilin | 41efea5 | 2009-01-08 10:57:15 -0800 | [diff] [blame] | 801 | tx_info->inl = 1; |
| 802 | } |
Yevgeny Petrilin | c27a02c | 2008-10-22 15:47:49 -0700 | [diff] [blame] | 803 | |
| 804 | ring->prod += nr_txbb; |
| 805 | |
| 806 | /* If we used a bounce buffer then copy descriptor back into place */ |
| 807 | if (tx_desc == (struct mlx4_en_tx_desc *) ring->bounce_buf) |
| 808 | tx_desc = mlx4_en_bounce_to_desc(priv, ring, index, desc_size); |
| 809 | |
| 810 | /* Run destructor before passing skb to HW */ |
| 811 | if (likely(!skb_shared(skb))) |
| 812 | skb_orphan(skb); |
| 813 | |
| 814 | /* Ensure new descirptor hits memory |
| 815 | * before setting ownership of this descriptor to HW */ |
| 816 | wmb(); |
| 817 | tx_desc->ctrl.owner_opcode = op_own; |
| 818 | |
| 819 | /* Ring doorbell! */ |
| 820 | wmb(); |
| 821 | writel(ring->doorbell_qpn, mdev->uar_map + MLX4_SEND_DOORBELL); |
| 822 | dev->trans_start = jiffies; |
| 823 | |
| 824 | /* Poll CQ here */ |
| 825 | mlx4_en_xmit_poll(priv, tx_ind); |
| 826 | |
| 827 | return 0; |
| 828 | } |
| 829 | |