/* A network driver using virtio.
 *
 * Copyright 2007 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
//#define DEBUG
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/module.h>
#include <linux/virtio.h>
#include <linux/virtio_net.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/scatterlist.h>
#include <linux/if_vlan.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/average.h>
#include <net/route.h>

static int napi_weight = NAPI_POLL_WEIGHT;
module_param(napi_weight, int, 0444);

static bool csum = true, gso = true, napi_tx;
module_param(csum, bool, 0444);
module_param(gso, bool, 0444);
module_param(napi_tx, bool, 0644);

/* FIXME: MTU in config. */
#define GOOD_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
#define GOOD_COPY_LEN   128

#define VIRTNET_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)

/* Amount of XDP headroom to prepend to packets for use by xdp_adjust_head */
#define VIRTIO_XDP_HEADROOM 256

/* RX packet size EWMA. The average packet size is used to determine the packet
 * buffer size when refilling RX rings. As the entire RX ring may be refilled
 * at once, the weight is chosen so that the EWMA will be insensitive to short-
 * term, transient changes in packet size.
 */
DECLARE_EWMA(pkt_len, 0, 64)
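/* As I read include/linux/average.h, DECLARE_EWMA(pkt_len, 0, 64) generates
 * struct ewma_pkt_len plus ewma_pkt_len_init/_add/_read helpers: 0 is the
 * number of fractional precision bits and 64 the weight reciprocal, so each
 * new sample moves the average by roughly 1/64 of its difference.
 */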

#define VIRTNET_DRIVER_VERSION "1.0.0"

struct virtnet_stats {
        struct u64_stats_sync tx_syncp;
        struct u64_stats_sync rx_syncp;
        u64 tx_bytes;
        u64 tx_packets;

        u64 rx_bytes;
        u64 rx_packets;
};
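/* One instance of these counters exists per CPU (see the __percpu pointer in
 * struct virtnet_info); updates are bracketed by the per-direction syncp and
 * readers are expected to use the u64_stats_fetch_begin()/retry() family to
 * get consistent 64-bit snapshots on 32-bit hosts.
 */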

/* Internal representation of a send virtqueue */
struct send_queue {
        /* Virtqueue associated with this send queue */
        struct virtqueue *vq;

        /* TX: fragments + linear part + virtio header */
        struct scatterlist sg[MAX_SKB_FRAGS + 2];

        /* Name of the send queue: output.$index */
        char name[40];

        struct napi_struct napi;
};

/* Internal representation of a receive virtqueue */
struct receive_queue {
        /* Virtqueue associated with this receive_queue */
        struct virtqueue *vq;

        struct napi_struct napi;

        struct bpf_prog __rcu *xdp_prog;

        /* Chain pages by the private ptr. */
        struct page *pages;

        /* Average packet length for mergeable receive buffers. */
        struct ewma_pkt_len mrg_avg_pkt_len;

        /* Page frag for packet buffer allocation. */
        struct page_frag alloc_frag;

        /* RX: fragments + linear part + virtio header */
        struct scatterlist sg[MAX_SKB_FRAGS + 2];

        /* Min single buffer size for mergeable buffers case. */
        unsigned int min_buf_len;

        /* Name of this receive queue: input.$index */
        char name[40];
};

struct virtnet_info {
        struct virtio_device *vdev;
        struct virtqueue *cvq;
        struct net_device *dev;
        struct send_queue *sq;
        struct receive_queue *rq;
        unsigned int status;

        /* Max # of queue pairs supported by the device */
        u16 max_queue_pairs;

        /* # of queue pairs currently used by the driver */
        u16 curr_queue_pairs;

        /* # of XDP queue pairs currently used by the driver */
        u16 xdp_queue_pairs;

        /* I like... big packets and I cannot lie! */
        bool big_packets;

        /* Host will merge rx buffers for big packets (shake it! shake it!) */
        bool mergeable_rx_bufs;

        /* Has control virtqueue */
        bool has_cvq;

        /* Host can handle any s/g split between our header and packet data */
        bool any_header_sg;

        /* Packet virtio header size */
        u8 hdr_len;

        /* Active statistics */
        struct virtnet_stats __percpu *stats;

        /* Work struct for refilling if we run low on memory. */
        struct delayed_work refill;

        /* Work struct for config space updates */
        struct work_struct config_work;

        /* Is the affinity hint set for virtqueues? */
        bool affinity_hint_set;

        /* CPU hotplug instances for online & dead */
        struct hlist_node node;
        struct hlist_node node_dead;

        /* Control VQ buffers: protected by the rtnl lock */
        struct virtio_net_ctrl_hdr ctrl_hdr;
        virtio_net_ctrl_ack ctrl_status;
        struct virtio_net_ctrl_mq ctrl_mq;
        u8 ctrl_promisc;
        u8 ctrl_allmulti;
        u16 ctrl_vid;

        /* Ethtool settings */
        u8 duplex;
        u32 speed;
};

struct padded_vnet_hdr {
        struct virtio_net_hdr_mrg_rxbuf hdr;
        /*
         * hdr is in a separate sg buffer, and data sg buffer shares same page
         * with this header sg. This padding makes next sg 16 byte aligned
         * after the header.
         */
        char padding[4];
};
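/* Size check, assuming the usual virtio header layout: struct virtio_net_hdr
 * is 10 bytes and the mergeable variant adds a 2-byte num_buffers field, so
 * hdr above is 12 bytes and the 4 bytes of padding round the header sg entry
 * up to 16 bytes, keeping the following data sg 16-byte aligned.
 */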

/* Converting between virtqueue no. and kernel tx/rx queue no.
 * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
 */
static int vq2txq(struct virtqueue *vq)
{
        return (vq->index - 1) / 2;
}

static int txq2vq(int txq)
{
        return txq * 2 + 1;
}

static int vq2rxq(struct virtqueue *vq)
{
        return vq->index / 2;
}

static int rxq2vq(int rxq)
{
        return rxq * 2;
}
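/* Worked example for a device with two queue pairs: virtqueues 0 and 2 are
 * rx0 and rx1 (vq2rxq: 0/2 = 0, 2/2 = 1), virtqueues 1 and 3 are tx0 and tx1
 * (vq2txq: (1-1)/2 = 0, (3-1)/2 = 1), and virtqueue 4 is the control queue
 * when VIRTIO_NET_F_CTRL_VQ is negotiated.
 */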

static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
{
        return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
}

/*
 * private is used to chain pages for big packets; put the whole
 * most recently used list at the beginning for reuse
 */
static void give_pages(struct receive_queue *rq, struct page *page)
{
        struct page *end;

        /* Find end of list, sew whole thing into vi->rq.pages. */
        for (end = page; end->private; end = (struct page *)end->private);
        end->private = (unsigned long)rq->pages;
        rq->pages = page;
}

static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
{
        struct page *p = rq->pages;

        if (p) {
                rq->pages = (struct page *)p->private;
                /* clear private here, it is used to chain pages */
                p->private = 0;
        } else
                p = alloc_page(gfp_mask);
        return p;
}

static void virtqueue_napi_schedule(struct napi_struct *napi,
                                    struct virtqueue *vq)
{
        if (napi_schedule_prep(napi)) {
                virtqueue_disable_cb(vq);
                __napi_schedule(napi);
        }
}

static void virtqueue_napi_complete(struct napi_struct *napi,
                                    struct virtqueue *vq, int processed)
{
        int opaque;

        opaque = virtqueue_enable_cb_prepare(vq);
        if (napi_complete_done(napi, processed) &&
            unlikely(virtqueue_poll(vq, opaque)))
                virtqueue_napi_schedule(napi, vq);
}
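/* The opaque value above captures the ring state at enable time; if
 * virtqueue_poll() then reports that more buffers arrived after
 * napi_complete_done(), napi is rescheduled, which (as I read the
 * virtqueue_enable_cb_prepare()/virtqueue_poll() contract) closes the race
 * between completing napi and re-enabling callbacks.
 */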

static void skb_xmit_done(struct virtqueue *vq)
{
        struct virtnet_info *vi = vq->vdev->priv;
        struct napi_struct *napi = &vi->sq[vq2txq(vq)].napi;

        /* Suppress further interrupts. */
        virtqueue_disable_cb(vq);

        if (napi->weight)
                virtqueue_napi_schedule(napi, vq);
        else
                /* We were probably waiting for more output buffers. */
                netif_wake_subqueue(vi->dev, vq2txq(vq));
}

#define MRG_CTX_HEADER_SHIFT 22
static void *mergeable_len_to_ctx(unsigned int truesize,
                                  unsigned int headroom)
{
        return (void *)(unsigned long)((headroom << MRG_CTX_HEADER_SHIFT) | truesize);
}

static unsigned int mergeable_ctx_to_headroom(void *mrg_ctx)
{
        return (unsigned long)mrg_ctx >> MRG_CTX_HEADER_SHIFT;
}

static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
{
        return (unsigned long)mrg_ctx & ((1 << MRG_CTX_HEADER_SHIFT) - 1);
}
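/* Illustrative packing: with MRG_CTX_HEADER_SHIFT of 22, truesize lives in
 * the low 22 bits (so it must stay below 4 MB) and headroom in the bits
 * above; e.g. truesize 1536 with headroom 256 packs to (256 << 22) | 1536,
 * and the two helpers above recover 256 and 1536 respectively.
 */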

/* Called from bottom half context */
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
                                   struct receive_queue *rq,
                                   struct page *page, unsigned int offset,
                                   unsigned int len, unsigned int truesize)
{
        struct sk_buff *skb;
        struct virtio_net_hdr_mrg_rxbuf *hdr;
        unsigned int copy, hdr_len, hdr_padded_len;
        char *p;

        p = page_address(page) + offset;

        /* copy small packet so we can reuse these pages for small data */
        skb = napi_alloc_skb(&rq->napi, GOOD_COPY_LEN);
        if (unlikely(!skb))
                return NULL;

        hdr = skb_vnet_hdr(skb);

        hdr_len = vi->hdr_len;
        if (vi->mergeable_rx_bufs)
                hdr_padded_len = sizeof *hdr;
        else
                hdr_padded_len = sizeof(struct padded_vnet_hdr);

        memcpy(hdr, p, hdr_len);

        len -= hdr_len;
        offset += hdr_padded_len;
        p += hdr_padded_len;

        copy = len;
        if (copy > skb_tailroom(skb))
                copy = skb_tailroom(skb);
        skb_put_data(skb, p, copy);

        len -= copy;
        offset += copy;

        if (vi->mergeable_rx_bufs) {
                if (len)
                        skb_add_rx_frag(skb, 0, page, offset, len, truesize);
                else
                        put_page(page);
                return skb;
        }

        /*
         * Verify that we can indeed put this data into a skb.
         * This is here to handle cases when the device erroneously
         * tries to receive more than is possible. This is usually
         * the case of a broken device.
         */
        if (unlikely(len > MAX_SKB_FRAGS * PAGE_SIZE)) {
                net_dbg_ratelimited("%s: too much data\n", skb->dev->name);
                dev_kfree_skb(skb);
                return NULL;
        }
        BUG_ON(offset >= PAGE_SIZE);
        while (len) {
                unsigned int frag_size = min((unsigned)PAGE_SIZE - offset, len);
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, offset,
                                frag_size, truesize);
                len -= frag_size;
                page = (struct page *)page->private;
                offset = 0;
        }

        if (page)
                give_pages(rq, page);

        return skb;
}

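/* XDP_TX frames are sent on the per-CPU queues reserved when the XDP program
 * was attached: as I read it, curr_queue_pairs already includes
 * xdp_queue_pairs extra pairs, so curr_queue_pairs - xdp_queue_pairs +
 * smp_processor_id() below indexes a TX queue the regular stack never uses
 * and therefore needs no tx lock here.
 */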
static bool virtnet_xdp_xmit(struct virtnet_info *vi,
                             struct receive_queue *rq,
                             struct xdp_buff *xdp)
{
        struct virtio_net_hdr_mrg_rxbuf *hdr;
        unsigned int len;
        struct send_queue *sq;
        unsigned int qp;
        void *xdp_sent;
        int err;

        qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
        sq = &vi->sq[qp];

        /* Free up any pending old buffers before queueing new ones. */
        while ((xdp_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) {
                struct page *sent_page = virt_to_head_page(xdp_sent);

                put_page(sent_page);
        }

        xdp->data -= vi->hdr_len;
        /* Zero header and leave csum up to XDP layers */
        hdr = xdp->data;
        memset(hdr, 0, vi->hdr_len);

        sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data);

        err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC);
        if (unlikely(err)) {
                struct page *page = virt_to_head_page(xdp->data);

                put_page(page);
                return false;
        }

        virtqueue_kick(sq->vq);
        return true;
}

static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
{
        return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
}

static struct sk_buff *receive_small(struct net_device *dev,
                                     struct virtnet_info *vi,
                                     struct receive_queue *rq,
                                     void *buf, unsigned int len)
{
        struct sk_buff *skb;
        struct bpf_prog *xdp_prog;
        unsigned int xdp_headroom = virtnet_get_headroom(vi);
        unsigned int header_offset = VIRTNET_RX_PAD + xdp_headroom;
        unsigned int headroom = vi->hdr_len + header_offset;
        unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) +
                              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        unsigned int delta = 0;
        len -= vi->hdr_len;

        rcu_read_lock();
        xdp_prog = rcu_dereference(rq->xdp_prog);
        if (xdp_prog) {
                struct virtio_net_hdr_mrg_rxbuf *hdr = buf + header_offset;
                struct xdp_buff xdp;
                void *orig_data;
                u32 act;

                if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags))
                        goto err_xdp;

                xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
                xdp.data = xdp.data_hard_start + xdp_headroom;
                xdp.data_end = xdp.data + len;
                orig_data = xdp.data;
                act = bpf_prog_run_xdp(xdp_prog, &xdp);

                switch (act) {
                case XDP_PASS:
                        /* Recalculate length in case bpf program changed it */
                        delta = orig_data - xdp.data;
                        break;
                case XDP_TX:
                        if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
                                trace_xdp_exception(vi->dev, xdp_prog, act);
                        rcu_read_unlock();
                        goto xdp_xmit;
                default:
                        bpf_warn_invalid_xdp_action(act);
                case XDP_ABORTED:
                        trace_xdp_exception(vi->dev, xdp_prog, act);
                case XDP_DROP:
                        goto err_xdp;
                }
        }
        rcu_read_unlock();

        skb = build_skb(buf, buflen);
        if (!skb) {
                put_page(virt_to_head_page(buf));
                goto err;
        }
        skb_reserve(skb, headroom - delta);
        skb_put(skb, len + delta);
        if (!delta) {
                buf += header_offset;
                memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
        } /* keep zeroed vnet hdr since packet was changed by bpf */

err:
        return skb;

err_xdp:
        rcu_read_unlock();
        dev->stats.rx_dropped++;
        put_page(virt_to_head_page(buf));
xdp_xmit:
        return NULL;
}
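/* A note on delta above: xdp_adjust_head() moves xdp.data, so
 * delta = orig_data - xdp.data is positive when the program grew the packet
 * into its headroom; skb_reserve(headroom - delta) and skb_put(len + delta)
 * keep the resulting skb consistent with the adjusted frame.
 */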

static struct sk_buff *receive_big(struct net_device *dev,
                                   struct virtnet_info *vi,
                                   struct receive_queue *rq,
                                   void *buf,
                                   unsigned int len)
{
        struct page *page = buf;
        struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);

        if (unlikely(!skb))
                goto err;

        return skb;

err:
        dev->stats.rx_dropped++;
        give_pages(rq, page);
        return NULL;
}

/* The conditions to enable XDP should preclude the underlying device from
 * sending packets across multiple buffers (num_buf > 1). However per spec
 * it does not appear to be illegal to do so but rather just against convention.
 * So in order to avoid making a system unresponsive the packets are pushed
 * into a page and the XDP program is run. This will be extremely slow and we
 * push a warning to the user to fix this as soon as possible. Fixing this may
 * require resolving the underlying hardware to determine why multiple buffers
 * are being received or simply loading the XDP program in the ingress stack
 * after the skb is built because there is no advantage to running it here
 * anymore.
 */
static struct page *xdp_linearize_page(struct receive_queue *rq,
                                       u16 *num_buf,
                                       struct page *p,
                                       int offset,
                                       unsigned int *len)
{
        struct page *page = alloc_page(GFP_ATOMIC);
        unsigned int page_off = VIRTIO_XDP_HEADROOM;

        if (!page)
                return NULL;

        memcpy(page_address(page) + page_off, page_address(p) + offset, *len);
        page_off += *len;

        while (--*num_buf) {
                unsigned int buflen;
                void *buf;
                int off;

                buf = virtqueue_get_buf(rq->vq, &buflen);
                if (unlikely(!buf))
                        goto err_buf;

                p = virt_to_head_page(buf);
                off = buf - page_address(p);

                /* guard against a misconfigured or uncooperative backend that
                 * is sending packets larger than the MTU.
                 */
                if ((page_off + buflen) > PAGE_SIZE) {
                        put_page(p);
                        goto err_buf;
                }

                memcpy(page_address(page) + page_off,
                       page_address(p) + off, buflen);
                page_off += buflen;
                put_page(p);
        }

        /* Headroom does not contribute to packet length */
        *len = page_off - VIRTIO_XDP_HEADROOM;
        return page;
err_buf:
        __free_pages(page, 0);
        return NULL;
}

static struct sk_buff *receive_mergeable(struct net_device *dev,
                                         struct virtnet_info *vi,
                                         struct receive_queue *rq,
                                         void *buf,
                                         void *ctx,
                                         unsigned int len)
{
        struct virtio_net_hdr_mrg_rxbuf *hdr = buf;
        u16 num_buf = virtio16_to_cpu(vi->vdev, hdr->num_buffers);
        struct page *page = virt_to_head_page(buf);
        int offset = buf - page_address(page);
        struct sk_buff *head_skb, *curr_skb;
        struct bpf_prog *xdp_prog;
        unsigned int truesize;

        head_skb = NULL;

        rcu_read_lock();
        xdp_prog = rcu_dereference(rq->xdp_prog);
        if (xdp_prog) {
                struct page *xdp_page;
                struct xdp_buff xdp;
                void *data;
                u32 act;

                /* This happens when rx buffer size is underestimated */
                if (unlikely(num_buf > 1)) {
                        /* linearize data for XDP */
                        xdp_page = xdp_linearize_page(rq, &num_buf,
                                                      page, offset, &len);
                        if (!xdp_page)
                                goto err_xdp;
                        offset = VIRTIO_XDP_HEADROOM;
                } else {
                        xdp_page = page;
                }

                /* Transient failure which in theory could occur if
                 * in-flight packets from before XDP was enabled reach
                 * the receive path after XDP is loaded. In practice I
                 * was not able to create this condition.
                 */
                if (unlikely(hdr->hdr.gso_type))
                        goto err_xdp;

                /* Allow consuming headroom but reserve enough space to push
                 * the descriptor on if we get an XDP_TX return code.
                 */
                data = page_address(xdp_page) + offset;
                xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
                xdp.data = data + vi->hdr_len;
                xdp.data_end = xdp.data + (len - vi->hdr_len);
                act = bpf_prog_run_xdp(xdp_prog, &xdp);

                switch (act) {
                case XDP_PASS:
                        /* recalculate offset to account for any header
                         * adjustments. Note other cases do not build an
                         * skb and avoid using offset
                         */
                        offset = xdp.data -
                                 page_address(xdp_page) - vi->hdr_len;

                        /* We can only create skb based on xdp_page. */
                        if (unlikely(xdp_page != page)) {
                                rcu_read_unlock();
                                put_page(page);
                                head_skb = page_to_skb(vi, rq, xdp_page,
                                                       offset, len, PAGE_SIZE);
                                ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
                                return head_skb;
                        }
                        break;
                case XDP_TX:
                        if (unlikely(!virtnet_xdp_xmit(vi, rq, &xdp)))
                                trace_xdp_exception(vi->dev, xdp_prog, act);
                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
                        if (unlikely(xdp_page != page))
                                goto err_xdp;
                        rcu_read_unlock();
                        goto xdp_xmit;
                default:
                        bpf_warn_invalid_xdp_action(act);
                case XDP_ABORTED:
                        trace_xdp_exception(vi->dev, xdp_prog, act);
                case XDP_DROP:
                        if (unlikely(xdp_page != page))
                                __free_pages(xdp_page, 0);
                        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, len);
                        goto err_xdp;
                }
        }
        rcu_read_unlock();

        truesize = mergeable_ctx_to_truesize(ctx);
        if (unlikely(len > truesize)) {
                pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
                         dev->name, len, (unsigned long)ctx);
                dev->stats.rx_length_errors++;
                goto err_skb;
        }

        head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
        curr_skb = head_skb;

        if (unlikely(!curr_skb))
                goto err_skb;
        while (--num_buf) {
                int num_skb_frags;

                buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx);
                if (unlikely(!ctx)) {
                        pr_debug("%s: rx error: %d buffers out of %d missing\n",
                                 dev->name, num_buf,
                                 virtio16_to_cpu(vi->vdev,
                                                 hdr->num_buffers));
                        dev->stats.rx_length_errors++;
                        goto err_buf;
                }

                page = virt_to_head_page(buf);

                truesize = mergeable_ctx_to_truesize(ctx);
                if (unlikely(len > truesize)) {
                        pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
                                 dev->name, len, (unsigned long)ctx);
                        dev->stats.rx_length_errors++;
                        goto err_skb;
                }

                num_skb_frags = skb_shinfo(curr_skb)->nr_frags;
                if (unlikely(num_skb_frags == MAX_SKB_FRAGS)) {
                        struct sk_buff *nskb = alloc_skb(0, GFP_ATOMIC);

                        if (unlikely(!nskb))
                                goto err_skb;
                        if (curr_skb == head_skb)
                                skb_shinfo(curr_skb)->frag_list = nskb;
                        else
                                curr_skb->next = nskb;
                        curr_skb = nskb;
                        head_skb->truesize += nskb->truesize;
                        num_skb_frags = 0;
                }
                if (curr_skb != head_skb) {
                        head_skb->data_len += len;
                        head_skb->len += len;
                        head_skb->truesize += truesize;
                }
                offset = buf - page_address(page);
                if (skb_can_coalesce(curr_skb, num_skb_frags, page, offset)) {
                        put_page(page);
                        skb_coalesce_rx_frag(curr_skb, num_skb_frags - 1,
                                             len, truesize);
                } else {
                        skb_add_rx_frag(curr_skb, num_skb_frags, page,
                                        offset, len, truesize);
                }
        }

        ewma_pkt_len_add(&rq->mrg_avg_pkt_len, head_skb->len);
        return head_skb;

err_xdp:
        rcu_read_unlock();
err_skb:
        put_page(page);
        while (--num_buf) {
                buf = virtqueue_get_buf(rq->vq, &len);
                if (unlikely(!buf)) {
                        pr_debug("%s: rx error: %d buffers missing\n",
                                 dev->name, num_buf);
                        dev->stats.rx_length_errors++;
                        break;
                }
                page = virt_to_head_page(buf);
                put_page(page);
        }
err_buf:
        dev->stats.rx_dropped++;
        dev_kfree_skb(head_skb);
xdp_xmit:
        return NULL;
}

static int receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
                       void *buf, unsigned int len, void **ctx)
{
        struct net_device *dev = vi->dev;
        struct sk_buff *skb;
        struct virtio_net_hdr_mrg_rxbuf *hdr;
        int ret;

        if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
                pr_debug("%s: short packet %i\n", dev->name, len);
                dev->stats.rx_length_errors++;
                if (vi->mergeable_rx_bufs) {
                        put_page(virt_to_head_page(buf));
                } else if (vi->big_packets) {
                        give_pages(rq, buf);
                } else {
                        put_page(virt_to_head_page(buf));
                }
                return 0;
        }

        if (vi->mergeable_rx_bufs)
                skb = receive_mergeable(dev, vi, rq, buf, ctx, len);
        else if (vi->big_packets)
                skb = receive_big(dev, vi, rq, buf, len);
        else
                skb = receive_small(dev, vi, rq, buf, len);

        if (unlikely(!skb))
                return 0;

        hdr = skb_vnet_hdr(skb);

        ret = skb->len;

        if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
                skb->ip_summed = CHECKSUM_UNNECESSARY;

        if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
                                  virtio_is_little_endian(vi->vdev))) {
                net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
                                     dev->name, hdr->hdr.gso_type,
                                     hdr->hdr.gso_size);
                goto frame_err;
        }

        skb->protocol = eth_type_trans(skb, dev);
        pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
                 ntohs(skb->protocol), skb->len, skb->pkt_type);

        napi_gro_receive(&rq->napi, skb);
        return ret;

frame_err:
        dev->stats.rx_frame_errors++;
        dev_kfree_skb(skb);
        return 0;
}

static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
                             gfp_t gfp)
{
        struct page_frag *alloc_frag = &rq->alloc_frag;
        char *buf;
        unsigned int xdp_headroom = virtnet_get_headroom(vi);
        int len = vi->hdr_len + VIRTNET_RX_PAD + GOOD_PACKET_LEN + xdp_headroom;
        int err;

        len = SKB_DATA_ALIGN(len) +
              SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        if (unlikely(!skb_page_frag_refill(len, alloc_frag, gfp)))
                return -ENOMEM;

        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
        get_page(alloc_frag->page);
        alloc_frag->offset += len;
        sg_init_one(rq->sg, buf + VIRTNET_RX_PAD + xdp_headroom,
                    vi->hdr_len + GOOD_PACKET_LEN);
        err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, buf, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));

        return err;
}

static int add_recvbuf_big(struct virtnet_info *vi, struct receive_queue *rq,
                           gfp_t gfp)
{
        struct page *first, *list = NULL;
        char *p;
        int i, err, offset;

        sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);

        /* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
        for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
                first = get_a_page(rq, gfp);
                if (!first) {
                        if (list)
                                give_pages(rq, list);
                        return -ENOMEM;
                }
                sg_set_buf(&rq->sg[i], page_address(first), PAGE_SIZE);

                /* chain new page in list head to match sg */
                first->private = (unsigned long)list;
                list = first;
        }

        first = get_a_page(rq, gfp);
        if (!first) {
                give_pages(rq, list);
                return -ENOMEM;
        }
        p = page_address(first);

        /* rq->sg[0], rq->sg[1] share the same page */
        /* a separate rq->sg[0] for the header - required in case !any_header_sg */
        sg_set_buf(&rq->sg[0], p, vi->hdr_len);

        /* rq->sg[1] for data packet, from offset */
        offset = sizeof(struct padded_vnet_hdr);
        sg_set_buf(&rq->sg[1], p + offset, PAGE_SIZE - offset);

        /* chain first in list head */
        first->private = (unsigned long)list;
        err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
                                  first, gfp);
        if (err < 0)
                give_pages(rq, first);

        return err;
}

static unsigned int get_mergeable_buf_len(struct receive_queue *rq,
                                          struct ewma_pkt_len *avg_pkt_len)
{
        const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        unsigned int len;

        len = hdr_len + clamp_t(unsigned int, ewma_pkt_len_read(avg_pkt_len),
                                rq->min_buf_len, PAGE_SIZE - hdr_len);
        return ALIGN(len, L1_CACHE_BYTES);
}
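/* Illustration with hypothetical numbers: with the 12-byte mergeable header,
 * an EWMA packet length of 1500 and a min_buf_len of 1280, the clamp yields
 * 1500, adding the header gives 1512, and ALIGN(1512, 64) returns a
 * 1536-byte receive buffer (assuming 64-byte L1 cache lines).
 */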

static int add_recvbuf_mergeable(struct virtnet_info *vi,
                                 struct receive_queue *rq, gfp_t gfp)
{
        struct page_frag *alloc_frag = &rq->alloc_frag;
        unsigned int headroom = virtnet_get_headroom(vi);
        char *buf;
        void *ctx;
        int err;
        unsigned int len, hole;

        len = get_mergeable_buf_len(rq, &rq->mrg_avg_pkt_len);
        if (unlikely(!skb_page_frag_refill(len + headroom, alloc_frag, gfp)))
                return -ENOMEM;

        buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
        buf += headroom; /* advance address leaving hole at front of pkt */
        ctx = mergeable_len_to_ctx(len, headroom);
        get_page(alloc_frag->page);
        alloc_frag->offset += len + headroom;
        hole = alloc_frag->size - alloc_frag->offset;
        if (hole < len + headroom) {
                /* To avoid internal fragmentation, if there is very likely not
                 * enough space for another buffer, add the remaining space to
                 * the current buffer. This extra space is not included in
                 * the truesize stored in ctx.
                 */
                len += hole;
                alloc_frag->offset += hole;
        }

        sg_init_one(rq->sg, buf, len);
        err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
        if (err < 0)
                put_page(virt_to_head_page(buf));

        return err;
}

/*
 * Returns false if we couldn't fill entirely (OOM).
 *
 * Normally run in the receive path, but can also be run from ndo_open
 * before we're receiving packets, or from refill_work which is
 * careful to disable receiving (using napi_disable).
 */
static bool try_fill_recv(struct virtnet_info *vi, struct receive_queue *rq,
                          gfp_t gfp)
{
        int err;
        bool oom;

        gfp |= __GFP_COLD;
        do {
                if (vi->mergeable_rx_bufs)
                        err = add_recvbuf_mergeable(vi, rq, gfp);
                else if (vi->big_packets)
                        err = add_recvbuf_big(vi, rq, gfp);
                else
                        err = add_recvbuf_small(vi, rq, gfp);

                oom = err == -ENOMEM;
                if (err)
                        break;
        } while (rq->vq->num_free);
        virtqueue_kick(rq->vq);
        return !oom;
}

static void skb_recv_done(struct virtqueue *rvq)
{
        struct virtnet_info *vi = rvq->vdev->priv;
        struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];

        virtqueue_napi_schedule(&rq->napi, rvq);
}

static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
{
        napi_enable(napi);

        /* If all buffers were filled by other side before we napi_enabled, we
         * won't get another interrupt, so process any outstanding packets now.
         * Call local_bh_enable after to trigger softIRQ processing.
         */
        local_bh_disable();
        virtqueue_napi_schedule(napi, vq);
        local_bh_enable();
}

static void virtnet_napi_tx_enable(struct virtnet_info *vi,
                                   struct virtqueue *vq,
                                   struct napi_struct *napi)
{
        if (!napi->weight)
                return;

        /* Tx napi touches cachelines on the cpu handling tx interrupts. Only
         * enable the feature if this is likely affine with the transmit path.
         */
        if (!vi->affinity_hint_set) {
                napi->weight = 0;
                return;
        }

        return virtnet_napi_enable(vq, napi);
}

static void virtnet_napi_tx_disable(struct napi_struct *napi)
{
        if (napi->weight)
                napi_disable(napi);
}

static void refill_work(struct work_struct *work)
{
        struct virtnet_info *vi =
                container_of(work, struct virtnet_info, refill.work);
        bool still_empty;
        int i;

        for (i = 0; i < vi->curr_queue_pairs; i++) {
                struct receive_queue *rq = &vi->rq[i];

                napi_disable(&rq->napi);
                still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
                virtnet_napi_enable(rq->vq, &rq->napi);

                /* In theory, this can happen: if we don't get any buffers in
                 * we will *never* try to fill again.
                 */
                if (still_empty)
                        schedule_delayed_work(&vi->refill, HZ/2);
        }
}

static int virtnet_receive(struct receive_queue *rq, int budget)
{
        struct virtnet_info *vi = rq->vq->vdev->priv;
        unsigned int len, received = 0, bytes = 0;
        void *buf;
        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);

        if (vi->mergeable_rx_bufs) {
                void *ctx;

                while (received < budget &&
                       (buf = virtqueue_get_buf_ctx(rq->vq, &len, &ctx))) {
                        bytes += receive_buf(vi, rq, buf, len, ctx);
                        received++;
                }
        } else {
                while (received < budget &&
                       (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
                        bytes += receive_buf(vi, rq, buf, len, NULL);
                        received++;
                }
        }

        if (rq->vq->num_free > virtqueue_get_vring_size(rq->vq) / 2) {
                if (!try_fill_recv(vi, rq, GFP_ATOMIC))
                        schedule_delayed_work(&vi->refill, 0);
        }

        u64_stats_update_begin(&stats->rx_syncp);
        stats->rx_bytes += bytes;
        stats->rx_packets += received;
        u64_stats_update_end(&stats->rx_syncp);

        return received;
}

static void free_old_xmit_skbs(struct send_queue *sq)
{
        struct sk_buff *skb;
        unsigned int len;
        struct virtnet_info *vi = sq->vq->vdev->priv;
        struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
        unsigned int packets = 0;
        unsigned int bytes = 0;

        while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
                pr_debug("Sent skb %p\n", skb);

                bytes += skb->len;
                packets++;

                dev_kfree_skb_any(skb);
        }

        /* Avoid overhead when no packets have been processed; this
         * happens when called speculatively from start_xmit.
         */
        if (!packets)
                return;

        u64_stats_update_begin(&stats->tx_syncp);
        stats->tx_bytes += bytes;
        stats->tx_packets += packets;
        u64_stats_update_end(&stats->tx_syncp);
}

static void virtnet_poll_cleantx(struct receive_queue *rq)
{
        struct virtnet_info *vi = rq->vq->vdev->priv;
        unsigned int index = vq2rxq(rq->vq);
        struct send_queue *sq = &vi->sq[index];
        struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);

        if (!sq->napi.weight)
                return;

        if (__netif_tx_trylock(txq)) {
                free_old_xmit_skbs(sq);
                __netif_tx_unlock(txq);
        }

        if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
                netif_tx_wake_queue(txq);
}
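/* virtnet_poll_cleantx() above runs from the rx napi handler so that, when
 * tx napi is in use, completed tx buffers on the same queue index are
 * reclaimed opportunistically without waiting for a tx interrupt; the
 * trylock keeps it from contending with a concurrent start_xmit().
 */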
1114
Jason Wang2ffa7592014-07-23 16:33:54 +08001115static int virtnet_poll(struct napi_struct *napi, int budget)
1116{
1117 struct receive_queue *rq =
1118 container_of(napi, struct receive_queue, napi);
Willem de Bruijne4e84522017-04-24 13:49:26 -04001119 unsigned int received;
Jason Wang2ffa7592014-07-23 16:33:54 +08001120
Willem de Bruijn7b0411e2017-04-24 13:49:29 -04001121 virtnet_poll_cleantx(rq);
1122
Li RongQingfaadb052015-03-26 15:39:45 +08001123 received = virtnet_receive(rq, budget);
Jason Wang2ffa7592014-07-23 16:33:54 +08001124
Rusty Russell8329d982007-11-19 11:20:43 -05001125 /* Out of packets? */
Willem de Bruijne4e84522017-04-24 13:49:26 -04001126 if (received < budget)
1127 virtqueue_napi_complete(napi, rq->vq, received);
Rusty Russell296f96f2007-10-22 11:03:37 +10001128
1129 return received;
1130}
1131
Jason Wang986a4f42012-12-07 07:04:56 +00001132static int virtnet_open(struct net_device *dev)
1133{
1134 struct virtnet_info *vi = netdev_priv(dev);
1135 int i;
1136
Jason Wange4166622013-05-21 20:03:58 +00001137 for (i = 0; i < vi->max_queue_pairs; i++) {
1138 if (i < vi->curr_queue_pairs)
1139 /* Make sure we have some buffers: if oom use wq. */
Michael S. Tsirkin946fa562014-10-24 00:12:10 +03001140 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
Jason Wange4166622013-05-21 20:03:58 +00001141 schedule_delayed_work(&vi->refill, 0);
Willem de Bruijne4e84522017-04-24 13:49:26 -04001142 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001143 virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
Jason Wang986a4f42012-12-07 07:04:56 +00001144 }
1145
1146 return 0;
1147}
1148
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001149static int virtnet_poll_tx(struct napi_struct *napi, int budget)
1150{
1151 struct send_queue *sq = container_of(napi, struct send_queue, napi);
1152 struct virtnet_info *vi = sq->vq->vdev->priv;
1153 struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
1154
1155 __netif_tx_lock(txq, raw_smp_processor_id());
1156 free_old_xmit_skbs(sq);
1157 __netif_tx_unlock(txq);
1158
1159 virtqueue_napi_complete(napi, sq->vq, 0);
1160
1161 if (sq->vq->num_free >= 2 + MAX_SKB_FRAGS)
1162 netif_tx_wake_queue(txq);
1163
1164 return 0;
1165}
1166
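/* Build the virtio_net header for @skb and add it to the send virtqueue.
 * When the device accepts an arbitrary header layout (any_header_sg) and the
 * skb has room, the header is pushed into the headroom so it is transmitted
 * as part of the linear data; otherwise the header gets its own scatterlist
 * entry.
 */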
Jason Wange9d74172012-12-07 07:04:55 +00001167static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
Rusty Russell296f96f2007-10-22 11:03:37 +10001168{
Michael S. Tsirkin012873d2014-10-24 16:55:57 +03001169 struct virtio_net_hdr_mrg_rxbuf *hdr;
Rusty Russell296f96f2007-10-22 11:03:37 +10001170 const unsigned char *dest = ((struct ethhdr *)skb->data)->h_dest;
Jason Wange9d74172012-12-07 07:04:55 +00001171 struct virtnet_info *vi = sq->vq->vdev->priv;
Jason A. Donenfelde2fcad52017-06-04 04:16:26 +02001172 int num_sg;
Michael S. Tsirkin012873d2014-10-24 16:55:57 +03001173 unsigned hdr_len = vi->hdr_len;
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09301174 bool can_push;
Rusty Russell296f96f2007-10-22 11:03:37 +10001175
Johannes Berge1749612008-10-27 15:59:26 -07001176 pr_debug("%s: xmit %p %pM\n", vi->dev->name, skb, dest);
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09301177
1178 can_push = vi->any_header_sg &&
1179 !((unsigned long)skb->data & (__alignof__(*hdr) - 1)) &&
1180 !skb_header_cloned(skb) && skb_headroom(skb) >= hdr_len;
1181 /* Even if we can, don't push here yet as this would skew
1182 * csum_start offset below. */
1183 if (can_push)
Michael S. Tsirkin012873d2014-10-24 16:55:57 +03001184 hdr = (struct virtio_net_hdr_mrg_rxbuf *)(skb->data - hdr_len);
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09301185 else
1186 hdr = skb_vnet_hdr(skb);
Rusty Russell296f96f2007-10-22 11:03:37 +10001187
Mike Rapoporte858fae2016-06-08 16:09:21 +03001188 if (virtio_net_hdr_from_skb(skb, &hdr->hdr,
Jason Wang6391a442017-01-20 14:32:42 +08001189 virtio_is_little_endian(vi->vdev), false))
Mike Rapoporte858fae2016-06-08 16:09:21 +03001190 BUG();
Rusty Russell296f96f2007-10-22 11:03:37 +10001191
Mark McLoughlin3f2c31d2008-11-16 22:41:34 -08001192 if (vi->mergeable_rx_bufs)
Michael S. Tsirkin012873d2014-10-24 16:55:57 +03001193 hdr->num_buffers = 0;
Mark McLoughlin3f2c31d2008-11-16 22:41:34 -08001194
Jason Wang547c8902015-08-27 14:53:06 +08001195 sg_init_table(sq->sg, skb_shinfo(skb)->nr_frags + (can_push ? 1 : 2));
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09301196 if (can_push) {
1197 __skb_push(skb, hdr_len);
1198 num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
Jason A. Donenfelde2fcad52017-06-04 04:16:26 +02001199 if (unlikely(num_sg < 0))
1200 return num_sg;
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09301201 /* Pull header back to avoid skew in tx bytes calculations. */
1202 __skb_pull(skb, hdr_len);
1203 } else {
1204 sg_set_buf(sq->sg, hdr, hdr_len);
Jason A. Donenfelde2fcad52017-06-04 04:16:26 +02001205 num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len);
1206 if (unlikely(num_sg < 0))
1207 return num_sg;
1208 num_sg++;
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09301209 }
Rusty Russell9dc7b9e2013-03-20 15:44:28 +10301210 return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
Rusty Russell11a3a152008-05-26 17:48:13 +10001211}
1212
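/* Main transmit path: reclaim completed buffers, queue the skb, stop the
 * subqueue when fewer than 2 + MAX_SKB_FRAGS descriptors remain, and kick
 * the device unless further packets are pending (skb->xmit_more).
 */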
Stephen Hemminger424efe92009-08-31 19:50:51 +00001213static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
Rusty Russell99ffc692008-05-02 21:50:46 -05001214{
1215 struct virtnet_info *vi = netdev_priv(dev);
Jason Wang986a4f42012-12-07 07:04:56 +00001216 int qnum = skb_get_queue_mapping(skb);
1217 struct send_queue *sq = &vi->sq[qnum];
Rusty Russell9ed4cb02012-10-16 23:56:14 +10301218 int err;
Michael S. Tsirkin4b7fd2e62014-10-15 16:23:28 +03001219 struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
1220 bool kick = !skb->xmit_more;
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001221 bool use_napi = sq->napi.weight;
Rusty Russell2cb9c6b2008-02-04 23:50:07 -05001222
Rusty Russell2cb9c6b2008-02-04 23:50:07 -05001223 /* Free up any pending old buffers before queueing new ones. */
Jason Wange9d74172012-12-07 07:04:55 +00001224 free_old_xmit_skbs(sq);
Rusty Russell2cb9c6b2008-02-04 23:50:07 -05001225
Willem de Bruijnbdb12e02017-04-24 13:49:30 -04001226 if (use_napi && kick)
1227 virtqueue_enable_cb_delayed(sq->vq);
1228
Jacob Keller074c3582014-06-25 02:37:13 +00001229 /* timestamp packet in software */
1230 skb_tx_timestamp(skb);
1231
Michael S. Tsirkin03f191b2009-10-28 04:03:38 -07001232 /* Try to transmit */
Linus Torvaldsb7dfde92012-12-20 08:37:04 -08001233 err = xmit_skb(sq, skb);
Rusty Russell48925e32009-09-24 09:59:20 -06001234
Rusty Russell9ed4cb02012-10-16 23:56:14 +10301235 /* This should not happen! */
Jason Wang681daee22014-03-26 13:03:00 +08001236 if (unlikely(err)) {
Rusty Russell9ed4cb02012-10-16 23:56:14 +10301237 dev->stats.tx_fifo_errors++;
1238 if (net_ratelimit())
1239 dev_warn(&dev->dev,
Linus Torvaldsb7dfde92012-12-20 08:37:04 -08001240 "Unexpected TXQ (%d) queue failure: %d\n", qnum, err);
Rusty Russell58eba97d2010-07-02 16:34:01 +00001241 dev->stats.tx_dropped++;
Eric W. Biederman85e94522014-03-15 18:43:33 -07001242 dev_kfree_skb_any(skb);
Rusty Russell58eba97d2010-07-02 16:34:01 +00001243 return NETDEV_TX_OK;
Rusty Russell99ffc692008-05-02 21:50:46 -05001244 }
Michael S. Tsirkin03f191b2009-10-28 04:03:38 -07001245
Rusty Russell48925e32009-09-24 09:59:20 -06001246 /* Don't wait up for transmitted skbs to be freed. */
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001247 if (!use_napi) {
1248 skb_orphan(skb);
1249 nf_reset(skb);
1250 }
Rusty Russell99ffc692008-05-02 21:50:46 -05001251
Michael S. Tsirkin60302ff2015-04-02 13:05:47 +02001252 /* If running out of space, stop queue to avoid getting packets that we
1253 * are then unable to transmit.
1254 * An alternative would be to force queuing layer to requeue the skb by
1255 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
1256 * returned in a normal path of operation: it means that driver is not
1257 * maintaining the TX queue stop/start state properly, and causes
1258 * the stack to do a non-trivial amount of useless work.
1259 * Since most packets only take 1 or 2 ring slots, stopping the queue
1260 * early means 16 slots are typically wasted.
stephen hemmingerd631b942015-03-24 16:22:07 -07001261 */
Linus Torvaldsb7dfde92012-12-20 08:37:04 -08001262 if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
Jason Wang986a4f42012-12-07 07:04:56 +00001263 netif_stop_subqueue(dev, qnum);
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001264 if (!use_napi &&
1265 unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
Rusty Russell48925e32009-09-24 09:59:20 -06001266 /* More just got used, free them then recheck. */
Linus Torvaldsb7dfde92012-12-20 08:37:04 -08001267 free_old_xmit_skbs(sq);
1268 if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
Jason Wang986a4f42012-12-07 07:04:56 +00001269 netif_start_subqueue(dev, qnum);
Jason Wange9d74172012-12-07 07:04:55 +00001270 virtqueue_disable_cb(sq->vq);
Rusty Russell48925e32009-09-24 09:59:20 -06001271 }
1272 }
Rusty Russell99ffc692008-05-02 21:50:46 -05001273 }
Rusty Russell48925e32009-09-24 09:59:20 -06001274
Michael S. Tsirkin4b7fd2e62014-10-15 16:23:28 +03001275 if (kick || netif_xmit_stopped(txq))
David S. Miller0b725a22014-08-25 15:51:53 -07001276 virtqueue_kick(sq->vq);
1277
Rusty Russell48925e32009-09-24 09:59:20 -06001278 return NETDEV_TX_OK;
Rusty Russell296f96f2007-10-22 11:03:37 +10001279}
1280
Amos Kong40cbfc32013-01-21 01:17:21 +00001281/*
1282 * Send command via the control virtqueue and check status. Commands
1283 * supported by the hypervisor, as indicated by feature bits, should
stephen hemminger788a8b62013-12-09 16:18:45 -08001284 * never fail unless improperly formatted.
Amos Kong40cbfc32013-01-21 01:17:21 +00001285 */
1286static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001287 struct scatterlist *out)
Amos Kong40cbfc32013-01-21 01:17:21 +00001288{
Rusty Russellf7bc9592013-03-20 15:44:28 +10301289 struct scatterlist *sgs[4], hdr, stat;
stephen hemmingerd24bae32013-12-09 16:17:40 -08001290 unsigned out_num = 0, tmp;
Amos Kong40cbfc32013-01-21 01:17:21 +00001291
1292 /* Caller should know better */
Rusty Russellf7bc9592013-03-20 15:44:28 +10301293 BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
Amos Kong40cbfc32013-01-21 01:17:21 +00001294
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001295 vi->ctrl_status = ~0;
1296 vi->ctrl_hdr.class = class;
1297 vi->ctrl_hdr.cmd = cmd;
Rusty Russellf7bc9592013-03-20 15:44:28 +10301298 /* Add header */
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001299 sg_init_one(&hdr, &vi->ctrl_hdr, sizeof(vi->ctrl_hdr));
Rusty Russellf7bc9592013-03-20 15:44:28 +10301300 sgs[out_num++] = &hdr;
Amos Kong40cbfc32013-01-21 01:17:21 +00001301
Rusty Russellf7bc9592013-03-20 15:44:28 +10301302 if (out)
1303 sgs[out_num++] = out;
Amos Kong40cbfc32013-01-21 01:17:21 +00001304
Rusty Russellf7bc9592013-03-20 15:44:28 +10301305 /* Add return status. */
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001306 sg_init_one(&stat, &vi->ctrl_status, sizeof(vi->ctrl_status));
stephen hemmingerd24bae32013-12-09 16:17:40 -08001307 sgs[out_num] = &stat;
Amos Kong40cbfc32013-01-21 01:17:21 +00001308
stephen hemmingerd24bae32013-12-09 16:17:40 -08001309 BUG_ON(out_num + 1 > ARRAY_SIZE(sgs));
Rusty Russella7c58142014-03-13 11:23:39 +10301310 virtqueue_add_sgs(vi->cvq, sgs, out_num, 1, vi, GFP_ATOMIC);
Amos Kong40cbfc32013-01-21 01:17:21 +00001311
Heinz Graalfs67975902013-10-29 09:40:02 +10301312 if (unlikely(!virtqueue_kick(vi->cvq)))
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001313 return vi->ctrl_status == VIRTIO_NET_OK;
Amos Kong40cbfc32013-01-21 01:17:21 +00001314
1315	/* Spin for a response; the kick causes an ioport write, trapping
1316 * into the hypervisor, so the request should be handled immediately.
1317 */
Heinz Graalfs047b9b92013-10-29 09:40:47 +10301318 while (!virtqueue_get_buf(vi->cvq, &tmp) &&
1319 !virtqueue_is_broken(vi->cvq))
Amos Kong40cbfc32013-01-21 01:17:21 +00001320 cpu_relax();
1321
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001322 return vi->ctrl_status == VIRTIO_NET_OK;
Amos Kong40cbfc32013-01-21 01:17:21 +00001323}
1324
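/* Change the MAC address either through a VIRTIO_NET_CTRL_MAC_ADDR_SET
 * control command or, for legacy devices with a writable MAC in config
 * space, by writing the address one byte at a time.
 */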
Alex Williamson9c46f6d2009-02-04 16:36:34 -08001325static int virtnet_set_mac_address(struct net_device *dev, void *p)
1326{
1327 struct virtnet_info *vi = netdev_priv(dev);
1328 struct virtio_device *vdev = vi->vdev;
Jiri Pirkof2f2c8b2012-06-29 05:10:06 +00001329 int ret;
Andy Lutomirskie37e2ff2016-12-05 18:10:58 -08001330 struct sockaddr *addr;
Amos Kong7e58d5a2013-01-21 01:17:23 +00001331 struct scatterlist sg;
Alex Williamson9c46f6d2009-02-04 16:36:34 -08001332
Shyam Saini801822d2016-12-24 00:44:58 +05301333 addr = kmemdup(p, sizeof(*addr), GFP_KERNEL);
Andy Lutomirskie37e2ff2016-12-05 18:10:58 -08001334 if (!addr)
1335 return -ENOMEM;
Andy Lutomirskie37e2ff2016-12-05 18:10:58 -08001336
1337 ret = eth_prepare_mac_addr_change(dev, addr);
Jiri Pirkof2f2c8b2012-06-29 05:10:06 +00001338 if (ret)
Andy Lutomirskie37e2ff2016-12-05 18:10:58 -08001339 goto out;
Alex Williamson9c46f6d2009-02-04 16:36:34 -08001340
Amos Kong7e58d5a2013-01-21 01:17:23 +00001341 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR)) {
1342 sg_init_one(&sg, addr->sa_data, dev->addr_len);
1343 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001344 VIRTIO_NET_CTRL_MAC_ADDR_SET, &sg)) {
Amos Kong7e58d5a2013-01-21 01:17:23 +00001345 dev_warn(&vdev->dev,
1346 "Failed to set mac address by vq command.\n");
Andy Lutomirskie37e2ff2016-12-05 18:10:58 -08001347 ret = -EINVAL;
1348 goto out;
Amos Kong7e58d5a2013-01-21 01:17:23 +00001349 }
Michael S. Tsirkin7e93a022014-11-26 15:58:28 +02001350 } else if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC) &&
1351 !virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) {
Rusty Russell855e0c52013-10-14 18:11:51 +10301352 unsigned int i;
1353
1354 /* Naturally, this has an atomicity problem. */
1355 for (i = 0; i < dev->addr_len; i++)
1356 virtio_cwrite8(vdev,
1357 offsetof(struct virtio_net_config, mac) +
1358 i, addr->sa_data[i]);
Amos Kong7e58d5a2013-01-21 01:17:23 +00001359 }
1360
1361 eth_commit_mac_addr_change(dev, p);
Andy Lutomirskie37e2ff2016-12-05 18:10:58 -08001362 ret = 0;
Alex Williamson9c46f6d2009-02-04 16:36:34 -08001363
Andy Lutomirskie37e2ff2016-12-05 18:10:58 -08001364out:
1365 kfree(addr);
1366 return ret;
Alex Williamson9c46f6d2009-02-04 16:36:34 -08001367}
1368
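/* Aggregate the per-cpu TX/RX counters into @tot using the u64_stats
 * retry loops, so 64-bit counters are read consistently even on 32-bit
 * machines; error counters are taken directly from dev->stats.
 */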
stephen hemmingerbc1f4472017-01-06 19:12:52 -08001369static void virtnet_stats(struct net_device *dev,
1370 struct rtnl_link_stats64 *tot)
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00001371{
1372 struct virtnet_info *vi = netdev_priv(dev);
1373 int cpu;
1374 unsigned int start;
1375
1376 for_each_possible_cpu(cpu) {
Eric Dumazet58472a72012-02-13 06:53:41 +00001377 struct virtnet_stats *stats = per_cpu_ptr(vi->stats, cpu);
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00001378 u64 tpackets, tbytes, rpackets, rbytes;
1379
1380 do {
Eric W. Biederman57a77442014-03-13 21:26:42 -07001381 start = u64_stats_fetch_begin_irq(&stats->tx_syncp);
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00001382 tpackets = stats->tx_packets;
1383 tbytes = stats->tx_bytes;
Eric W. Biederman57a77442014-03-13 21:26:42 -07001384 } while (u64_stats_fetch_retry_irq(&stats->tx_syncp, start));
Eric Dumazet83a27052012-06-05 22:35:24 +00001385
1386 do {
Eric W. Biederman57a77442014-03-13 21:26:42 -07001387 start = u64_stats_fetch_begin_irq(&stats->rx_syncp);
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00001388 rpackets = stats->rx_packets;
1389 rbytes = stats->rx_bytes;
Eric W. Biederman57a77442014-03-13 21:26:42 -07001390 } while (u64_stats_fetch_retry_irq(&stats->rx_syncp, start));
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00001391
1392 tot->rx_packets += rpackets;
1393 tot->tx_packets += tpackets;
1394 tot->rx_bytes += rbytes;
1395 tot->tx_bytes += tbytes;
1396 }
1397
1398 tot->tx_dropped = dev->stats.tx_dropped;
Rick Jones021ac8d2011-11-21 09:28:17 +00001399 tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00001400 tot->rx_dropped = dev->stats.rx_dropped;
1401 tot->rx_length_errors = dev->stats.rx_length_errors;
1402 tot->rx_frame_errors = dev->stats.rx_frame_errors;
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00001403}
1404
Amit Shahda74e892008-02-29 16:24:50 +05301405#ifdef CONFIG_NET_POLL_CONTROLLER
1406static void virtnet_netpoll(struct net_device *dev)
1407{
1408 struct virtnet_info *vi = netdev_priv(dev);
Jason Wang986a4f42012-12-07 07:04:56 +00001409 int i;
Amit Shahda74e892008-02-29 16:24:50 +05301410
Jason Wang986a4f42012-12-07 07:04:56 +00001411 for (i = 0; i < vi->curr_queue_pairs; i++)
1412 napi_schedule(&vi->rq[i].napi);
Amit Shahda74e892008-02-29 16:24:50 +05301413}
1414#endif
1415
Jason Wang586d17c2012-04-11 20:43:52 +00001416static void virtnet_ack_link_announce(struct virtnet_info *vi)
1417{
1418 rtnl_lock();
1419 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001420 VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL))
Jason Wang586d17c2012-04-11 20:43:52 +00001421 dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
1422 rtnl_unlock();
1423}
1424
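/* Ask the device to use @queue_pairs queue pairs via VIRTIO_NET_CTRL_MQ and,
 * on success, record the new count and refill the RX rings if the interface
 * is up. virtnet_set_queues() below wraps this with the rtnl lock held.
 */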
John Fastabend473153292017-02-02 19:14:32 -08001425static int _virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
Jason Wang986a4f42012-12-07 07:04:56 +00001426{
1427 struct scatterlist sg;
Jason Wang986a4f42012-12-07 07:04:56 +00001428 struct net_device *dev = vi->dev;
1429
1430 if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
1431 return 0;
1432
Andy Lutomirskia725ee32016-07-18 15:34:49 -07001433 vi->ctrl_mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs);
1434 sg_init_one(&sg, &vi->ctrl_mq, sizeof(vi->ctrl_mq));
Jason Wang986a4f42012-12-07 07:04:56 +00001435
1436 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001437 VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) {
Jason Wang986a4f42012-12-07 07:04:56 +00001438		dev_warn(&dev->dev, "Failed to set the number of queue pairs to %d\n",
1439 queue_pairs);
1440 return -EINVAL;
Sasha Levin55257d72013-04-29 12:00:08 +09301441 } else {
Jason Wang986a4f42012-12-07 07:04:56 +00001442 vi->curr_queue_pairs = queue_pairs;
Jason Wang35ed1592013-10-15 11:18:59 +08001443		/* virtnet_open() will refill when the device is brought up. */
1444 if (dev->flags & IFF_UP)
1445 schedule_delayed_work(&vi->refill, 0);
Sasha Levin55257d72013-04-29 12:00:08 +09301446 }
Jason Wang986a4f42012-12-07 07:04:56 +00001447
1448 return 0;
1449}
1450
John Fastabend473153292017-02-02 19:14:32 -08001451static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
1452{
1453 int err;
1454
1455 rtnl_lock();
1456 err = _virtnet_set_queues(vi, queue_pairs);
1457 rtnl_unlock();
1458 return err;
1459}
1460
Rusty Russell296f96f2007-10-22 11:03:37 +10001461static int virtnet_close(struct net_device *dev)
1462{
1463 struct virtnet_info *vi = netdev_priv(dev);
Jason Wang986a4f42012-12-07 07:04:56 +00001464 int i;
Rusty Russell296f96f2007-10-22 11:03:37 +10001465
Rusty Russellb2baed62011-12-29 00:42:38 +00001466 /* Make sure refill_work doesn't re-enable napi! */
1467 cancel_delayed_work_sync(&vi->refill);
Jason Wang986a4f42012-12-07 07:04:56 +00001468
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001469 for (i = 0; i < vi->max_queue_pairs; i++) {
Jason Wang986a4f42012-12-07 07:04:56 +00001470 napi_disable(&vi->rq[i].napi);
Willem de Bruijn78a57b42017-04-25 15:59:17 -04001471 virtnet_napi_tx_disable(&vi->sq[i].napi);
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001472 }
Rusty Russell296f96f2007-10-22 11:03:37 +10001473
Rusty Russell296f96f2007-10-22 11:03:37 +10001474 return 0;
1475}
1476
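/* Push the promiscuous/allmulti flags and the unicast/multicast MAC filter
 * table to the device over the control virtqueue; both address lists are
 * packed into one buffer described by two scatterlist entries.
 */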
Alex Williamson2af76982009-02-04 09:02:40 +00001477static void virtnet_set_rx_mode(struct net_device *dev)
1478{
1479 struct virtnet_info *vi = netdev_priv(dev);
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001480 struct scatterlist sg[2];
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001481 struct virtio_net_ctrl_mac *mac_data;
Jiri Pirkoccffad252009-05-22 23:22:17 +00001482 struct netdev_hw_addr *ha;
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08001483 int uc_count;
Jiri Pirko4cd24ea2010-02-08 04:30:35 +00001484 int mc_count;
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001485 void *buf;
1486 int i;
Alex Williamson2af76982009-02-04 09:02:40 +00001487
stephen hemminger788a8b62013-12-09 16:18:45 -08001488 /* We can't dynamically set ndo_set_rx_mode, so return gracefully */
Alex Williamson2af76982009-02-04 09:02:40 +00001489 if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_RX))
1490 return;
1491
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001492 vi->ctrl_promisc = ((dev->flags & IFF_PROMISC) != 0);
1493 vi->ctrl_allmulti = ((dev->flags & IFF_ALLMULTI) != 0);
Alex Williamson2af76982009-02-04 09:02:40 +00001494
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001495 sg_init_one(sg, &vi->ctrl_promisc, sizeof(vi->ctrl_promisc));
Alex Williamson2af76982009-02-04 09:02:40 +00001496
1497 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001498 VIRTIO_NET_CTRL_RX_PROMISC, sg))
Alex Williamson2af76982009-02-04 09:02:40 +00001499 dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001500 vi->ctrl_promisc ? "en" : "dis");
Alex Williamson2af76982009-02-04 09:02:40 +00001501
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001502 sg_init_one(sg, &vi->ctrl_allmulti, sizeof(vi->ctrl_allmulti));
Alex Williamson2af76982009-02-04 09:02:40 +00001503
1504 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001505 VIRTIO_NET_CTRL_RX_ALLMULTI, sg))
Alex Williamson2af76982009-02-04 09:02:40 +00001506 dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
Michael S. Tsirkin2ac46032015-11-15 15:11:00 +02001507 vi->ctrl_allmulti ? "en" : "dis");
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001508
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08001509 uc_count = netdev_uc_count(dev);
Jiri Pirko4cd24ea2010-02-08 04:30:35 +00001510 mc_count = netdev_mc_count(dev);
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001511 /* MAC filter - use one buffer for both lists */
Jiri Pirko4cd24ea2010-02-08 04:30:35 +00001512 buf = kzalloc(((uc_count + mc_count) * ETH_ALEN) +
1513 (2 * sizeof(mac_data->entries)), GFP_ATOMIC);
1514 mac_data = buf;
Joe Perchese68ed8f2013-02-03 17:28:15 +00001515 if (!buf)
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001516 return;
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001517
Alex Williamson23e258e2009-05-01 17:27:56 +00001518 sg_init_table(sg, 2);
1519
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001520 /* Store the unicast list and count in the front of the buffer */
Michael S. Tsirkinfdd819b2014-10-07 16:39:48 +02001521 mac_data->entries = cpu_to_virtio32(vi->vdev, uc_count);
Jiri Pirkoccffad252009-05-22 23:22:17 +00001522 i = 0;
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08001523 netdev_for_each_uc_addr(ha, dev)
Jiri Pirkoccffad252009-05-22 23:22:17 +00001524 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001525
1526 sg_set_buf(&sg[0], mac_data,
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08001527 sizeof(mac_data->entries) + (uc_count * ETH_ALEN));
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001528
1529 /* multicast list and count fill the end */
Jiri Pirko32e7bfc2010-01-25 13:36:10 -08001530 mac_data = (void *)&mac_data->macs[uc_count][0];
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001531
Michael S. Tsirkinfdd819b2014-10-07 16:39:48 +02001532 mac_data->entries = cpu_to_virtio32(vi->vdev, mc_count);
Jiri Pirko567ec872010-02-23 23:17:07 +00001533 i = 0;
Jiri Pirko22bedad32010-04-01 21:22:57 +00001534 netdev_for_each_mc_addr(ha, dev)
1535 memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001536
1537 sg_set_buf(&sg[1], mac_data,
Jiri Pirko4cd24ea2010-02-08 04:30:35 +00001538 sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001539
1540 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001541 VIRTIO_NET_CTRL_MAC_TABLE_SET, sg))
Thomas Huth99e872a2013-11-29 10:02:19 +01001542 dev_warn(&dev->dev, "Failed to set MAC filter table.\n");
Alex Williamsonf565a7c2009-02-04 09:02:45 +00001543
1544 kfree(buf);
Alex Williamson2af76982009-02-04 09:02:40 +00001545}
1546
Patrick McHardy80d5c362013-04-19 02:04:28 +00001547static int virtnet_vlan_rx_add_vid(struct net_device *dev,
1548 __be16 proto, u16 vid)
Alex Williamson0bde95692009-02-04 09:02:50 +00001549{
1550 struct virtnet_info *vi = netdev_priv(dev);
1551 struct scatterlist sg;
1552
Andy Lutomirskia725ee32016-07-18 15:34:49 -07001553 vi->ctrl_vid = vid;
1554 sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));
Alex Williamson0bde95692009-02-04 09:02:50 +00001555
1556 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001557 VIRTIO_NET_CTRL_VLAN_ADD, &sg))
Alex Williamson0bde95692009-02-04 09:02:50 +00001558 dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
Jiri Pirko8e586132011-12-08 19:52:37 -05001559 return 0;
Alex Williamson0bde95692009-02-04 09:02:50 +00001560}
1561
Patrick McHardy80d5c362013-04-19 02:04:28 +00001562static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
1563 __be16 proto, u16 vid)
Alex Williamson0bde95692009-02-04 09:02:50 +00001564{
1565 struct virtnet_info *vi = netdev_priv(dev);
1566 struct scatterlist sg;
1567
Andy Lutomirskia725ee32016-07-18 15:34:49 -07001568 vi->ctrl_vid = vid;
1569 sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid));
Alex Williamson0bde95692009-02-04 09:02:50 +00001570
1571 if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
stephen hemmingerd24bae32013-12-09 16:17:40 -08001572 VIRTIO_NET_CTRL_VLAN_DEL, &sg))
Alex Williamson0bde95692009-02-04 09:02:50 +00001573 dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
Jiri Pirko8e586132011-12-08 19:52:37 -05001574 return 0;
Alex Williamson0bde95692009-02-04 09:02:50 +00001575}
1576
Wanlong Gao8898c212013-01-24 23:51:30 +00001577static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
Jason Wang986a4f42012-12-07 07:04:56 +00001578{
1579 int i;
Wanlong Gao8898c212013-01-24 23:51:30 +00001580
1581 if (vi->affinity_hint_set) {
1582 for (i = 0; i < vi->max_queue_pairs; i++) {
1583 virtqueue_set_affinity(vi->rq[i].vq, -1);
1584 virtqueue_set_affinity(vi->sq[i].vq, -1);
1585 }
1586
1587 vi->affinity_hint_set = false;
1588 }
Wanlong Gao8898c212013-01-24 23:51:30 +00001589}
1590
1591static void virtnet_set_affinity(struct virtnet_info *vi)
Jason Wang986a4f42012-12-07 07:04:56 +00001592{
1593 int i;
Wanlong Gao47be2472013-01-24 23:51:29 +00001594 int cpu;
Jason Wang986a4f42012-12-07 07:04:56 +00001595
1596	/* In multiqueue mode, when the number of CPUs equals the number of
1597	 * queue pairs, we let each queue pair be private to one CPU by
1598	 * setting the affinity hint to eliminate contention.
1599 */
Wanlong Gao8898c212013-01-24 23:51:30 +00001600 if (vi->curr_queue_pairs == 1 ||
1601 vi->max_queue_pairs != num_online_cpus()) {
1602 virtnet_clean_affinity(vi, -1);
1603 return;
Jason Wang986a4f42012-12-07 07:04:56 +00001604 }
1605
Wanlong Gao8898c212013-01-24 23:51:30 +00001606 i = 0;
1607 for_each_online_cpu(cpu) {
Jason Wang986a4f42012-12-07 07:04:56 +00001608 virtqueue_set_affinity(vi->rq[i].vq, cpu);
1609 virtqueue_set_affinity(vi->sq[i].vq, cpu);
Jason Wang9bb8ca82013-11-05 18:19:45 +08001610 netif_set_xps_queue(vi->dev, cpumask_of(cpu), i);
Wanlong Gao8898c212013-01-24 23:51:30 +00001611 i++;
Jason Wang986a4f42012-12-07 07:04:56 +00001612 }
1613
Wanlong Gao8898c212013-01-24 23:51:30 +00001614 vi->affinity_hint_set = true;
Jason Wang986a4f42012-12-07 07:04:56 +00001615}
1616
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02001617static int virtnet_cpu_online(unsigned int cpu, struct hlist_node *node)
Wanlong Gao8de4b2f2013-01-24 23:51:31 +00001618{
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02001619 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1620 node);
1621 virtnet_set_affinity(vi);
1622 return 0;
1623}
Wanlong Gao8de4b2f2013-01-24 23:51:31 +00001624
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02001625static int virtnet_cpu_dead(unsigned int cpu, struct hlist_node *node)
1626{
1627 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1628 node_dead);
1629 virtnet_set_affinity(vi);
1630 return 0;
1631}
Jason Wang3ab098d2013-10-15 11:18:58 +08001632
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02001633static int virtnet_cpu_down_prep(unsigned int cpu, struct hlist_node *node)
1634{
1635 struct virtnet_info *vi = hlist_entry_safe(node, struct virtnet_info,
1636 node);
1637
1638 virtnet_clean_affinity(vi, cpu);
1639 return 0;
1640}
1641
1642static enum cpuhp_state virtionet_online;
1643
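/* Hook this device into the CPU hotplug state machine so the virtqueue
 * affinity hints are refreshed as CPUs come online or go away.
 */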
1644static int virtnet_cpu_notif_add(struct virtnet_info *vi)
1645{
1646 int ret;
1647
1648 ret = cpuhp_state_add_instance_nocalls(virtionet_online, &vi->node);
1649 if (ret)
1650 return ret;
1651 ret = cpuhp_state_add_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1652 &vi->node_dead);
1653 if (!ret)
1654 return ret;
1655 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1656 return ret;
1657}
1658
1659static void virtnet_cpu_notif_remove(struct virtnet_info *vi)
1660{
1661 cpuhp_state_remove_instance_nocalls(virtionet_online, &vi->node);
1662 cpuhp_state_remove_instance_nocalls(CPUHP_VIRT_NET_DEAD,
1663 &vi->node_dead);
Herbert Xua9ea3fc2008-04-18 11:21:42 +08001664}
1665
Rick Jones8f9f4662011-10-19 08:10:59 +00001666static void virtnet_get_ringparam(struct net_device *dev,
1667 struct ethtool_ringparam *ring)
1668{
1669 struct virtnet_info *vi = netdev_priv(dev);
1670
Jason Wang986a4f42012-12-07 07:04:56 +00001671 ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
1672 ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
Rick Jones8f9f4662011-10-19 08:10:59 +00001673 ring->rx_pending = ring->rx_max_pending;
1674 ring->tx_pending = ring->tx_max_pending;
Rick Jones8f9f4662011-10-19 08:10:59 +00001675}
1676
Rick Jones66846042011-11-14 14:17:08 +00001677
1678static void virtnet_get_drvinfo(struct net_device *dev,
1679 struct ethtool_drvinfo *info)
1680{
1681 struct virtnet_info *vi = netdev_priv(dev);
1682 struct virtio_device *vdev = vi->vdev;
1683
1684 strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
1685 strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
1686 strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
1687
1688}
1689
Jason Wangd73bcd22012-12-07 07:04:57 +00001690/* TODO: Eliminate OOO packets during switching */
1691static int virtnet_set_channels(struct net_device *dev,
1692 struct ethtool_channels *channels)
1693{
1694 struct virtnet_info *vi = netdev_priv(dev);
1695 u16 queue_pairs = channels->combined_count;
1696 int err;
1697
1698 /* We don't support separate rx/tx channels.
1699 * We don't allow setting 'other' channels.
1700 */
1701 if (channels->rx_count || channels->tx_count || channels->other_count)
1702 return -EINVAL;
1703
Amos Kongc18e9cd2014-04-18 13:45:41 +08001704 if (queue_pairs > vi->max_queue_pairs || queue_pairs == 0)
Jason Wangd73bcd22012-12-07 07:04:57 +00001705 return -EINVAL;
1706
John Fastabendf600b692016-12-15 12:13:24 -08001707	/* For now we don't support modifying channels while XDP is loaded.
1708	 * Also, when XDP is loaded all RX queues have XDP programs, so we only
1709	 * need to check a single RX queue.
1710 */
1711 if (vi->rq[0].xdp_prog)
1712 return -EINVAL;
1713
Wanlong Gao47be2472013-01-24 23:51:29 +00001714 get_online_cpus();
John Fastabend473153292017-02-02 19:14:32 -08001715 err = _virtnet_set_queues(vi, queue_pairs);
Jason Wangd73bcd22012-12-07 07:04:57 +00001716 if (!err) {
1717 netif_set_real_num_tx_queues(dev, queue_pairs);
1718 netif_set_real_num_rx_queues(dev, queue_pairs);
1719
Wanlong Gao8898c212013-01-24 23:51:30 +00001720 virtnet_set_affinity(vi);
Jason Wangd73bcd22012-12-07 07:04:57 +00001721 }
Wanlong Gao47be2472013-01-24 23:51:29 +00001722 put_online_cpus();
Jason Wangd73bcd22012-12-07 07:04:57 +00001723
1724 return err;
1725}
1726
1727static void virtnet_get_channels(struct net_device *dev,
1728 struct ethtool_channels *channels)
1729{
1730 struct virtnet_info *vi = netdev_priv(dev);
1731
1732 channels->combined_count = vi->curr_queue_pairs;
1733 channels->max_combined = vi->max_queue_pairs;
1734 channels->max_other = 0;
1735 channels->rx_count = 0;
1736 channels->tx_count = 0;
1737 channels->other_count = 0;
1738}
1739
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001740/* Check if the user is trying to change anything besides speed/duplex */
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001741static bool
1742virtnet_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001743{
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001744 struct ethtool_link_ksettings diff1 = *cmd;
1745 struct ethtool_link_ksettings diff2 = {};
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001746
Nikolay Aleksandrov0cf3ace2016-02-07 21:52:24 +01001747	/* cmd is always set, so we need to clear it and validate the port type;
1748	 * also, without autonegotiation we can ignore advertising
1749 */
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001750 diff1.base.speed = 0;
1751 diff2.base.port = PORT_OTHER;
1752 ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
1753 diff1.base.duplex = 0;
1754 diff1.base.cmd = 0;
1755 diff1.base.link_mode_masks_nwords = 0;
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001756
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001757 return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
1758 bitmap_empty(diff1.link_modes.supported,
1759 __ETHTOOL_LINK_MODE_MASK_NBITS) &&
1760 bitmap_empty(diff1.link_modes.advertising,
1761 __ETHTOOL_LINK_MODE_MASK_NBITS) &&
1762 bitmap_empty(diff1.link_modes.lp_advertising,
1763 __ETHTOOL_LINK_MODE_MASK_NBITS);
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001764}
1765
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001766static int virtnet_set_link_ksettings(struct net_device *dev,
1767 const struct ethtool_link_ksettings *cmd)
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001768{
1769 struct virtnet_info *vi = netdev_priv(dev);
1770 u32 speed;
1771
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001772 speed = cmd->base.speed;
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001773 /* don't allow custom speed and duplex */
1774 if (!ethtool_validate_speed(speed) ||
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001775 !ethtool_validate_duplex(cmd->base.duplex) ||
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001776 !virtnet_validate_ethtool_cmd(cmd))
1777 return -EINVAL;
1778 vi->speed = speed;
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001779 vi->duplex = cmd->base.duplex;
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001780
1781 return 0;
1782}
1783
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001784static int virtnet_get_link_ksettings(struct net_device *dev,
1785 struct ethtool_link_ksettings *cmd)
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001786{
1787 struct virtnet_info *vi = netdev_priv(dev);
1788
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001789 cmd->base.speed = vi->speed;
1790 cmd->base.duplex = vi->duplex;
1791 cmd->base.port = PORT_OTHER;
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01001792
1793 return 0;
1794}
1795
1796static void virtnet_init_settings(struct net_device *dev)
1797{
1798 struct virtnet_info *vi = netdev_priv(dev);
1799
1800 vi->speed = SPEED_UNKNOWN;
1801 vi->duplex = DUPLEX_UNKNOWN;
1802}
1803
Stephen Hemminger0fc0b732009-09-02 01:03:33 -07001804static const struct ethtool_ops virtnet_ethtool_ops = {
Rick Jones66846042011-11-14 14:17:08 +00001805 .get_drvinfo = virtnet_get_drvinfo,
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08001806 .get_link = ethtool_op_get_link,
Rick Jones8f9f4662011-10-19 08:10:59 +00001807 .get_ringparam = virtnet_get_ringparam,
Jason Wangd73bcd22012-12-07 07:04:57 +00001808 .set_channels = virtnet_set_channels,
1809 .get_channels = virtnet_get_channels,
Jacob Keller074c3582014-06-25 02:37:13 +00001810 .get_ts_info = ethtool_op_get_ts_info,
Philippe Reynesebb6b4b2017-03-21 23:24:24 +01001811 .get_link_ksettings = virtnet_get_link_ksettings,
1812 .set_link_ksettings = virtnet_set_link_ksettings,
Herbert Xua9ea3fc2008-04-18 11:21:42 +08001813};
1814
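/* Teardown shared by the freeze and reset paths: detach the netdev, stop
 * TX, cancel the refill work and disable NAPI so nothing touches the
 * virtqueues while the device is being reset or suspended.
 */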
John Fastabend9fe7bfc2017-02-02 19:16:01 -08001815static void virtnet_freeze_down(struct virtio_device *vdev)
1816{
1817 struct virtnet_info *vi = vdev->priv;
1818 int i;
1819
1820 /* Make sure no work handler is accessing the device */
1821 flush_work(&vi->config_work);
1822
1823 netif_device_detach(vi->dev);
Jason Wang713a98d2017-06-28 09:51:03 +08001824 netif_tx_disable(vi->dev);
John Fastabend9fe7bfc2017-02-02 19:16:01 -08001825 cancel_delayed_work_sync(&vi->refill);
1826
1827 if (netif_running(vi->dev)) {
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001828 for (i = 0; i < vi->max_queue_pairs; i++) {
John Fastabend9fe7bfc2017-02-02 19:16:01 -08001829 napi_disable(&vi->rq[i].napi);
Willem de Bruijn78a57b42017-04-25 15:59:17 -04001830 virtnet_napi_tx_disable(&vi->sq[i].napi);
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001831 }
John Fastabend9fe7bfc2017-02-02 19:16:01 -08001832 }
1833}
1834
1835static int init_vqs(struct virtnet_info *vi);
John Fastabend2de2f7f2017-02-02 19:16:29 -08001836static void _remove_vq_common(struct virtnet_info *vi);
John Fastabend9fe7bfc2017-02-02 19:16:01 -08001837
1838static int virtnet_restore_up(struct virtio_device *vdev)
1839{
1840 struct virtnet_info *vi = vdev->priv;
1841 int err, i;
1842
1843 err = init_vqs(vi);
1844 if (err)
1845 return err;
1846
1847 virtio_device_ready(vdev);
1848
1849 if (netif_running(vi->dev)) {
1850 for (i = 0; i < vi->curr_queue_pairs; i++)
1851 if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
1852 schedule_delayed_work(&vi->refill, 0);
1853
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001854 for (i = 0; i < vi->max_queue_pairs; i++) {
Willem de Bruijne4e84522017-04-24 13:49:26 -04001855 virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04001856 virtnet_napi_tx_enable(vi, vi->sq[i].vq,
1857 &vi->sq[i].napi);
1858 }
John Fastabend9fe7bfc2017-02-02 19:16:01 -08001859 }
1860
1861 netif_device_attach(vi->dev);
1862 return err;
1863}
1864
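/* Full device reset used when the XDP queue count changes: tear the device
 * down, renegotiate features, rebuild the virtqueues with the new
 * xdp_queue_pairs value, and bring the device back up with @curr_qp active
 * queue pairs.
 */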
Jason Wang017b29c2017-02-20 11:50:20 +08001865static int virtnet_reset(struct virtnet_info *vi, int curr_qp, int xdp_qp)
John Fastabend2de2f7f2017-02-02 19:16:29 -08001866{
1867 struct virtio_device *dev = vi->vdev;
1868 int ret;
1869
1870 virtio_config_disable(dev);
1871 dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
1872 virtnet_freeze_down(dev);
1873 _remove_vq_common(vi);
1874
John Fastabend2de2f7f2017-02-02 19:16:29 -08001875 virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
1876 virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
1877
1878 ret = virtio_finalize_features(dev);
1879 if (ret)
1880 goto err;
1881
Jason Wang017b29c2017-02-20 11:50:20 +08001882 vi->xdp_queue_pairs = xdp_qp;
John Fastabend2de2f7f2017-02-02 19:16:29 -08001883 ret = virtnet_restore_up(dev);
1884 if (ret)
1885 goto err;
Jason Wang017b29c2017-02-20 11:50:20 +08001886 ret = _virtnet_set_queues(vi, curr_qp);
John Fastabend2de2f7f2017-02-02 19:16:29 -08001887 if (ret)
1888 goto err;
1889
1890 virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
1891 virtio_config_enable(dev);
1892 return 0;
1893err:
1894 virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
1895 return ret;
1896}
1897
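/* Attach or detach an XDP program. XDP is refused while the host offers
 * receive offloads (LRO), when mergeable buffers lack any_header_sg, or when
 * the MTU does not fit in a single page. One TX queue per possible CPU is
 * reserved for XDP_TX, and a full device reset is performed when the number
 * of XDP queue pairs (and therefore the buffer headroom) changes.
 */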
Jakub Kicinski9861ce02017-04-30 21:46:48 -07001898static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
1899 struct netlink_ext_ack *extack)
John Fastabendf600b692016-12-15 12:13:24 -08001900{
1901 unsigned long int max_sz = PAGE_SIZE - sizeof(struct padded_vnet_hdr);
1902 struct virtnet_info *vi = netdev_priv(dev);
1903 struct bpf_prog *old_prog;
Jason Wang017b29c2017-02-20 11:50:20 +08001904 u16 xdp_qp = 0, curr_qp;
John Fastabend672aafd2016-12-15 12:13:49 -08001905 int i, err;
John Fastabendf600b692016-12-15 12:13:24 -08001906
1907 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO4) ||
Jason Wang92502fe2016-12-23 22:37:30 +08001908 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) ||
1909 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) ||
1910 virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) {
Daniel Borkmann4d463c42017-05-03 00:39:17 +02001911 NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first");
John Fastabendf600b692016-12-15 12:13:24 -08001912 return -EOPNOTSUPP;
1913 }
1914
1915 if (vi->mergeable_rx_bufs && !vi->any_header_sg) {
Daniel Borkmann4d463c42017-05-03 00:39:17 +02001916 NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required");
John Fastabendf600b692016-12-15 12:13:24 -08001917 return -EINVAL;
1918 }
1919
1920 if (dev->mtu > max_sz) {
Daniel Borkmann4d463c42017-05-03 00:39:17 +02001921 NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP");
John Fastabendf600b692016-12-15 12:13:24 -08001922 netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz);
1923 return -EINVAL;
1924 }
1925
John Fastabend672aafd2016-12-15 12:13:49 -08001926 curr_qp = vi->curr_queue_pairs - vi->xdp_queue_pairs;
1927 if (prog)
1928 xdp_qp = nr_cpu_ids;
1929
1930 /* XDP requires extra queues for XDP_TX */
1931 if (curr_qp + xdp_qp > vi->max_queue_pairs) {
Daniel Borkmann4d463c42017-05-03 00:39:17 +02001932 NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
John Fastabend672aafd2016-12-15 12:13:49 -08001933 netdev_warn(dev, "request %i queues but max is %i\n",
1934 curr_qp + xdp_qp, vi->max_queue_pairs);
1935 return -ENOMEM;
1936 }
1937
John Fastabend2de2f7f2017-02-02 19:16:29 -08001938 if (prog) {
1939 prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
1940 if (IS_ERR(prog))
1941 return PTR_ERR(prog);
1942 }
1943
John Fastabend2de2f7f2017-02-02 19:16:29 -08001944 /* Changing the headroom in buffers is a disruptive operation because
1945 * existing buffers must be flushed and reallocated. This will happen
1946	 * when an XDP program is initially added, or when XDP is disabled by
1947	 * removing the program, resulting in the number of XDP queues changing.
1948 */
1949 if (vi->xdp_queue_pairs != xdp_qp) {
Jason Wang017b29c2017-02-20 11:50:20 +08001950 err = virtnet_reset(vi, curr_qp + xdp_qp, xdp_qp);
1951 if (err) {
1952 dev_warn(&dev->dev, "XDP reset failure.\n");
John Fastabend2de2f7f2017-02-02 19:16:29 -08001953 goto virtio_reset_err;
Jason Wang017b29c2017-02-20 11:50:20 +08001954 }
John Fastabendf600b692016-12-15 12:13:24 -08001955 }
1956
John Fastabend672aafd2016-12-15 12:13:49 -08001957 netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp);
1958
John Fastabendf600b692016-12-15 12:13:24 -08001959 for (i = 0; i < vi->max_queue_pairs; i++) {
1960 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
1961 rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
1962 if (old_prog)
1963 bpf_prog_put(old_prog);
1964 }
1965
1966 return 0;
John Fastabend2de2f7f2017-02-02 19:16:29 -08001967
1968virtio_reset_err:
1969	/* On reset error, do our best to unwind the in-flight XDP changes and
1970	 * return the error up to user space for resolution. The underlying reset
1971	 * hung on us, so there is not much we can do here.
1972 */
John Fastabend2de2f7f2017-02-02 19:16:29 -08001973 if (prog)
1974 bpf_prog_sub(prog, vi->max_queue_pairs - 1);
1975 return err;
John Fastabendf600b692016-12-15 12:13:24 -08001976}
1977
Martin KaFai Lau5b0e6622017-06-15 17:29:12 -07001978static u32 virtnet_xdp_query(struct net_device *dev)
John Fastabendf600b692016-12-15 12:13:24 -08001979{
1980 struct virtnet_info *vi = netdev_priv(dev);
Martin KaFai Lau5b0e6622017-06-15 17:29:12 -07001981 const struct bpf_prog *xdp_prog;
John Fastabendf600b692016-12-15 12:13:24 -08001982 int i;
1983
1984 for (i = 0; i < vi->max_queue_pairs; i++) {
Martin KaFai Lau5b0e6622017-06-15 17:29:12 -07001985 xdp_prog = rtnl_dereference(vi->rq[i].xdp_prog);
1986 if (xdp_prog)
1987 return xdp_prog->aux->id;
John Fastabendf600b692016-12-15 12:13:24 -08001988 }
Martin KaFai Lau5b0e6622017-06-15 17:29:12 -07001989 return 0;
John Fastabendf600b692016-12-15 12:13:24 -08001990}
1991
1992static int virtnet_xdp(struct net_device *dev, struct netdev_xdp *xdp)
1993{
1994 switch (xdp->command) {
1995 case XDP_SETUP_PROG:
Jakub Kicinski9861ce02017-04-30 21:46:48 -07001996 return virtnet_xdp_set(dev, xdp->prog, xdp->extack);
John Fastabendf600b692016-12-15 12:13:24 -08001997 case XDP_QUERY_PROG:
Martin KaFai Lau5b0e6622017-06-15 17:29:12 -07001998 xdp->prog_id = virtnet_xdp_query(dev);
1999 xdp->prog_attached = !!xdp->prog_id;
John Fastabendf600b692016-12-15 12:13:24 -08002000 return 0;
2001 default:
2002 return -EINVAL;
2003 }
2004}
2005
Stephen Hemminger76288b42009-01-06 10:44:22 -08002006static const struct net_device_ops virtnet_netdev = {
2007 .ndo_open = virtnet_open,
2008 .ndo_stop = virtnet_close,
2009 .ndo_start_xmit = start_xmit,
2010 .ndo_validate_addr = eth_validate_addr,
Alex Williamson9c46f6d2009-02-04 16:36:34 -08002011 .ndo_set_mac_address = virtnet_set_mac_address,
Alex Williamson2af76982009-02-04 09:02:40 +00002012 .ndo_set_rx_mode = virtnet_set_rx_mode,
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00002013 .ndo_get_stats64 = virtnet_stats,
Alex Williamson1824a982009-05-01 17:31:10 +00002014 .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
2015 .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
Stephen Hemminger76288b42009-01-06 10:44:22 -08002016#ifdef CONFIG_NET_POLL_CONTROLLER
2017 .ndo_poll_controller = virtnet_netpoll,
2018#endif
John Fastabendf600b692016-12-15 12:13:24 -08002019 .ndo_xdp = virtnet_xdp,
Vlad Yasevich2836b4f2017-05-23 13:38:43 -04002020 .ndo_features_check = passthru_features_check,
Stephen Hemminger76288b42009-01-06 10:44:22 -08002021};
2022
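/* Deferred handler for configuration-change interrupts: acknowledge link
 * announcements and propagate the device's link state to the stack.
 */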
Jason Wang586d17c2012-04-11 20:43:52 +00002023static void virtnet_config_changed_work(struct work_struct *work)
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002024{
Jason Wang586d17c2012-04-11 20:43:52 +00002025 struct virtnet_info *vi =
2026 container_of(work, struct virtnet_info, config_work);
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002027 u16 v;
2028
Rusty Russell855e0c52013-10-14 18:11:51 +10302029 if (virtio_cread_feature(vi->vdev, VIRTIO_NET_F_STATUS,
2030 struct virtio_net_config, status, &v) < 0)
Michael S. Tsirkin507613b2014-10-15 10:22:30 +10302031 return;
Jason Wang586d17c2012-04-11 20:43:52 +00002032
2033 if (v & VIRTIO_NET_S_ANNOUNCE) {
Amerigo Wangee89bab2012-08-09 22:14:56 +00002034 netdev_notify_peers(vi->dev);
Jason Wang586d17c2012-04-11 20:43:52 +00002035 virtnet_ack_link_announce(vi);
2036 }
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002037
2038 /* Ignore unknown (future) status bits */
2039 v &= VIRTIO_NET_S_LINK_UP;
2040
2041 if (vi->status == v)
Michael S. Tsirkin507613b2014-10-15 10:22:30 +10302042 return;
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002043
2044 vi->status = v;
2045
2046 if (vi->status & VIRTIO_NET_S_LINK_UP) {
2047 netif_carrier_on(vi->dev);
Jason Wang986a4f42012-12-07 07:04:56 +00002048 netif_tx_wake_all_queues(vi->dev);
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002049 } else {
2050 netif_carrier_off(vi->dev);
Jason Wang986a4f42012-12-07 07:04:56 +00002051 netif_tx_stop_all_queues(vi->dev);
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002052 }
2053}
2054
2055static void virtnet_config_changed(struct virtio_device *vdev)
2056{
2057 struct virtnet_info *vi = vdev->priv;
2058
Tejun Heo3b07e9c2012-08-20 14:51:24 -07002059 schedule_work(&vi->config_work);
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002060}
2061
Jason Wang986a4f42012-12-07 07:04:56 +00002062static void virtnet_free_queues(struct virtnet_info *vi)
2063{
Andrey Vagind4fb84e2013-12-05 18:36:21 +04002064 int i;
2065
Jason Wangab3971b2015-03-12 13:57:44 +08002066 for (i = 0; i < vi->max_queue_pairs; i++) {
2067 napi_hash_del(&vi->rq[i].napi);
Andrey Vagind4fb84e2013-12-05 18:36:21 +04002068 netif_napi_del(&vi->rq[i].napi);
Willem de Bruijnb92f1e62017-04-24 13:49:27 -04002069 netif_napi_del(&vi->sq[i].napi);
Jason Wangab3971b2015-03-12 13:57:44 +08002070 }
Andrey Vagind4fb84e2013-12-05 18:36:21 +04002071
Eric Dumazet963abe52016-11-15 22:24:12 -08002072 /* We called napi_hash_del() before netif_napi_del(),
2073	 * so we need to respect an RCU grace period before freeing vi->rq
2074 */
2075 synchronize_net();
2076
Jason Wang986a4f42012-12-07 07:04:56 +00002077 kfree(vi->rq);
2078 kfree(vi->sq);
2079}
2080
John Fastabend473153292017-02-02 19:14:32 -08002081static void _free_receive_bufs(struct virtnet_info *vi)
Jason Wang986a4f42012-12-07 07:04:56 +00002082{
John Fastabendf600b692016-12-15 12:13:24 -08002083 struct bpf_prog *old_prog;
Jason Wang986a4f42012-12-07 07:04:56 +00002084 int i;
2085
2086 for (i = 0; i < vi->max_queue_pairs; i++) {
2087 while (vi->rq[i].pages)
2088 __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
John Fastabendf600b692016-12-15 12:13:24 -08002089
2090 old_prog = rtnl_dereference(vi->rq[i].xdp_prog);
2091 RCU_INIT_POINTER(vi->rq[i].xdp_prog, NULL);
2092 if (old_prog)
2093 bpf_prog_put(old_prog);
Jason Wang986a4f42012-12-07 07:04:56 +00002094 }
John Fastabend473153292017-02-02 19:14:32 -08002095}
2096
2097static void free_receive_bufs(struct virtnet_info *vi)
2098{
2099 rtnl_lock();
2100 _free_receive_bufs(vi);
John Fastabendf600b692016-12-15 12:13:24 -08002101 rtnl_unlock();
Jason Wang986a4f42012-12-07 07:04:56 +00002102}
2103
Michael Daltonfb518792014-01-16 22:23:26 -08002104static void free_receive_page_frags(struct virtnet_info *vi)
2105{
2106 int i;
2107 for (i = 0; i < vi->max_queue_pairs; i++)
2108 if (vi->rq[i].alloc_frag.page)
2109 put_page(vi->rq[i].alloc_frag.page);
2110}
2111
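/* TX queues in [curr_queue_pairs - xdp_queue_pairs, curr_queue_pairs) carry
 * raw XDP_TX pages rather than skbs; this helper lets the cleanup paths free
 * the right kind of object.
 */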
John Fastabendb68df012017-01-25 18:22:48 -08002112static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
John Fastabend56434a02016-12-15 12:14:13 -08002113{
2114 if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
2115 return false;
2116 else if (q < vi->curr_queue_pairs)
2117 return true;
2118 else
2119 return false;
2120}
2121
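/* Free any buffers still sitting in the virtqueues at teardown time. TX
 * entries may be raw XDP pages rather than skbs, and RX entries are freed
 * according to the receive mode (mergeable, big or small packets).
 */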
Jason Wang986a4f42012-12-07 07:04:56 +00002122static void free_unused_bufs(struct virtnet_info *vi)
2123{
2124 void *buf;
2125 int i;
2126
2127 for (i = 0; i < vi->max_queue_pairs; i++) {
2128 struct virtqueue *vq = vi->sq[i].vq;
John Fastabend56434a02016-12-15 12:14:13 -08002129 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
John Fastabendb68df012017-01-25 18:22:48 -08002130 if (!is_xdp_raw_buffer_queue(vi, i))
John Fastabend56434a02016-12-15 12:14:13 -08002131 dev_kfree_skb(buf);
2132 else
2133 put_page(virt_to_head_page(buf));
2134 }
Jason Wang986a4f42012-12-07 07:04:56 +00002135 }
2136
2137 for (i = 0; i < vi->max_queue_pairs; i++) {
2138 struct virtqueue *vq = vi->rq[i].vq;
2139
2140 while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
Michael Daltonab7db912014-01-16 22:23:27 -08002141 if (vi->mergeable_rx_bufs) {
Michael S. Tsirkin680557c2017-03-06 21:29:47 +02002142 put_page(virt_to_head_page(buf));
Michael Daltonab7db912014-01-16 22:23:27 -08002143 } else if (vi->big_packets) {
Andrey Vaginfa9fac12013-12-05 18:36:20 +04002144 give_pages(&vi->rq[i], buf);
Michael Daltonab7db912014-01-16 22:23:27 -08002145 } else {
Jason Wangf6b10202017-02-21 16:46:28 +08002146 put_page(virt_to_head_page(buf));
Michael Daltonab7db912014-01-16 22:23:27 -08002147 }
Jason Wang986a4f42012-12-07 07:04:56 +00002148 }
Jason Wang986a4f42012-12-07 07:04:56 +00002149 }
2150}
2151
Jason Wange9d74172012-12-07 07:04:55 +00002152static void virtnet_del_vqs(struct virtnet_info *vi)
2153{
2154 struct virtio_device *vdev = vi->vdev;
2155
Wanlong Gao8898c212013-01-24 23:51:30 +00002156 virtnet_clean_affinity(vi, -1);
Jason Wang986a4f42012-12-07 07:04:56 +00002157
Jason Wange9d74172012-12-07 07:04:55 +00002158 vdev->config->del_vqs(vdev);
Jason Wang986a4f42012-12-07 07:04:56 +00002159
2160 virtnet_free_queues(vi);
2161}
2162
Michael S. Tsirkind85b758f72017-03-09 02:21:21 +02002163/* How large should a single buffer be so a queue full of these can fit at
2164 * least one full packet?
2165 * Logic below assumes the mergeable buffer header is used.
2166 */
2167static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq)
2168{
2169 const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2170 unsigned int rq_size = virtqueue_get_vring_size(vq);
2171 unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu;
2172 unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len;
2173 unsigned int min_buf_len = DIV_ROUND_UP(buf_len, rq_size);
2174
Michael S. Tsirkinf0c31922017-06-02 17:54:33 +03002175 return max(max(min_buf_len, hdr_len) - hdr_len,
2176 (unsigned int)GOOD_PACKET_LEN);
Michael S. Tsirkind85b758f72017-03-09 02:21:21 +02002177}
2178
Jason Wang986a4f42012-12-07 07:04:56 +00002179static int virtnet_find_vqs(struct virtnet_info *vi)
2180{
2181 vq_callback_t **callbacks;
2182 struct virtqueue **vqs;
2183 int ret = -ENOMEM;
2184 int i, total_vqs;
2185 const char **names;
Michael S. Tsirkind45b8972017-03-06 20:31:21 +02002186 bool *ctx;
Jason Wang986a4f42012-12-07 07:04:56 +00002187
2188 /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by
2189	 * up to N-1 more RX/TX queue pairs used in multiqueue mode, followed by
2190	 * a possible control vq.
2191 */
2192 total_vqs = vi->max_queue_pairs * 2 +
2193 virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
2194
2195 /* Allocate space for find_vqs parameters */
2196 vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
2197 if (!vqs)
2198 goto err_vq;
2199 callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
2200 if (!callbacks)
2201 goto err_callback;
2202 names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL);
2203 if (!names)
2204 goto err_names;
Michael S. Tsirkind45b8972017-03-06 20:31:21 +02002205 if (vi->mergeable_rx_bufs) {
2206 ctx = kzalloc(total_vqs * sizeof(*ctx), GFP_KERNEL);
2207 if (!ctx)
2208 goto err_ctx;
2209 } else {
2210 ctx = NULL;
2211 }
Jason Wang986a4f42012-12-07 07:04:56 +00002212
2213 /* Parameters for control virtqueue, if any */
2214 if (vi->has_cvq) {
2215 callbacks[total_vqs - 1] = NULL;
2216 names[total_vqs - 1] = "control";
2217 }
2218
2219 /* Allocate/initialize parameters for send/receive virtqueues */
2220 for (i = 0; i < vi->max_queue_pairs; i++) {
2221 callbacks[rxq2vq(i)] = skb_recv_done;
2222 callbacks[txq2vq(i)] = skb_xmit_done;
2223 sprintf(vi->rq[i].name, "input.%d", i);
2224 sprintf(vi->sq[i].name, "output.%d", i);
2225 names[rxq2vq(i)] = vi->rq[i].name;
2226 names[txq2vq(i)] = vi->sq[i].name;
Michael S. Tsirkind45b8972017-03-06 20:31:21 +02002227 if (ctx)
2228 ctx[rxq2vq(i)] = true;
Jason Wang986a4f42012-12-07 07:04:56 +00002229 }
2230
2231 ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
Michael S. Tsirkind45b8972017-03-06 20:31:21 +02002232 names, ctx, NULL);
Jason Wang986a4f42012-12-07 07:04:56 +00002233 if (ret)
2234 goto err_find;
2235
2236 if (vi->has_cvq) {
2237 vi->cvq = vqs[total_vqs - 1];
2238 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
Patrick McHardyf6469682013-04-19 02:04:27 +00002239 vi->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
Jason Wang986a4f42012-12-07 07:04:56 +00002240 }
2241
2242 for (i = 0; i < vi->max_queue_pairs; i++) {
2243 vi->rq[i].vq = vqs[rxq2vq(i)];
Michael S. Tsirkind85b758f72017-03-09 02:21:21 +02002244 vi->rq[i].min_buf_len = mergeable_min_buf_len(vi, vi->rq[i].vq);
Jason Wang986a4f42012-12-07 07:04:56 +00002245 vi->sq[i].vq = vqs[txq2vq(i)];
2246 }
2247
2248 kfree(names);
2249 kfree(callbacks);
2250 kfree(vqs);
Jason Wang55281622017-07-07 19:56:09 +08002251 kfree(ctx);
Jason Wang986a4f42012-12-07 07:04:56 +00002252
2253 return 0;
2254
2255err_find:
Michael S. Tsirkind45b8972017-03-06 20:31:21 +02002256 kfree(ctx);
2257err_ctx:
Jason Wang986a4f42012-12-07 07:04:56 +00002258 kfree(names);
2259err_names:
2260 kfree(callbacks);
2261err_callback:
2262 kfree(vqs);
2263err_vq:
2264 return ret;
2265}
2266
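/* Allocate the send_queue/receive_queue arrays and register their NAPI
 * contexts; TX NAPI gets a zero weight (disabled) unless the napi_tx module
 * parameter is set.
 */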
2267static int virtnet_alloc_queues(struct virtnet_info *vi)
2268{
2269 int i;
2270
2271 vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL);
2272 if (!vi->sq)
2273 goto err_sq;
2274 vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL);
Amerigo Wang008d4272012-12-10 02:24:08 +00002275 if (!vi->rq)
Jason Wang986a4f42012-12-07 07:04:56 +00002276 goto err_rq;
2277
2278 INIT_DELAYED_WORK(&vi->refill, refill_work);
2279 for (i = 0; i < vi->max_queue_pairs; i++) {
2280 vi->rq[i].pages = NULL;
2281 netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
2282 napi_weight);
Willem de Bruijn1d11e732017-04-27 20:37:58 -04002283 netif_tx_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
2284 napi_tx ? napi_weight : 0);
Jason Wang986a4f42012-12-07 07:04:56 +00002285
2286 sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
Johannes Berg5377d7582015-08-19 09:48:40 +02002287 ewma_pkt_len_init(&vi->rq[i].mrg_avg_pkt_len);
Jason Wang986a4f42012-12-07 07:04:56 +00002288 sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
2289 }
2290
2291 return 0;
2292
2293err_rq:
2294 kfree(vi->sq);
2295err_sq:
2296 return -ENOMEM;
Jason Wange9d74172012-12-07 07:04:55 +00002297}
2298
Amit Shah3f9c10b2011-12-22 16:58:31 +05302299static int init_vqs(struct virtnet_info *vi)
2300{
Jason Wang986a4f42012-12-07 07:04:56 +00002301 int ret;
Amit Shah3f9c10b2011-12-22 16:58:31 +05302302
Jason Wang986a4f42012-12-07 07:04:56 +00002303 /* Allocate send & receive queues */
2304 ret = virtnet_alloc_queues(vi);
2305 if (ret)
2306 goto err;
Amit Shah3f9c10b2011-12-22 16:58:31 +05302307
Jason Wang986a4f42012-12-07 07:04:56 +00002308 ret = virtnet_find_vqs(vi);
2309 if (ret)
2310 goto err_free;
Amit Shah3f9c10b2011-12-22 16:58:31 +05302311
Wanlong Gao47be2472013-01-24 23:51:29 +00002312 get_online_cpus();
Wanlong Gao8898c212013-01-24 23:51:30 +00002313 virtnet_set_affinity(vi);
Wanlong Gao47be2472013-01-24 23:51:29 +00002314 put_online_cpus();
2315
Amit Shah3f9c10b2011-12-22 16:58:31 +05302316 return 0;
Jason Wang986a4f42012-12-07 07:04:56 +00002317
2318err_free:
2319 virtnet_free_queues(vi);
2320err:
2321 return ret;
Amit Shah3f9c10b2011-12-22 16:58:31 +05302322}
2323
Michael Daltonfbf28d72014-01-16 22:23:30 -08002324#ifdef CONFIG_SYSFS
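/* Per-RX-queue sysfs attribute reporting the buffer size currently used for
 * mergeable receive buffers, derived from the EWMA of received packet
 * lengths.
 */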
2325static ssize_t mergeable_rx_buffer_size_show(struct netdev_rx_queue *queue,
2326 struct rx_queue_attribute *attribute, char *buf)
2327{
2328 struct virtnet_info *vi = netdev_priv(queue->dev);
2329 unsigned int queue_index = get_netdev_rx_queue_index(queue);
Johannes Berg5377d7582015-08-19 09:48:40 +02002330 struct ewma_pkt_len *avg;
Michael Daltonfbf28d72014-01-16 22:23:30 -08002331
2332 BUG_ON(queue_index >= vi->max_queue_pairs);
2333 avg = &vi->rq[queue_index].mrg_avg_pkt_len;
Michael S. Tsirkind85b758f72017-03-09 02:21:21 +02002334 return sprintf(buf, "%u\n",
2335 get_mergeable_buf_len(&vi->rq[queue_index], avg));
Michael Daltonfbf28d72014-01-16 22:23:30 -08002336}
2337
2338static struct rx_queue_attribute mergeable_rx_buffer_size_attribute =
2339 __ATTR_RO(mergeable_rx_buffer_size);
2340
2341static struct attribute *virtio_net_mrg_rx_attrs[] = {
2342 &mergeable_rx_buffer_size_attribute.attr,
2343 NULL
2344};
2345
2346static const struct attribute_group virtio_net_mrg_rx_group = {
2347 .name = "virtio_net",
2348 .attrs = virtio_net_mrg_rx_attrs
2349};
2350#endif
2351
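/* Feature-dependency check: if the device advertises @fbit, which only
 * makes sense together with the feature named @dname, log the
 * inconsistency and return true so that probing can be refused.
 */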
Jason Wang892d6eb2014-11-20 17:03:05 +08002352static bool virtnet_fail_on_feature(struct virtio_device *vdev,
2353 unsigned int fbit,
2354 const char *fname, const char *dname)
2355{
2356 if (!virtio_has_feature(vdev, fbit))
2357 return false;
2358
2359	dev_err(&vdev->dev, "device advertises feature %s but not %s\n",
2360 fname, dname);
2361
2362 return true;
2363}
2364
2365#define VIRTNET_FAIL_ON(vdev, fbit, dbit) \
2366 virtnet_fail_on_feature(vdev, fbit, #fbit, dbit)
2367
2368static bool virtnet_validate_features(struct virtio_device *vdev)
2369{
2370 if (!virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ) &&
2371 (VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_RX,
2372 "VIRTIO_NET_F_CTRL_VQ") ||
2373 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_VLAN,
2374 "VIRTIO_NET_F_CTRL_VQ") ||
2375 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE,
2376 "VIRTIO_NET_F_CTRL_VQ") ||
2377 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") ||
2378 VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR,
2379 "VIRTIO_NET_F_CTRL_VQ"))) {
2380 return false;
2381 }
2382
2383 return true;
2384}
2385
Jarod Wilsond0c2c992016-10-20 13:55:21 -04002386#define MIN_MTU ETH_MIN_MTU
2387#define MAX_MTU ETH_MAX_MTU
2388
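/* ->validate() runs before the driver accepts the device's feature set.
 * Reject devices with an inconsistent feature combination, and clear
 * VIRTIO_NET_F_MTU when the advertised MTU is below the legal minimum so
 * that virtnet_probe() never applies a bogus value.
 */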
Michael S. Tsirkinfe36cbe2017-03-29 19:09:14 +03002389static int virtnet_validate(struct virtio_device *vdev)
Rusty Russell296f96f2007-10-22 11:03:37 +10002390{
Michael S. Tsirkin6ba42242015-01-12 16:23:37 +02002391 if (!vdev->config->get) {
2392 dev_err(&vdev->dev, "%s failure: config access disabled\n",
2393 __func__);
2394 return -EINVAL;
2395 }
2396
Jason Wang892d6eb2014-11-20 17:03:05 +08002397 if (!virtnet_validate_features(vdev))
2398 return -EINVAL;
2399
Michael S. Tsirkinfe36cbe2017-03-29 19:09:14 +03002400 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2401 int mtu = virtio_cread16(vdev,
2402 offsetof(struct virtio_net_config,
2403 mtu));
2404 if (mtu < MIN_MTU)
2405 __virtio_clear_bit(vdev, VIRTIO_NET_F_MTU);
2406 }
2407
2408 return 0;
2409}
2410
2411static int virtnet_probe(struct virtio_device *vdev)
2412{
2413 int i, err;
2414 struct net_device *dev;
2415 struct virtnet_info *vi;
2416 u16 max_queue_pairs;
2417 int mtu;
2418
Jason Wang986a4f42012-12-07 07:04:56 +00002419	/* Find out whether the host supports a multiqueue virtio_net device */
Rusty Russell855e0c52013-10-14 18:11:51 +10302420 err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ,
2421 struct virtio_net_config,
2422 max_virtqueue_pairs, &max_queue_pairs);
Jason Wang986a4f42012-12-07 07:04:56 +00002423
2424	/* We need at least 2 queues */
2425 if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
2426 max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
2427 !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2428 max_queue_pairs = 1;
Rusty Russell296f96f2007-10-22 11:03:37 +10002429
2430 /* Allocate ourselves a network device with room for our info */
Jason Wang986a4f42012-12-07 07:04:56 +00002431 dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
Rusty Russell296f96f2007-10-22 11:03:37 +10002432 if (!dev)
2433 return -ENOMEM;
2434
2435 /* Set up network device as normal. */
Jiri Pirkof2f2c8b2012-06-29 05:10:06 +00002436 dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
Stephen Hemminger76288b42009-01-06 10:44:22 -08002437 dev->netdev_ops = &virtnet_netdev;
Rusty Russell296f96f2007-10-22 11:03:37 +10002438 dev->features = NETIF_F_HIGHDMA;
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00002439
Wilfried Klaebe7ad24ea2014-05-11 00:12:32 +00002440 dev->ethtool_ops = &virtnet_ethtool_ops;
Rusty Russell296f96f2007-10-22 11:03:37 +10002441 SET_NETDEV_DEV(dev, &vdev->dev);
2442
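/* Map the negotiated virtio feature bits onto netdev offload flags.
 * Offloads are added to dev->hw_features (togglable via ethtool); the
 * csum/gso module parameters only decide whether they are also enabled
 * by default in dev->features.
 */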
2443 /* Do we support "hardware" checksums? */
Michał Mirosław98e778c2011-03-31 01:01:35 +00002444 if (virtio_has_feature(vdev, VIRTIO_NET_F_CSUM)) {
Rusty Russell296f96f2007-10-22 11:03:37 +10002445 /* This opens up the world of extra features. */
Jason Wang48900cb2015-08-05 10:34:04 +08002446 dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_SG;
Michał Mirosław98e778c2011-03-31 01:01:35 +00002447 if (csum)
Jason Wang48900cb2015-08-05 10:34:04 +08002448 dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
Michał Mirosław98e778c2011-03-31 01:01:35 +00002449
2450 if (virtio_has_feature(vdev, VIRTIO_NET_F_GSO)) {
David S. Millere078de02017-07-03 06:37:32 -07002451 dev->hw_features |= NETIF_F_TSO
Rusty Russell34a48572008-02-04 23:50:02 -05002452 | NETIF_F_TSO_ECN | NETIF_F_TSO6;
2453 }
Rusty Russell5539ae962008-05-02 21:50:46 -05002454	/* Individual feature bits: what can the host handle? */
Michał Mirosław98e778c2011-03-31 01:01:35 +00002455 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO4))
2456 dev->hw_features |= NETIF_F_TSO;
2457 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_TSO6))
2458 dev->hw_features |= NETIF_F_TSO6;
2459 if (virtio_has_feature(vdev, VIRTIO_NET_F_HOST_ECN))
2460 dev->hw_features |= NETIF_F_TSO_ECN;
Michał Mirosław98e778c2011-03-31 01:01:35 +00002461
Jason Wang41f2f122014-12-24 11:03:52 +08002462 dev->features |= NETIF_F_GSO_ROBUST;
2463
Michał Mirosław98e778c2011-03-31 01:01:35 +00002464 if (gso)
David S. Millere078de02017-07-03 06:37:32 -07002465 dev->features |= dev->hw_features & NETIF_F_ALL_TSO;
Michał Mirosław98e778c2011-03-31 01:01:35 +00002466 /* (!csum && gso) case will be fixed by register_netdev() */
Rusty Russell296f96f2007-10-22 11:03:37 +10002467 }
Thomas Huth4f491292013-08-27 17:09:02 +02002468 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_CSUM))
2469 dev->features |= NETIF_F_RXCSUM;
Rusty Russell296f96f2007-10-22 11:03:37 +10002470
Jason Wang4fda8302013-04-10 23:32:21 +00002471 dev->vlan_features = dev->features;
2472
Jarod Wilsond0c2c992016-10-20 13:55:21 -04002473 /* MTU range: 68 - 65535 */
2474 dev->min_mtu = MIN_MTU;
2475 dev->max_mtu = MAX_MTU;
2476
Rusty Russell296f96f2007-10-22 11:03:37 +10002477 /* Configuration may specify what MAC to use. Otherwise random. */
Rusty Russell855e0c52013-10-14 18:11:51 +10302478 if (virtio_has_feature(vdev, VIRTIO_NET_F_MAC))
2479 virtio_cread_bytes(vdev,
2480 offsetof(struct virtio_net_config, mac),
2481 dev->dev_addr, dev->addr_len);
2482 else
Danny Kukawkaf2cedb62012-02-15 06:45:39 +00002483 eth_hw_addr_random(dev);
Rusty Russell296f96f2007-10-22 11:03:37 +10002484
2485 /* Set up our device-specific information */
2486 vi = netdev_priv(dev);
Rusty Russell296f96f2007-10-22 11:03:37 +10002487 vi->dev = dev;
2488 vi->vdev = vdev;
Christian Borntraegerd9d5dcc2008-02-18 10:02:51 +01002489 vdev->priv = vi;
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00002490 vi->stats = alloc_percpu(struct virtnet_stats);
2491 err = -ENOMEM;
2492 if (vi->stats == NULL)
2493 goto free;
2494
John Stultz827da442013-10-07 15:51:58 -07002495 for_each_possible_cpu(i) {
2496 struct virtnet_stats *virtnet_stats;
2497 virtnet_stats = per_cpu_ptr(vi->stats, i);
2498 u64_stats_init(&virtnet_stats->tx_syncp);
2499 u64_stats_init(&virtnet_stats->rx_syncp);
2500 }
2501
Jason Wang586d17c2012-04-11 20:43:52 +00002502 INIT_WORK(&vi->config_work, virtnet_config_changed_work);
Rusty Russell296f96f2007-10-22 11:03:37 +10002503
Herbert Xu97402b92008-04-18 11:24:27 +08002504 /* If we can receive ANY GSO packets, we must allocate large ones. */
Joe Perches8e95a202009-12-03 07:58:21 +00002505 if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
2506 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6) ||
Vlad Yaseviche3e3c422015-02-03 16:36:17 -05002507 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_ECN) ||
2508 virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_UFO))
Herbert Xu97402b92008-04-18 11:24:27 +08002509 vi->big_packets = true;
2510
Mark McLoughlin3f2c31d2008-11-16 22:41:34 -08002511 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
2512 vi->mergeable_rx_bufs = true;
2513
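/* Modern (VIRTIO_F_VERSION_1) devices and devices using mergeable RX
 * buffers use the longer virtio_net_hdr_mrg_rxbuf header; legacy devices
 * without mergeable buffers use the short virtio_net_hdr.
 */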
Michael S. Tsirkind04302b2014-10-24 00:24:03 +03002514 if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) ||
2515 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
Michael S. Tsirkin012873d2014-10-24 16:55:57 +03002516 vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2517 else
2518 vi->hdr_len = sizeof(struct virtio_net_hdr);
2519
Michael S. Tsirkin75993302015-07-15 15:26:19 +03002520 if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
2521 virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09302522 vi->any_header_sg = true;
2523
Jason Wang986a4f42012-12-07 07:04:56 +00002524 if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
2525 vi->has_cvq = true;
2526
Aaron Conole14de9d12016-06-03 16:57:12 -04002527 if (virtio_has_feature(vdev, VIRTIO_NET_F_MTU)) {
2528 mtu = virtio_cread16(vdev,
2529 offsetof(struct virtio_net_config,
2530 mtu));
Aaron Conole93a205e2016-10-25 16:12:12 -04002531 if (mtu < dev->min_mtu) {
Michael S. Tsirkinfe36cbe2017-03-29 19:09:14 +03002532 /* Should never trigger: MTU was previously validated
2533 * in virtnet_validate.
2534 */
2535			dev_err(&vdev->dev, "device MTU appears to have changed: "
2536				"it is now %d < %d\n", mtu, dev->min_mtu);
2537 goto free_stats;
Aaron Conole93a205e2016-10-25 16:12:12 -04002538 }
Michael S. Tsirkin2e123b42017-03-08 02:14:25 +02002539
Michael S. Tsirkinfe36cbe2017-03-29 19:09:14 +03002540 dev->mtu = mtu;
2541 dev->max_mtu = mtu;
2542
Michael S. Tsirkin2e123b42017-03-08 02:14:25 +02002543 /* TODO: size buffers correctly in this case. */
2544 if (dev->mtu > ETH_DATA_LEN)
2545 vi->big_packets = true;
Aaron Conole14de9d12016-06-03 16:57:12 -04002546 }
2547
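/* When the device accepts an arbitrary descriptor layout, the virtio
 * header can be prepended to the packet data itself on transmit, so
 * reserve enough headroom for it up front.
 */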
Michael S. Tsirkin012873d2014-10-24 16:55:57 +03002548 if (vi->any_header_sg)
2549 dev->needed_headroom = vi->hdr_len;
Zhangjie \(HZ\)6ebbc1a2014-04-29 18:43:22 +08002550
Jason Wang44900012016-11-25 12:37:26 +08002551 /* Enable multiqueue by default */
2552 if (num_online_cpus() >= max_queue_pairs)
2553 vi->curr_queue_pairs = max_queue_pairs;
2554 else
2555 vi->curr_queue_pairs = num_online_cpus();
Jason Wang986a4f42012-12-07 07:04:56 +00002556 vi->max_queue_pairs = max_queue_pairs;
2557
2558 /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
Amit Shah3f9c10b2011-12-22 16:58:31 +05302559 err = init_vqs(vi);
Michael S. Tsirkind2a7ddd2009-06-12 22:16:36 -06002560 if (err)
Jason Wang9bb8ca82013-11-05 18:19:45 +08002561 goto free_stats;
Michael S. Tsirkind2a7ddd2009-06-12 22:16:36 -06002562
Michael Daltonfbf28d72014-01-16 22:23:30 -08002563#ifdef CONFIG_SYSFS
2564 if (vi->mergeable_rx_bufs)
2565 dev->sysfs_rx_queue_group = &virtio_net_mrg_rx_group;
2566#endif
Zhi Yong Wu0f13b662013-11-18 21:19:27 +08002567 netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
2568 netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
Jason Wang986a4f42012-12-07 07:04:56 +00002569
Nikolay Aleksandrov16032be2016-02-03 04:04:37 +01002570 virtnet_init_settings(dev);
2571
Rusty Russell296f96f2007-10-22 11:03:37 +10002572 err = register_netdev(dev);
2573 if (err) {
2574 pr_debug("virtio_net: registering device failed\n");
Michael S. Tsirkind2a7ddd2009-06-12 22:16:36 -06002575 goto free_vqs;
Rusty Russell296f96f2007-10-22 11:03:37 +10002576 }
Rusty Russellb3369c12008-02-04 23:50:02 -05002577
Michael S. Tsirkin4baf1e32014-10-15 10:22:30 +10302578 virtio_device_ready(vdev);
2579
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02002580 err = virtnet_cpu_notif_add(vi);
Wanlong Gao8de4b2f2013-01-24 23:51:31 +00002581 if (err) {
2582 pr_debug("virtio_net: registering cpu notifier failed\n");
wangyunjianf00e35e2016-05-31 11:52:43 +08002583 goto free_unregister_netdev;
Wanlong Gao8de4b2f2013-01-24 23:51:31 +00002584 }
2585
Jason Wanga2208712016-12-13 14:23:05 +08002586 virtnet_set_queues(vi, vi->curr_queue_pairs);
Jason Wang44900012016-11-25 12:37:26 +08002587
Jason Wang167c25e2010-11-10 14:45:41 +00002588	/* Assume link up if the device can't report link status;
2589	 * otherwise get the link status from the config space. */
2590 if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_STATUS)) {
2591 netif_carrier_off(dev);
Tejun Heo3b07e9c2012-08-20 14:51:24 -07002592 schedule_work(&vi->config_work);
Jason Wang167c25e2010-11-10 14:45:41 +00002593 } else {
2594 vi->status = VIRTIO_NET_S_LINK_UP;
2595 netif_carrier_on(dev);
2596 }
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002597
Jason Wang986a4f42012-12-07 07:04:56 +00002598	pr_debug("virtnet: registered device %s with %d RX and TX vqs\n",
2599 dev->name, max_queue_pairs);
2600
Rusty Russell296f96f2007-10-22 11:03:37 +10002601 return 0;
2602
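/* Error unwind: reset the device first so the host stops touching the
 * buffers, then tear everything down in the reverse order of setup.
 */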
wangyunjianf00e35e2016-05-31 11:52:43 +08002603free_unregister_netdev:
Michael S. Tsirkin02465552014-10-15 10:22:31 +10302604 vi->vdev->config->reset(vdev);
2605
Rusty Russellb3369c12008-02-04 23:50:02 -05002606 unregister_netdev(dev);
Michael S. Tsirkind2a7ddd2009-06-12 22:16:36 -06002607free_vqs:
Jason Wang986a4f42012-12-07 07:04:56 +00002608 cancel_delayed_work_sync(&vi->refill);
Michael Daltonfb518792014-01-16 22:23:26 -08002609 free_receive_page_frags(vi);
Jason Wange9d74172012-12-07 07:04:55 +00002610 virtnet_del_vqs(vi);
stephen hemminger3fa2a1d2011-06-15 06:36:29 +00002611free_stats:
2612 free_percpu(vi->stats);
Rusty Russell296f96f2007-10-22 11:03:37 +10002613free:
2614 free_netdev(dev);
2615 return err;
2616}
2617
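/* Variant of remove_vq_common() built on _free_receive_bufs(); presumably
 * intended for callers that already hold the locks the plain helper would
 * take itself.
 */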
John Fastabend2de2f7f2017-02-02 19:16:29 -08002618static void _remove_vq_common(struct virtnet_info *vi)
2619{
2620 vi->vdev->config->reset(vi->vdev);
2621 free_unused_bufs(vi);
2622 _free_receive_bufs(vi);
2623 free_receive_page_frags(vi);
2624 virtnet_del_vqs(vi);
2625}
2626
Amit Shah04486ed2011-12-22 16:58:32 +05302627static void remove_vq_common(struct virtnet_info *vi)
Rusty Russell296f96f2007-10-22 11:03:37 +10002628{
Amit Shah04486ed2011-12-22 16:58:32 +05302629 vi->vdev->config->reset(vi->vdev);
Shirley Ma830a8a92010-02-08 14:14:42 +00002630
2631 /* Free unused buffers in both send and recv, if any. */
Shirley Ma9ab86bb2010-01-29 03:20:04 +00002632 free_unused_bufs(vi);
Rusty Russellfb6813f2008-07-25 12:06:01 -05002633
Jason Wang986a4f42012-12-07 07:04:56 +00002634 free_receive_bufs(vi);
Michael S. Tsirkind2a7ddd2009-06-12 22:16:36 -06002635
Michael Daltonfb518792014-01-16 22:23:26 -08002636 free_receive_page_frags(vi);
2637
Jason Wang986a4f42012-12-07 07:04:56 +00002638 virtnet_del_vqs(vi);
Amit Shah04486ed2011-12-22 16:58:32 +05302639}
2640
Bill Pemberton8cc085d2012-12-03 09:24:15 -05002641static void virtnet_remove(struct virtio_device *vdev)
Amit Shah04486ed2011-12-22 16:58:32 +05302642{
2643 struct virtnet_info *vi = vdev->priv;
2644
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02002645 virtnet_cpu_notif_remove(vi);
Wanlong Gao8de4b2f2013-01-24 23:51:31 +00002646
Michael S. Tsirkin102a2782014-10-15 10:22:29 +10302647 /* Make sure no work handler is accessing the device. */
2648 flush_work(&vi->config_work);
Jason Wang586d17c2012-04-11 20:43:52 +00002649
Amit Shah04486ed2011-12-22 16:58:32 +05302650 unregister_netdev(vi->dev);
2651
2652 remove_vq_common(vi);
Rusty Russellfb6813f2008-07-25 12:06:01 -05002653
Krishna Kumar2e66f552011-07-20 03:56:02 +00002654 free_percpu(vi->stats);
Rusty Russell74b25532007-11-19 11:20:42 -05002655 free_netdev(vi->dev);
Rusty Russell296f96f2007-10-22 11:03:37 +10002656}
2657
Aaron Lu89107002013-09-17 09:25:23 +09302658#ifdef CONFIG_PM_SLEEP
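/* Suspend/resume hooks: freeze removes the CPU-hotplug notifier, quiesces
 * the device via virtnet_freeze_down() and deletes the virtqueues; restore
 * recreates them via virtnet_restore_up(), reprograms the queue-pair count
 * and re-adds the notifier.
 */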
Amit Shah0741bcb2011-12-22 16:58:33 +05302659static int virtnet_freeze(struct virtio_device *vdev)
2660{
2661 struct virtnet_info *vi = vdev->priv;
2662
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02002663 virtnet_cpu_notif_remove(vi);
John Fastabend9fe7bfc2017-02-02 19:16:01 -08002664 virtnet_freeze_down(vdev);
Amit Shah0741bcb2011-12-22 16:58:33 +05302665 remove_vq_common(vi);
2666
2667 return 0;
2668}
2669
2670static int virtnet_restore(struct virtio_device *vdev)
2671{
2672 struct virtnet_info *vi = vdev->priv;
John Fastabend9fe7bfc2017-02-02 19:16:01 -08002673 int err;
Amit Shah0741bcb2011-12-22 16:58:33 +05302674
John Fastabend9fe7bfc2017-02-02 19:16:01 -08002675 err = virtnet_restore_up(vdev);
Amit Shah0741bcb2011-12-22 16:58:33 +05302676 if (err)
2677 return err;
Jason Wang986a4f42012-12-07 07:04:56 +00002678 virtnet_set_queues(vi, vi->curr_queue_pairs);
2679
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02002680 err = virtnet_cpu_notif_add(vi);
Jason Wangec9debb2013-10-29 15:11:07 +08002681 if (err)
2682 return err;
2683
Amit Shah0741bcb2011-12-22 16:58:33 +05302684 return 0;
2685}
2686#endif
2687
Rusty Russell296f96f2007-10-22 11:03:37 +10002688static struct virtio_device_id id_table[] = {
2689 { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
2690 { 0 },
2691};
2692
Michael S. Tsirkinf3358502016-11-04 12:55:36 +02002693#define VIRTNET_FEATURES \
2694 VIRTIO_NET_F_CSUM, VIRTIO_NET_F_GUEST_CSUM, \
2695 VIRTIO_NET_F_MAC, \
2696 VIRTIO_NET_F_HOST_TSO4, VIRTIO_NET_F_HOST_UFO, VIRTIO_NET_F_HOST_TSO6, \
2697 VIRTIO_NET_F_HOST_ECN, VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6, \
2698 VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, \
2699 VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, \
2700 VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, \
2701 VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \
2702 VIRTIO_NET_F_CTRL_MAC_ADDR, \
2703 VIRTIO_NET_F_MTU
2704
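/* Two feature tables: modern (VIRTIO_F_VERSION_1) devices are offered
 * VIRTNET_FEATURES only; legacy devices are additionally offered
 * VIRTIO_NET_F_GSO and VIRTIO_F_ANY_LAYOUT, which are legacy-only bits.
 */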
Rusty Russellc45a6812008-05-02 21:50:50 -05002705static unsigned int features[] = {
Michael S. Tsirkinf3358502016-11-04 12:55:36 +02002706 VIRTNET_FEATURES,
2707};
2708
2709static unsigned int features_legacy[] = {
2710 VIRTNET_FEATURES,
2711 VIRTIO_NET_F_GSO,
Michael S. Tsirkine7428e92013-07-25 10:20:23 +09302712 VIRTIO_F_ANY_LAYOUT,
Rusty Russellc45a6812008-05-02 21:50:50 -05002713};
2714
Uwe Kleine-König22402522009-11-05 01:32:44 -08002715static struct virtio_driver virtio_net_driver = {
Rusty Russellc45a6812008-05-02 21:50:50 -05002716 .feature_table = features,
2717 .feature_table_size = ARRAY_SIZE(features),
Michael S. Tsirkinf3358502016-11-04 12:55:36 +02002718 .feature_table_legacy = features_legacy,
2719 .feature_table_size_legacy = ARRAY_SIZE(features_legacy),
Rusty Russell296f96f2007-10-22 11:03:37 +10002720 .driver.name = KBUILD_MODNAME,
2721 .driver.owner = THIS_MODULE,
2722 .id_table = id_table,
Michael S. Tsirkinfe36cbe2017-03-29 19:09:14 +03002723 .validate = virtnet_validate,
Rusty Russell296f96f2007-10-22 11:03:37 +10002724 .probe = virtnet_probe,
Bill Pemberton8cc085d2012-12-03 09:24:15 -05002725 .remove = virtnet_remove,
Mark McLoughlin9f4d26d2009-01-19 17:09:49 -08002726 .config_changed = virtnet_config_changed,
Aaron Lu89107002013-09-17 09:25:23 +09302727#ifdef CONFIG_PM_SLEEP
Amit Shah0741bcb2011-12-22 16:58:33 +05302728 .freeze = virtnet_freeze,
2729 .restore = virtnet_restore,
2730#endif
Rusty Russell296f96f2007-10-22 11:03:37 +10002731};
2732
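/* Module init: register the two CPU-hotplug multi-instance states (online
 * and dead callbacks) before the virtio driver itself, and unwind them in
 * reverse order if any step fails.
 */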
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02002733static __init int virtio_net_driver_init(void)
2734{
2735 int ret;
2736
Thomas Gleixner73c1b412016-12-21 20:19:54 +01002737 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "virtio/net:online",
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02002738 virtnet_cpu_online,
2739 virtnet_cpu_down_prep);
2740 if (ret < 0)
2741 goto out;
2742 virtionet_online = ret;
Thomas Gleixner73c1b412016-12-21 20:19:54 +01002743 ret = cpuhp_setup_state_multi(CPUHP_VIRT_NET_DEAD, "virtio/net:dead",
Sebastian Andrzej Siewior8017c272016-08-12 19:49:43 +02002744 NULL, virtnet_cpu_dead);
2745 if (ret)
2746 goto err_dead;
2747
2748 ret = register_virtio_driver(&virtio_net_driver);
2749 if (ret)
2750 goto err_virtio;
2751 return 0;
2752err_virtio:
2753 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
2754err_dead:
2755 cpuhp_remove_multi_state(virtionet_online);
2756out:
2757 return ret;
2758}
2759module_init(virtio_net_driver_init);
2760
2761static __exit void virtio_net_driver_exit(void)
2762{
2763 cpuhp_remove_multi_state(CPUHP_VIRT_NET_DEAD);
2764 cpuhp_remove_multi_state(virtionet_online);
2765 unregister_virtio_driver(&virtio_net_driver);
2766}
2767module_exit(virtio_net_driver_exit);
Rusty Russell296f96f2007-10-22 11:03:37 +10002768
2769MODULE_DEVICE_TABLE(virtio, id_table);
2770MODULE_DESCRIPTION("Virtio network driver");
2771MODULE_LICENSE("GPL");