/* Copyright (c) 2014 Mahesh Bandewar <maheshb@google.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 */

#include "ipvlan.h"

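/* In L3S mode all ports share the LOCAL_IN netfilter hooks and the l3mdev
 * receive handler below; ipvl_nf_hook_refcnt counts how many ports are
 * currently in that mode, so the hooks are registered only once, when the
 * first L3S port appears, and removed again when the last one is gone.
 */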
static u32 ipvl_nf_hook_refcnt = 0;

static struct nf_hook_ops ipvl_nfops[] __read_mostly = {
	{
		.hook     = ipvlan_nf_input,
		.pf       = NFPROTO_IPV4,
		.hooknum  = NF_INET_LOCAL_IN,
		.priority = INT_MAX,
	},
	{
		.hook     = ipvlan_nf_input,
		.pf       = NFPROTO_IPV6,
		.hooknum  = NF_INET_LOCAL_IN,
		.priority = INT_MAX,
	},
};

static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = {
	.l3mdev_l3_rcv = ipvlan_l3_rcv,
};

static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev)
{
	ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj;
}

static int ipvlan_register_nf_hook(void)
{
	int err = 0;

	if (!ipvl_nf_hook_refcnt) {
		err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
		if (!err)
			ipvl_nf_hook_refcnt = 1;
	} else {
		ipvl_nf_hook_refcnt++;
	}

	return err;
}

static void ipvlan_unregister_nf_hook(void)
{
	WARN_ON(!ipvl_nf_hook_refcnt);

	ipvl_nf_hook_refcnt--;
	if (!ipvl_nf_hook_refcnt)
		_nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops));
}

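/* Switching the port mode toggles IFF_NOARP on every slave (L3 and L3S are
 * ARP-less) and, for L3S, registers or unregisters the shared netfilter
 * hooks and l3mdev ops on the master.  Any dev_change_flags() failure is
 * rolled back before returning.
 */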
static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval)
{
	struct ipvl_dev *ipvlan;
	struct net_device *mdev = port->dev;
	unsigned int flags;
	int err;

	ASSERT_RTNL();
	if (port->mode != nval) {
		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
			flags = ipvlan->dev->flags;
			if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) {
				err = dev_change_flags(ipvlan->dev,
						       flags | IFF_NOARP);
			} else {
				err = dev_change_flags(ipvlan->dev,
						       flags & ~IFF_NOARP);
			}
			if (unlikely(err))
				goto fail;
		}
		if (nval == IPVLAN_MODE_L3S) {
			/* New mode is L3S */
			err = ipvlan_register_nf_hook();
			if (!err) {
				mdev->l3mdev_ops = &ipvl_l3mdev_ops;
				mdev->priv_flags |= IFF_L3MDEV_RX_HANDLER;
			} else
				goto fail;
		} else if (port->mode == IPVLAN_MODE_L3S) {
			/* Old mode was L3S */
			mdev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
			ipvlan_unregister_nf_hook();
			mdev->l3mdev_ops = NULL;
		}
		port->mode = nval;
	}
	return 0;

fail:
	/* Undo the flags changes that have been done so far. */
	list_for_each_entry_continue_reverse(ipvlan, &port->ipvlans, pnode) {
		flags = ipvlan->dev->flags;
		if (port->mode == IPVLAN_MODE_L3 ||
		    port->mode == IPVLAN_MODE_L3S)
			dev_change_flags(ipvlan->dev, flags | IFF_NOARP);
		else
			dev_change_flags(ipvlan->dev, flags & ~IFF_NOARP);
	}

	return err;
}

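/* An ipvl_port is the per-master state shared by all slaves hanging off one
 * physical device.  It is created when the first slave is attached and torn
 * down from ipvlan_uninit() once the last slave goes away.
 */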
static int ipvlan_port_create(struct net_device *dev)
{
	struct ipvl_port *port;
	int err, idx;

	if (dev->type != ARPHRD_ETHER || dev->flags & IFF_LOOPBACK) {
		netdev_err(dev, "Master is either lo or non-ether device\n");
		return -EINVAL;
	}

	if (netif_is_macvlan_port(dev)) {
		netdev_err(dev, "Master is a macvlan port.\n");
		return -EBUSY;
	}

	port = kzalloc(sizeof(struct ipvl_port), GFP_KERNEL);
	if (!port)
		return -ENOMEM;

	port->dev = dev;
	port->mode = IPVLAN_MODE_L3;
	INIT_LIST_HEAD(&port->ipvlans);
	for (idx = 0; idx < IPVLAN_HASH_SIZE; idx++)
		INIT_HLIST_HEAD(&port->hlhead[idx]);

	skb_queue_head_init(&port->backlog);
	INIT_WORK(&port->wq, ipvlan_process_multicast);

	err = netdev_rx_handler_register(dev, ipvlan_handle_frame, port);
	if (err)
		goto err;

	dev->priv_flags |= IFF_IPVLAN_MASTER;
	return 0;

err:
	kfree_rcu(port, rcu);
	return err;
}

static void ipvlan_port_destroy(struct net_device *dev)
{
	struct ipvl_port *port = ipvlan_port_get_rtnl(dev);

	dev->priv_flags &= ~IFF_IPVLAN_MASTER;
	if (port->mode == IPVLAN_MODE_L3S) {
		dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER;
		ipvlan_unregister_nf_hook();
		dev->l3mdev_ops = NULL;
	}
	netdev_rx_handler_unregister(dev);
	cancel_work_sync(&port->wq);
	__skb_queue_purge(&port->backlog);
	kfree_rcu(port, rcu);
}

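/* Feature bits a slave may inherit from its master, and the link-state bits
 * (carrier/dormant) that are mirrored from the master onto each slave.
 */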
#define IPVLAN_FEATURES \
	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)

#define IPVLAN_STATE_MASK \
	((1<<__LINK_STATE_NOCARRIER) | (1<<__LINK_STATE_DORMANT))

static int ipvlan_init(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	const struct net_device *phy_dev = ipvlan->phy_dev;
	struct ipvl_port *port = ipvlan->port;

	dev->state = (dev->state & ~IPVLAN_STATE_MASK) |
		     (phy_dev->state & IPVLAN_STATE_MASK);
	dev->features = phy_dev->features & IPVLAN_FEATURES;
	dev->features |= NETIF_F_LLTX;
	dev->gso_max_size = phy_dev->gso_max_size;
	dev->gso_max_segs = phy_dev->gso_max_segs;
	dev->hard_header_len = phy_dev->hard_header_len;

	netdev_lockdep_set_classes(dev);

	ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats);
	if (!ipvlan->pcpu_stats)
		return -ENOMEM;

	port->count += 1;

	return 0;
}

static void ipvlan_uninit(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan->port;

	free_percpu(ipvlan->pcpu_stats);

	port->count -= 1;
	if (!port->count)
		ipvlan_port_destroy(port->dev);
}

static int ipvlan_open(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;
	struct ipvl_addr *addr;

	if (ipvlan->port->mode == IPVLAN_MODE_L3 ||
	    ipvlan->port->mode == IPVLAN_MODE_L3S)
		dev->flags |= IFF_NOARP;
	else
		dev->flags &= ~IFF_NOARP;

	list_for_each_entry(addr, &ipvlan->addrs, anode)
		ipvlan_ht_addr_add(ipvlan, addr);

	return dev_uc_add(phy_dev, phy_dev->dev_addr);
}

static int ipvlan_stop(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;
	struct ipvl_addr *addr;

	dev_uc_unsync(phy_dev, dev);
	dev_mc_unsync(phy_dev, dev);

	dev_uc_del(phy_dev, phy_dev->dev_addr);

	list_for_each_entry(addr, &ipvlan->addrs, anode)
		ipvlan_ht_addr_del(addr);

	return 0;
}

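/* Transmit path: ipvlan_queue_xmit() decides whether the skb is delivered
 * locally to another slave on the same port or handed to the physical
 * device.  NET_XMIT_SUCCESS and NET_XMIT_CN both count as delivered for the
 * per-CPU tx stats; everything else is accounted as a drop.
 */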
static netdev_tx_t ipvlan_start_xmit(struct sk_buff *skb,
				     struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	int skblen = skb->len;
	int ret;

	ret = ipvlan_queue_xmit(skb, dev);
	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
		struct ipvl_pcpu_stats *pcptr;

		pcptr = this_cpu_ptr(ipvlan->pcpu_stats);

		u64_stats_update_begin(&pcptr->syncp);
		pcptr->tx_pkts++;
		pcptr->tx_bytes += skblen;
		u64_stats_update_end(&pcptr->syncp);
	} else {
		this_cpu_inc(ipvlan->pcpu_stats->tx_drps);
	}
	return ret;
}

static netdev_features_t ipvlan_fix_features(struct net_device *dev,
					     netdev_features_t features)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	return features & (ipvlan->sfeatures | ~IPVLAN_FEATURES);
}

static void ipvlan_change_rx_flags(struct net_device *dev, int change)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;

	if (change & IFF_ALLMULTI)
		dev_set_allmulti(phy_dev, dev->flags & IFF_ALLMULTI ? 1 : -1);
}

static void ipvlan_set_multicast_mac_filter(struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
		bitmap_fill(ipvlan->mac_filters, IPVLAN_MAC_FILTER_SIZE);
	} else {
		struct netdev_hw_addr *ha;
		DECLARE_BITMAP(mc_filters, IPVLAN_MAC_FILTER_SIZE);

		bitmap_zero(mc_filters, IPVLAN_MAC_FILTER_SIZE);
		netdev_for_each_mc_addr(ha, dev)
			__set_bit(ipvlan_mac_hash(ha->addr), mc_filters);

		/* Turn-on broadcast bit irrespective of address family,
		 * since broadcast is deferred to a work-queue, hence no
		 * impact on fast-path processing.
		 */
		__set_bit(ipvlan_mac_hash(dev->broadcast), mc_filters);

		bitmap_copy(ipvlan->mac_filters, mc_filters,
			    IPVLAN_MAC_FILTER_SIZE);
	}
	dev_uc_sync(ipvlan->phy_dev, dev);
	dev_mc_sync(ipvlan->phy_dev, dev);
}

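/* 64-bit packet/byte counters are read under the per-CPU u64_stats seqcount;
 * the u32 error/drop counters are updated without it and are simply summed.
 */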
static struct rtnl_link_stats64 *ipvlan_get_stats64(struct net_device *dev,
						    struct rtnl_link_stats64 *s)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	if (ipvlan->pcpu_stats) {
		struct ipvl_pcpu_stats *pcptr;
		u64 rx_pkts, rx_bytes, rx_mcast, tx_pkts, tx_bytes;
		u32 rx_errs = 0, tx_drps = 0;
		u32 strt;
		int idx;

		for_each_possible_cpu(idx) {
			pcptr = per_cpu_ptr(ipvlan->pcpu_stats, idx);
			do {
				strt = u64_stats_fetch_begin_irq(&pcptr->syncp);
				rx_pkts = pcptr->rx_pkts;
				rx_bytes = pcptr->rx_bytes;
				rx_mcast = pcptr->rx_mcast;
				tx_pkts = pcptr->tx_pkts;
				tx_bytes = pcptr->tx_bytes;
			} while (u64_stats_fetch_retry_irq(&pcptr->syncp,
							   strt));

			s->rx_packets += rx_pkts;
			s->rx_bytes += rx_bytes;
			s->multicast += rx_mcast;
			s->tx_packets += tx_pkts;
			s->tx_bytes += tx_bytes;

			/* u32 values are updated without syncp protection. */
			rx_errs += pcptr->rx_errs;
			tx_drps += pcptr->tx_drps;
		}
		s->rx_errors = rx_errs;
		s->rx_dropped = rx_errs;
		s->tx_dropped = tx_drps;
	}
	return s;
}

static int ipvlan_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;

	return vlan_vid_add(phy_dev, proto, vid);
}

static int ipvlan_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
				   u16 vid)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;

	vlan_vid_del(phy_dev, proto, vid);
	return 0;
}

static int ipvlan_get_iflink(const struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	return ipvlan->phy_dev->ifindex;
}

static const struct net_device_ops ipvlan_netdev_ops = {
	.ndo_init		= ipvlan_init,
	.ndo_uninit		= ipvlan_uninit,
	.ndo_open		= ipvlan_open,
	.ndo_stop		= ipvlan_stop,
	.ndo_start_xmit		= ipvlan_start_xmit,
	.ndo_fix_features	= ipvlan_fix_features,
	.ndo_change_rx_flags	= ipvlan_change_rx_flags,
	.ndo_set_rx_mode	= ipvlan_set_multicast_mac_filter,
	.ndo_get_stats64	= ipvlan_get_stats64,
	.ndo_vlan_rx_add_vid	= ipvlan_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid	= ipvlan_vlan_rx_kill_vid,
	.ndo_get_iflink		= ipvlan_get_iflink,
};

static int ipvlan_hard_header(struct sk_buff *skb, struct net_device *dev,
			      unsigned short type, const void *daddr,
			      const void *saddr, unsigned len)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct net_device *phy_dev = ipvlan->phy_dev;

	/* TODO Probably use a different field than dev_addr so that the
	 * mac-address on the virtual device is portable and can be carried
	 * while the packets use the mac-addr on the physical device.
	 */
	return dev_hard_header(skb, phy_dev, type, daddr,
			       saddr ? : dev->dev_addr, len);
}

static const struct header_ops ipvlan_header_ops = {
	.create		= ipvlan_hard_header,
	.parse		= eth_header_parse,
	.cache		= eth_header_cache,
	.cache_update	= eth_header_cache_update,
};

static int ipvlan_ethtool_get_link_ksettings(struct net_device *dev,
					     struct ethtool_link_ksettings *cmd)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);

	return __ethtool_get_link_ksettings(ipvlan->phy_dev, cmd);
}

static void ipvlan_ethtool_get_drvinfo(struct net_device *dev,
				       struct ethtool_drvinfo *drvinfo)
{
	strlcpy(drvinfo->driver, IPVLAN_DRV, sizeof(drvinfo->driver));
	strlcpy(drvinfo->version, IPV_DRV_VER, sizeof(drvinfo->version));
}

static u32 ipvlan_ethtool_get_msglevel(struct net_device *dev)
{
	const struct ipvl_dev *ipvlan = netdev_priv(dev);

	return ipvlan->msg_enable;
}

static void ipvlan_ethtool_set_msglevel(struct net_device *dev, u32 value)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	ipvlan->msg_enable = value;
}

static const struct ethtool_ops ipvlan_ethtool_ops = {
	.get_link		= ethtool_op_get_link,
	.get_link_ksettings	= ipvlan_ethtool_get_link_ksettings,
	.get_drvinfo		= ipvlan_ethtool_get_drvinfo,
	.get_msglevel		= ipvlan_ethtool_get_msglevel,
	.set_msglevel		= ipvlan_ethtool_set_msglevel,
};

static int ipvlan_nl_changelink(struct net_device *dev,
				struct nlattr *tb[], struct nlattr *data[])
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
	int err = 0;

	if (!data)
		return 0;
	if (!ns_capable(dev_net(ipvlan->phy_dev)->user_ns, CAP_NET_ADMIN))
		return -EPERM;

	if (data[IFLA_IPVLAN_MODE]) {
		u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]);

		err = ipvlan_set_port_mode(port, nmode);
	}
	return err;
}

static size_t ipvlan_nl_getsize(const struct net_device *dev)
{
	return (0
		+ nla_total_size(2) /* IFLA_IPVLAN_MODE */
		);
}

static int ipvlan_nl_validate(struct nlattr *tb[], struct nlattr *data[])
{
	if (data && data[IFLA_IPVLAN_MODE]) {
		u16 mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);

		if (mode < IPVLAN_MODE_L2 || mode >= IPVLAN_MODE_MAX)
			return -EINVAL;
	}
	return 0;
}

static int ipvlan_nl_fillinfo(struct sk_buff *skb,
			      const struct net_device *dev)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev);
	int ret = -EINVAL;

	if (!port)
		goto err;

	ret = -EMSGSIZE;
	if (nla_put_u16(skb, IFLA_IPVLAN_MODE, port->mode))
		goto err;

	return 0;

err:
	return ret;
}

static int ipvlan_link_new(struct net *src_net, struct net_device *dev,
			   struct nlattr *tb[], struct nlattr *data[])
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_port *port;
	struct net_device *phy_dev;
	int err;
	u16 mode = IPVLAN_MODE_L3;
	bool create = false;

	if (!tb[IFLA_LINK])
		return -EINVAL;

	phy_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK]));
	if (!phy_dev)
		return -ENODEV;

	if (netif_is_ipvlan(phy_dev)) {
		struct ipvl_dev *tmp = netdev_priv(phy_dev);

		phy_dev = tmp->phy_dev;
		if (!ns_capable(dev_net(phy_dev)->user_ns, CAP_NET_ADMIN))
			return -EPERM;
	} else if (!netif_is_ipvlan_port(phy_dev)) {
		err = ipvlan_port_create(phy_dev);
		if (err < 0)
			return err;
		create = true;
	}

	if (data && data[IFLA_IPVLAN_MODE])
		mode = nla_get_u16(data[IFLA_IPVLAN_MODE]);

	port = ipvlan_port_get_rtnl(phy_dev);
	ipvlan->phy_dev = phy_dev;
	ipvlan->dev = dev;
	ipvlan->port = port;
	ipvlan->sfeatures = IPVLAN_FEATURES;
	if (!tb[IFLA_MTU])
		ipvlan_adjust_mtu(ipvlan, phy_dev);
	INIT_LIST_HEAD(&ipvlan->addrs);

	/* TODO Probably put random address here to be presented to the
	 * world but keep using the physical-dev address for the outgoing
	 * packets.
	 */
	memcpy(dev->dev_addr, phy_dev->dev_addr, ETH_ALEN);

	dev->priv_flags |= IFF_IPVLAN_SLAVE;

	err = register_netdevice(dev);
	if (err < 0)
		goto destroy_ipvlan_port;

	err = netdev_upper_dev_link(phy_dev, dev);
	if (err)
		goto unregister_netdev;

	err = ipvlan_set_port_mode(port, mode);
	if (err)
		goto unlink_netdev;

	list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans);
	netif_stacked_transfer_operstate(phy_dev, dev);
	return 0;

unlink_netdev:
	netdev_upper_dev_unlink(phy_dev, dev);
unregister_netdev:
	unregister_netdevice(dev);
destroy_ipvlan_port:
	if (create)
		ipvlan_port_destroy(phy_dev);
	return err;
}

static void ipvlan_link_delete(struct net_device *dev, struct list_head *head)
{
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct ipvl_addr *addr, *next;

	list_for_each_entry_safe(addr, next, &ipvlan->addrs, anode) {
		ipvlan_ht_addr_del(addr);
		list_del(&addr->anode);
		kfree_rcu(addr, rcu);
	}

	list_del_rcu(&ipvlan->pnode);
	unregister_netdevice_queue(dev, head);
	netdev_upper_dev_unlink(ipvlan->phy_dev, dev);
}

static void ipvlan_link_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
	dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
	dev->netdev_ops = &ipvlan_netdev_ops;
	dev->destructor = free_netdev;
	dev->header_ops = &ipvlan_header_ops;
	dev->ethtool_ops = &ipvlan_ethtool_ops;
}

static const struct nla_policy ipvlan_nl_policy[IFLA_IPVLAN_MAX + 1] =
{
	[IFLA_IPVLAN_MODE] = { .type = NLA_U16 },
};

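/* Userspace creates slaves through the rtnl_link ops below; with an iproute2
 * build that knows about ipvlan this maps to something like (device names
 * here are just for illustration):
 *
 *	ip link add link eth0 name ipvl0 type ipvlan mode l3
 *
 * where the mode keyword (l2, l3 or l3s) is carried in IFLA_IPVLAN_MODE.
 */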
static struct rtnl_link_ops ipvlan_link_ops = {
	.kind		= "ipvlan",
	.priv_size	= sizeof(struct ipvl_dev),

	.get_size	= ipvlan_nl_getsize,
	.policy		= ipvlan_nl_policy,
	.validate	= ipvlan_nl_validate,
	.fill_info	= ipvlan_nl_fillinfo,
	.changelink	= ipvlan_nl_changelink,
	.maxtype	= IFLA_IPVLAN_MAX,

	.setup		= ipvlan_link_setup,
	.newlink	= ipvlan_link_new,
	.dellink	= ipvlan_link_delete,
};

static int ipvlan_link_register(struct rtnl_link_ops *ops)
{
	return rtnl_link_register(ops);
}

static int ipvlan_device_event(struct notifier_block *unused,
			       unsigned long event, void *ptr)
{
	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct ipvl_dev *ipvlan, *next;
	struct ipvl_port *port;
	LIST_HEAD(lst_kill);

	if (!netif_is_ipvlan_port(dev))
		return NOTIFY_DONE;

	port = ipvlan_port_get_rtnl(dev);

	switch (event) {
	case NETDEV_CHANGE:
		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
			netif_stacked_transfer_operstate(ipvlan->phy_dev,
							 ipvlan->dev);
		break;

	case NETDEV_UNREGISTER:
		if (dev->reg_state != NETREG_UNREGISTERING)
			break;

		list_for_each_entry_safe(ipvlan, next, &port->ipvlans,
					 pnode)
			ipvlan->dev->rtnl_link_ops->dellink(ipvlan->dev,
							    &lst_kill);
		unregister_netdevice_many(&lst_kill);
		break;

	case NETDEV_FEAT_CHANGE:
		list_for_each_entry(ipvlan, &port->ipvlans, pnode) {
			ipvlan->dev->features = dev->features & IPVLAN_FEATURES;
			ipvlan->dev->gso_max_size = dev->gso_max_size;
			ipvlan->dev->gso_max_segs = dev->gso_max_segs;
			netdev_features_change(ipvlan->dev);
		}
		break;

	case NETDEV_CHANGEMTU:
		list_for_each_entry(ipvlan, &port->ipvlans, pnode)
			ipvlan_adjust_mtu(ipvlan, dev);
		break;

	case NETDEV_PRE_TYPE_CHANGE:
		/* Forbid underlying device to change its type. */
		return NOTIFY_BAD;
	}
	return NOTIFY_DONE;
}

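/* The inet/inet6 address notifiers below keep the port's hash table in sync
 * with the IP addresses configured on each slave: ipvlan_ht_addr_add() makes
 * an address visible to ingress demux, ipvlan_ht_addr_del() hides it again.
 */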
static int ipvlan_add_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
{
	struct ipvl_addr *addr;

	if (ipvlan_addr_busy(ipvlan->port, ip6_addr, true)) {
		netif_err(ipvlan, ifup, ipvlan->dev,
			  "Failed to add IPv6=%pI6c addr for %s intf\n",
			  ip6_addr, ipvlan->dev->name);
		return -EINVAL;
	}
	addr = kzalloc(sizeof(struct ipvl_addr), GFP_ATOMIC);
	if (!addr)
		return -ENOMEM;

	addr->master = ipvlan;
	memcpy(&addr->ip6addr, ip6_addr, sizeof(struct in6_addr));
	addr->atype = IPVL_IPV6;
	list_add_tail(&addr->anode, &ipvlan->addrs);

	/* If the interface is not up, the address will be added to the hash
	 * list by ipvlan_open.
	 */
	if (netif_running(ipvlan->dev))
		ipvlan_ht_addr_add(ipvlan, addr);

	return 0;
}

static void ipvlan_del_addr6(struct ipvl_dev *ipvlan, struct in6_addr *ip6_addr)
{
	struct ipvl_addr *addr;

	addr = ipvlan_find_addr(ipvlan, ip6_addr, true);
	if (!addr)
		return;

	ipvlan_ht_addr_del(addr);
	list_del(&addr->anode);
	kfree_rcu(addr, rcu);
}

static int ipvlan_addr6_event(struct notifier_block *unused,
			      unsigned long event, void *ptr)
{
	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *)ptr;
	struct net_device *dev = (struct net_device *)if6->idev->dev;
	struct ipvl_dev *ipvlan = netdev_priv(dev);

	/* FIXME IPv6 autoconf calls us from bh without RTNL */
	if (in_softirq())
		return NOTIFY_DONE;

	if (!netif_is_ipvlan(dev))
		return NOTIFY_DONE;

	if (!ipvlan || !ipvlan->port)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		if (ipvlan_add_addr6(ipvlan, &if6->addr))
			return NOTIFY_BAD;
		break;

	case NETDEV_DOWN:
		ipvlan_del_addr6(ipvlan, &if6->addr);
		break;
	}

	return NOTIFY_OK;
}

static int ipvlan_add_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
	struct ipvl_addr *addr;

	if (ipvlan_addr_busy(ipvlan->port, ip4_addr, false)) {
		netif_err(ipvlan, ifup, ipvlan->dev,
			  "Failed to add IPv4=%pI4 on %s intf.\n",
			  ip4_addr, ipvlan->dev->name);
		return -EINVAL;
	}
	addr = kzalloc(sizeof(struct ipvl_addr), GFP_KERNEL);
	if (!addr)
		return -ENOMEM;

	addr->master = ipvlan;
	memcpy(&addr->ip4addr, ip4_addr, sizeof(struct in_addr));
	addr->atype = IPVL_IPV4;
	list_add_tail(&addr->anode, &ipvlan->addrs);

	/* If the interface is not up, the address will be added to the hash
	 * list by ipvlan_open.
	 */
	if (netif_running(ipvlan->dev))
		ipvlan_ht_addr_add(ipvlan, addr);

	return 0;
}

static void ipvlan_del_addr4(struct ipvl_dev *ipvlan, struct in_addr *ip4_addr)
{
	struct ipvl_addr *addr;

	addr = ipvlan_find_addr(ipvlan, ip4_addr, false);
	if (!addr)
		return;

	ipvlan_ht_addr_del(addr);
	list_del(&addr->anode);
	kfree_rcu(addr, rcu);
}

static int ipvlan_addr4_event(struct notifier_block *unused,
			      unsigned long event, void *ptr)
{
	struct in_ifaddr *if4 = (struct in_ifaddr *)ptr;
	struct net_device *dev = (struct net_device *)if4->ifa_dev->dev;
	struct ipvl_dev *ipvlan = netdev_priv(dev);
	struct in_addr ip4_addr;

	if (!netif_is_ipvlan(dev))
		return NOTIFY_DONE;

	if (!ipvlan || !ipvlan->port)
		return NOTIFY_DONE;

	switch (event) {
	case NETDEV_UP:
		ip4_addr.s_addr = if4->ifa_address;
		if (ipvlan_add_addr4(ipvlan, &ip4_addr))
			return NOTIFY_BAD;
		break;

	case NETDEV_DOWN:
		ip4_addr.s_addr = if4->ifa_address;
		ipvlan_del_addr4(ipvlan, &ip4_addr);
		break;
	}

	return NOTIFY_OK;
}

static struct notifier_block ipvlan_addr4_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr4_event,
};

static struct notifier_block ipvlan_notifier_block __read_mostly = {
	.notifier_call = ipvlan_device_event,
};

static struct notifier_block ipvlan_addr6_notifier_block __read_mostly = {
	.notifier_call = ipvlan_addr6_event,
};

static int __init ipvlan_init_module(void)
{
	int err;

	ipvlan_init_secret();
	register_netdevice_notifier(&ipvlan_notifier_block);
	register_inet6addr_notifier(&ipvlan_addr6_notifier_block);
	register_inetaddr_notifier(&ipvlan_addr4_notifier_block);

	err = ipvlan_link_register(&ipvlan_link_ops);
	if (err < 0)
		goto error;

	return 0;
error:
	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
	unregister_netdevice_notifier(&ipvlan_notifier_block);
	return err;
}

static void __exit ipvlan_cleanup_module(void)
{
	rtnl_link_unregister(&ipvlan_link_ops);
	unregister_netdevice_notifier(&ipvlan_notifier_block);
	unregister_inetaddr_notifier(&ipvlan_addr4_notifier_block);
	unregister_inet6addr_notifier(&ipvlan_addr6_notifier_block);
}

module_init(ipvlan_init_module);
module_exit(ipvlan_cleanup_module);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mahesh Bandewar <maheshb@google.com>");
MODULE_DESCRIPTION("Driver for L3 (IPv6/IPv4) based VLANs");
MODULE_ALIAS_RTNL_LINK("ipvlan");