// SPDX-License-Identifier: GPL-2.0
/* XDP sockets
 *
 * AF_XDP sockets allow a channel between XDP programs and userspace
 * applications.
 * Copyright(c) 2018 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Author(s): Björn Töpel <bjorn.topel@intel.com>
 *            Magnus Karlsson <magnus.karlsson@intel.com>
 */
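
/* A rough sketch of the userspace-facing setup sequence handled by this
 * file (illustrative only: error handling is omitted, the umem
 * registration fields are elided, and the ring size is an arbitrary
 * power of two):
 *
 *	int fd = socket(AF_XDP, SOCK_RAW, 0);
 *
 *	struct xdp_umem_reg mr = { ... };
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
 *
 *	int entries = 1024;
 *	setsockopt(fd, SOL_XDP, XDP_RX_RING, &entries, sizeof(entries));
 *	setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING, &entries, sizeof(entries));
 *
 * The rings are then mapped into userspace via mmap() (see xsk_mmap())
 * and the socket is bound to a device and queue (see xsk_bind()).
 */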

#define pr_fmt(fmt) "AF_XDP: %s: " fmt, __func__

#include <linux/if_xdp.h>
#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/socket.h>
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <net/xdp_sock.h>
#include <net/xdp.h>

#include "xsk_queue.h"
#include "xdp_umem.h"

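/* struct xdp_sock has struct sock as its first member, which is what
 * makes this plain cast from the socket core's struct sock valid.
 */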
static struct xdp_sock *xdp_sk(struct sock *sk)
{
        return (struct xdp_sock *)sk;
}

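/* Ring sizes must be non-zero powers of two, so that ring indices can
 * wrap with a cheap mask rather than a modulo. A queue may only be
 * created once per socket; re-sizing an existing one fails with -EINVAL.
 */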
static int xsk_init_queue(u32 entries, struct xsk_queue **queue,
                          bool umem_queue)
{
        struct xsk_queue *q;

        if (entries == 0 || *queue || !is_power_of_2(entries))
                return -EINVAL;

        q = xskq_create(entries, umem_queue);
        if (!q)
                return -ENOMEM;

        *queue = q;
        return 0;
}

static void __xsk_release(struct xdp_sock *xs)
{
        /* Wait for driver to stop using the xdp socket. */
        synchronize_net();

        dev_put(xs->dev);
}

static int xsk_release(struct socket *sock)
{
        struct sock *sk = sock->sk;
        struct xdp_sock *xs = xdp_sk(sk);
        struct net *net;

        if (!sk)
                return 0;

        net = sock_net(sk);

        local_bh_disable();
        sock_prot_inuse_add(net, sk->sk_prot, -1);
        local_bh_enable();

        if (xs->dev) {
                __xsk_release(xs);
                xs->dev = NULL;
        }

        sock_orphan(sk);
        sock->sk = NULL;

        sk_refcnt_debug_release(sk);
        sock_put(sk);

        return 0;
}

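/* Resolve a user-supplied fd to an AF_XDP socket, for the
 * XDP_SHARED_UMEM bind path below. On success the caller owns a
 * reference on the socket and must drop it with sockfd_put().
 */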
static struct socket *xsk_lookup_xsk_from_fd(int fd)
{
        struct socket *sock;
        int err;

        sock = sockfd_lookup(fd, &err);
        if (!sock)
                return ERR_PTR(-ENOTSOCK);

        if (sock->sk->sk_family != PF_XDP) {
                sockfd_put(sock);
                return ERR_PTR(-ENOPROTOOPT);
        }

        return sock;
}

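/* Bind an XDP socket to a <device, queue_id> pair. With XDP_SHARED_UMEM
 * in sxdp_flags, sxdp_shared_umem_fd must name an AF_XDP socket whose
 * already-registered umem is inherited; both sockets then have to sit
 * on the same device and queue. A sketch of the sharing case, where
 * ifindex and first_fd are placeholders:
 *
 *	struct sockaddr_xdp sxdp = {
 *		.sxdp_family = AF_XDP,
 *		.sxdp_ifindex = ifindex,
 *		.sxdp_queue_id = 0,
 *		.sxdp_flags = XDP_SHARED_UMEM,
 *		.sxdp_shared_umem_fd = first_fd,
 *	};
 *	bind(second_fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
 */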
static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
        struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
        struct sock *sk = sock->sk;
        struct net_device *dev, *dev_curr;
        struct xdp_sock *xs = xdp_sk(sk);
        struct xdp_umem *old_umem = NULL;
        int err = 0;

        if (addr_len < sizeof(struct sockaddr_xdp))
                return -EINVAL;
        if (sxdp->sxdp_family != AF_XDP)
                return -EINVAL;

        mutex_lock(&xs->mutex);
        dev_curr = xs->dev;
        dev = dev_get_by_index(sock_net(sk), sxdp->sxdp_ifindex);
        if (!dev) {
                err = -ENODEV;
                goto out_release;
        }

        if (!xs->rx) {
                err = -EINVAL;
                goto out_unlock;
        }

        if (sxdp->sxdp_queue_id >= dev->num_rx_queues) {
                err = -EINVAL;
                goto out_unlock;
        }

        if (sxdp->sxdp_flags & XDP_SHARED_UMEM) {
                struct xdp_sock *umem_xs;
                struct socket *sock;

                if (xs->umem) {
                        /* We already have our own. */
                        err = -EINVAL;
                        goto out_unlock;
                }

                sock = xsk_lookup_xsk_from_fd(sxdp->sxdp_shared_umem_fd);
                if (IS_ERR(sock)) {
                        err = PTR_ERR(sock);
                        goto out_unlock;
                }

                umem_xs = xdp_sk(sock->sk);
                if (!umem_xs->umem) {
                        /* No umem to inherit. */
                        err = -EBADF;
                        sockfd_put(sock);
                        goto out_unlock;
                } else if (umem_xs->dev != dev ||
                           umem_xs->queue_id != sxdp->sxdp_queue_id) {
                        err = -EINVAL;
                        sockfd_put(sock);
                        goto out_unlock;
                }

                xdp_get_umem(umem_xs->umem);
                old_umem = xs->umem;
                xs->umem = umem_xs->umem;
                sockfd_put(sock);
        } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) {
                err = -EINVAL;
                goto out_unlock;
        }

        /* Rebind? */
        if (dev_curr && (dev_curr != dev ||
                         xs->queue_id != sxdp->sxdp_queue_id)) {
                __xsk_release(xs);
                if (old_umem)
                        xdp_put_umem(old_umem);
        }

        xs->dev = dev;
        xs->queue_id = sxdp->sxdp_queue_id;

        xskq_set_umem(xs->rx, &xs->umem->props);

out_unlock:
        if (err)
                dev_put(dev);
out_release:
        mutex_unlock(&xs->mutex);
        return err;
}

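/* Socket options, all at level SOL_XDP. Note the ordering constraints
 * enforced below: XDP_UMEM_REG may only succeed once per socket
 * (-EBUSY thereafter), and XDP_UMEM_FILL_RING requires a previously
 * registered umem. All ring sizes funnel through xsk_init_queue().
 */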
static int xsk_setsockopt(struct socket *sock, int level, int optname,
                          char __user *optval, unsigned int optlen)
{
        struct sock *sk = sock->sk;
        struct xdp_sock *xs = xdp_sk(sk);
        int err;

        if (level != SOL_XDP)
                return -ENOPROTOOPT;

        switch (optname) {
        case XDP_RX_RING:
        {
                struct xsk_queue **q;
                int entries;

                if (optlen < sizeof(entries))
                        return -EINVAL;
                if (copy_from_user(&entries, optval, sizeof(entries)))
                        return -EFAULT;

                mutex_lock(&xs->mutex);
                q = &xs->rx;
                err = xsk_init_queue(entries, q, false);
                mutex_unlock(&xs->mutex);
                return err;
        }
        case XDP_UMEM_REG:
        {
                struct xdp_umem_reg mr;
                struct xdp_umem *umem;

                if (xs->umem)
                        return -EBUSY;

                if (copy_from_user(&mr, optval, sizeof(mr)))
                        return -EFAULT;

                mutex_lock(&xs->mutex);
                err = xdp_umem_create(&umem);
                if (err) {
                        mutex_unlock(&xs->mutex);
                        return err;
                }

                err = xdp_umem_reg(umem, &mr);
                if (err) {
                        kfree(umem);
                        mutex_unlock(&xs->mutex);
                        return err;
                }

                /* Make sure umem is ready before it can be seen by others */
                smp_wmb();

                xs->umem = umem;
                mutex_unlock(&xs->mutex);
                return 0;
        }
        case XDP_UMEM_FILL_RING:
        {
                struct xsk_queue **q;
                int entries;

                if (!xs->umem)
                        return -EINVAL;

                if (optlen < sizeof(entries))
                        return -EINVAL;
                if (copy_from_user(&entries, optval, sizeof(entries)))
                        return -EFAULT;

                mutex_lock(&xs->mutex);
                q = &xs->umem->fq;
                err = xsk_init_queue(entries, q, true);
                mutex_unlock(&xs->mutex);
                return err;
        }
        default:
                break;
        }

        return -ENOPROTOOPT;
}

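/* The rings are exposed to userspace by mmap()ing the socket fd, with
 * the page offset selecting the ring: XDP_PGOFF_RX_RING for the RX
 * ring, XDP_UMEM_PGOFF_FILL_RING for the umem fill ring. A sketch,
 * where rx_ring_size is a placeholder the caller derives from the ring
 * layout:
 *
 *	void *rx = mmap(NULL, rx_ring_size, PROT_READ | PROT_WRITE,
 *			MAP_SHARED | MAP_POPULATE, fd, XDP_PGOFF_RX_RING);
 */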
static int xsk_mmap(struct file *file, struct socket *sock,
                    struct vm_area_struct *vma)
{
        unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
        unsigned long size = vma->vm_end - vma->vm_start;
        struct xdp_sock *xs = xdp_sk(sock->sk);
        struct xsk_queue *q = NULL;
        unsigned long pfn;
        struct page *qpg;

        if (offset == XDP_PGOFF_RX_RING) {
                q = xs->rx;
        } else {
                if (!xs->umem)
                        return -EINVAL;

                if (offset == XDP_UMEM_PGOFF_FILL_RING)
                        q = xs->umem->fq;
                else
                        return -EINVAL;
        }

        if (!q)
                return -EINVAL;

        qpg = virt_to_head_page(q->ring);
        if (size > (PAGE_SIZE << compound_order(qpg)))
                return -EINVAL;

        pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
        return remap_pfn_range(vma, vma->vm_start, pfn,
                               size, vma->vm_page_prot);
}

static struct proto xsk_proto = {
        .name =         "XDP",
        .owner =        THIS_MODULE,
        .obj_size =     sizeof(struct xdp_sock),
};

static const struct proto_ops xsk_proto_ops = {
        .family         = PF_XDP,
        .owner          = THIS_MODULE,
        .release        = xsk_release,
        .bind           = xsk_bind,
        .connect        = sock_no_connect,
        .socketpair     = sock_no_socketpair,
        .accept         = sock_no_accept,
        .getname        = sock_no_getname,
        .poll           = sock_no_poll,
        .ioctl          = sock_no_ioctl,
        .listen         = sock_no_listen,
        .shutdown       = sock_no_shutdown,
        .setsockopt     = xsk_setsockopt,
        .getsockopt     = sock_no_getsockopt,
        .sendmsg        = sock_no_sendmsg,
        .recvmsg        = sock_no_recvmsg,
        .mmap           = xsk_mmap,
        .sendpage       = sock_no_sendpage,
};

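/* Socket destructor; runs once the last reference on the socket is
 * dropped, and tears down the RX ring and the umem reference.
 */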
static void xsk_destruct(struct sock *sk)
{
        struct xdp_sock *xs = xdp_sk(sk);

        if (!sock_flag(sk, SOCK_DEAD))
                return;

        xskq_destroy(xs->rx);
        xdp_put_umem(xs->umem);

        sk_refcnt_debug_dec(sk);
}

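/* Creating an AF_XDP socket requires CAP_NET_RAW in the owning user
 * namespace; only SOCK_RAW with protocol 0 is accepted.
 */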
static int xsk_create(struct net *net, struct socket *sock, int protocol,
                      int kern)
{
        struct sock *sk;
        struct xdp_sock *xs;

        if (!ns_capable(net->user_ns, CAP_NET_RAW))
                return -EPERM;
        if (sock->type != SOCK_RAW)
                return -ESOCKTNOSUPPORT;

        if (protocol)
                return -EPROTONOSUPPORT;

        sock->state = SS_UNCONNECTED;

        sk = sk_alloc(net, PF_XDP, GFP_KERNEL, &xsk_proto, kern);
        if (!sk)
                return -ENOBUFS;

        sock->ops = &xsk_proto_ops;

        sock_init_data(sock, sk);

        sk->sk_family = PF_XDP;

        sk->sk_destruct = xsk_destruct;
        sk_refcnt_debug_inc(sk);

        xs = xdp_sk(sk);
        mutex_init(&xs->mutex);

        local_bh_disable();
        sock_prot_inuse_add(net, &xsk_proto, 1);
        local_bh_enable();

        return 0;
}

static const struct net_proto_family xsk_family_ops = {
        .family = PF_XDP,
        .create = xsk_create,
        .owner  = THIS_MODULE,
};

static int __init xsk_init(void)
{
        int err;

        err = proto_register(&xsk_proto, 0 /* no slab */);
        if (err)
                goto out;

        err = sock_register(&xsk_family_ops);
        if (err)
                goto out_proto;

        return 0;

out_proto:
        proto_unregister(&xsk_proto);
out:
        return err;
}

fs_initcall(xsk_init);