/*
 * NETLINK	Kernel-user communication protocol.
 *
 * 		Authors:	Alan Cox <alan@redhat.com>
 * 				Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Tue Jun 26 14:36:48 MEST 2001 Herbert "herp" Rosmanith
 *                               added netlink_proto_exit
 * Tue Jan 22 18:32:44 BRST 2002 Arnaldo C. de Melo <acme@conectiva.com.br>
 *                               use nlk_sk, as sk->protinfo is on a diet 8)
 * Fri Jul 22 19:51:12 MEST 2005 Harald Welte <laforge@gnumonks.org>
 * 				- inc module use count of module that owns
 * 				  the kernel socket in case userspace opens
 * 				  socket of same protocol
 * 				- remove all module support, since netlink is
 * 				  mandatory if CONFIG_NET=y these days
 */
23
24#include <linux/config.h>
25#include <linux/module.h>
26
Randy Dunlap4fc268d2006-01-11 12:17:47 -080027#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <linux/kernel.h>
29#include <linux/init.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/signal.h>
31#include <linux/sched.h>
32#include <linux/errno.h>
33#include <linux/string.h>
34#include <linux/stat.h>
35#include <linux/socket.h>
36#include <linux/un.h>
37#include <linux/fcntl.h>
38#include <linux/termios.h>
39#include <linux/sockios.h>
40#include <linux/net.h>
41#include <linux/fs.h>
42#include <linux/slab.h>
43#include <asm/uaccess.h>
44#include <linux/skbuff.h>
45#include <linux/netdevice.h>
46#include <linux/rtnetlink.h>
47#include <linux/proc_fs.h>
48#include <linux/seq_file.h>
49#include <linux/smp_lock.h>
50#include <linux/notifier.h>
51#include <linux/security.h>
52#include <linux/jhash.h>
53#include <linux/jiffies.h>
54#include <linux/random.h>
55#include <linux/bitops.h>
56#include <linux/mm.h>
57#include <linux/types.h>
Andrew Morton54e0f522005-04-30 07:07:04 +010058#include <linux/audit.h>
Steve Grubbe7c34972006-04-03 09:08:13 -040059#include <linux/selinux.h>
Andrew Morton54e0f522005-04-30 07:07:04 +010060
Linus Torvalds1da177e2005-04-16 15:20:36 -070061#include <net/sock.h>
62#include <net/scm.h>
Thomas Graf82ace472005-11-10 02:25:53 +010063#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064
65#define Nprintk(a...)
Patrick McHardyf7fa9b12005-08-15 12:29:13 -070066#define NLGRPSZ(x) (ALIGN(x, sizeof(unsigned long) * 8) / 8)
Linus Torvalds1da177e2005-04-16 15:20:36 -070067
/* Per-socket netlink state; overlays struct sock via sk_alloc obj_size. */
struct netlink_sock {
	/* struct sock has to be the first member of netlink_sock */
	struct sock sk;
	u32 pid;		/* bound port id; 0 for kernel sockets */
	u32 dst_pid;		/* connected peer's port id */
	u32 dst_group;		/* connected peer's group, 1-based; 0 = none */
	u32 flags;		/* NETLINK_KERNEL_SOCKET / NETLINK_RECV_PKTINFO */
	u32 subscriptions;	/* count of multicast groups currently joined */
	u32 ngroups;		/* number of bits valid in *groups */
	unsigned long *groups;	/* multicast membership bitmap (lazily allocated) */
	unsigned long state;	/* bit 0 set => receive queue overran (see netlink_overrun) */
	wait_queue_head_t wait;	/* senders blocked waiting for rcvbuf space */
	struct netlink_callback *cb;	/* in-progress dump callback, if any */
	spinlock_t cb_lock;		/* protects cb */
	void (*data_ready)(struct sock *sk, int bytes);	/* kernel-socket input hook */
	struct module *module;	/* module owning this protocol's kernel socket */
};
85
Patrick McHardy77247bb2005-08-14 19:27:13 -070086#define NETLINK_KERNEL_SOCKET 0x1
Patrick McHardy9a4595b2005-08-15 12:32:15 -070087#define NETLINK_RECV_PKTINFO 0x2
Patrick McHardy77247bb2005-08-14 19:27:13 -070088
/* Downcast a struct sock to its enclosing netlink_sock (sk is first member). */
static inline struct netlink_sock *nlk_sk(struct sock *sk)
{
	return (struct netlink_sock *)sk;
}
93
/* Resizable pid -> socket hash table, one per netlink protocol. */
struct nl_pid_hash {
	struct hlist_head *table;	/* mask + 1 buckets */
	unsigned long rehash_time;	/* earliest jiffies for next shrink-triggered rehash */

	unsigned int mask;		/* bucket index mask (table size - 1) */
	unsigned int shift;		/* log2 of table size */

	unsigned int entries;		/* number of sockets currently hashed */
	unsigned int max_shift;		/* growth limit for shift */

	u32 rnd;			/* per-table jhash seed, re-randomized on rehash */
};
106
/* Per-protocol state; nl_table[] is indexed by netlink protocol number. */
struct netlink_table {
	struct nl_pid_hash hash;	/* unicast lookup by pid */
	struct hlist_head mc_list;	/* sockets with multicast subscriptions */
	unsigned long *listeners;	/* union of all members' group bitmaps */
	unsigned int nl_nonroot;	/* NL_NONROOT_* flags permitted without CAP_NET_ADMIN */
	unsigned int groups;		/* number of multicast groups for this protocol */
	struct module *module;		/* owner of the kernel socket */
	int registered;			/* non-zero once a kernel socket exists */
};
116
117static struct netlink_table *nl_table;
118
119static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
120
121static int netlink_dump(struct sock *sk);
122static void netlink_destroy_callback(struct netlink_callback *cb);
123
124static DEFINE_RWLOCK(nl_table_lock);
125static atomic_t nl_table_users = ATOMIC_INIT(0);
126
Alan Sterne041c682006-03-27 01:16:30 -0800127static ATOMIC_NOTIFIER_HEAD(netlink_chain);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128
Patrick McHardyd629b832005-08-14 19:27:50 -0700129static u32 netlink_group_mask(u32 group)
130{
131 return group ? 1 << (group - 1) : 0;
132}
133
/* Bucket for a given pid: jhash seeded with the table's random key. */
static struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid)
{
	return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask];
}
138
/*
 * sk_destruct hook: drop any queued skbs and sanity-check that the socket
 * was fully torn down (no rmem/wmem charges, no dump callback, no groups).
 */
static void netlink_sock_destruct(struct sock *sk)
{
	skb_queue_purge(&sk->sk_receive_queue);

	if (!sock_flag(sk, SOCK_DEAD)) {
		/* Destructor ran on a live socket: refuse to free it. */
		printk("Freeing alive netlink socket %p\n", sk);
		return;
	}
	BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
	BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
	BUG_TRAP(!nlk_sk(sk)->cb);
	BUG_TRAP(!nlk_sk(sk)->groups);
}
152
153/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on SMP.
154 * Look, when several writers sleep and reader wakes them up, all but one
155 * immediately hit write lock and grab all the cpus. Exclusive sleep solves
156 * this, _but_ remember, it adds useless work on UP machines.
157 */
158
/*
 * Acquire exclusive (writer) access to nl_table.  Readers register via
 * netlink_lock_table() by bumping nl_table_users under the rwlock; a writer
 * must hold the rwlock AND wait until that count drains to zero, dropping
 * the lock around schedule() so readers can finish.
 */
static void netlink_table_grab(void)
{
	write_lock_bh(&nl_table_lock);

	if (atomic_read(&nl_table_users)) {
		DECLARE_WAITQUEUE(wait, current);

		/* exclusive wait: avoid thundering-herd on writer wakeup */
		add_wait_queue_exclusive(&nl_table_wait, &wait);
		for(;;) {
			set_current_state(TASK_UNINTERRUPTIBLE);
			if (atomic_read(&nl_table_users) == 0)
				break;
			write_unlock_bh(&nl_table_lock);
			schedule();
			write_lock_bh(&nl_table_lock);
		}

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nl_table_wait, &wait);
	}
}
180
/* Release writer access and wake both waiting readers and writers. */
static __inline__ void netlink_table_ungrab(void)
{
	write_unlock_bh(&nl_table_lock);
	wake_up(&nl_table_wait);
}
186
/*
 * Register as a reader of nl_table.  Taking and releasing the rwlock
 * around the increment synchronizes against a writer in
 * netlink_table_grab(); afterwards only the atomic count pins us.
 */
static __inline__ void
netlink_lock_table(void)
{
	/* read_lock() synchronizes us to netlink_table_grab */

	read_lock(&nl_table_lock);
	atomic_inc(&nl_table_users);
	read_unlock(&nl_table_lock);
}
196
/* Drop reader registration; wake a writer blocked in netlink_table_grab(). */
static __inline__ void
netlink_unlock_table(void)
{
	if (atomic_dec_and_test(&nl_table_users))
		wake_up(&nl_table_wait);
}
203
/*
 * Find the socket bound to @pid on @protocol and take a reference on it.
 * Returns NULL if no such socket exists.  Caller must sock_put() the result.
 */
static __inline__ struct sock *netlink_lookup(int protocol, u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[protocol].hash;
	struct hlist_head *head;
	struct sock *sk;
	struct hlist_node *node;

	read_lock(&nl_table_lock);
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(sk, node, head) {
		if (nlk_sk(sk)->pid == pid) {
			sock_hold(sk);	/* pin before dropping the table lock */
			goto found;
		}
	}
	sk = NULL;
found:
	read_unlock(&nl_table_lock);
	return sk;
}
224
225static inline struct hlist_head *nl_pid_hash_alloc(size_t size)
226{
227 if (size <= PAGE_SIZE)
228 return kmalloc(size, GFP_ATOMIC);
229 else
230 return (struct hlist_head *)
231 __get_free_pages(GFP_ATOMIC, get_order(size));
232}
233
234static inline void nl_pid_hash_free(struct hlist_head *table, size_t size)
235{
236 if (size <= PAGE_SIZE)
237 kfree(table);
238 else
239 free_pages((unsigned long)table, get_order(size));
240}
241
/*
 * Rebuild the pid hash with a fresh random seed, optionally doubling its
 * size (@grow).  Returns 1 if the table was rebuilt, 0 on allocation
 * failure or when growth is capped by max_shift.  Caller holds the table
 * grabbed, so GFP_ATOMIC allocation is used.
 */
static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow)
{
	unsigned int omask, mask, shift;
	size_t osize, size;
	struct hlist_head *otable, *table;
	int i;

	omask = mask = hash->mask;
	osize = size = (mask + 1) * sizeof(*table);
	shift = hash->shift;

	if (grow) {
		if (++shift > hash->max_shift)
			return 0;	/* already at maximum size */
		mask = mask * 2 + 1;
		size *= 2;
	}

	table = nl_pid_hash_alloc(size);
	if (!table)
		return 0;

	memset(table, 0, size);
	otable = hash->table;
	hash->table = table;
	hash->mask = mask;
	hash->shift = shift;
	/* new seed scatters any pathological bucket clustering */
	get_random_bytes(&hash->rnd, sizeof(hash->rnd));

	/* move every socket into its bucket under the new mask/seed */
	for (i = 0; i <= omask; i++) {
		struct sock *sk;
		struct hlist_node *node, *tmp;

		sk_for_each_safe(sk, node, tmp, &otable[i])
			__sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid));
	}

	nl_pid_hash_free(otable, osize);
	/* rate-limit seed-only (non-growing) rehashes to once per 10 min */
	hash->rehash_time = jiffies + 10 * 60 * HZ;
	return 1;
}
283
/*
 * Decide whether the hash needs rebuilding before an insert that found a
 * chain of length @len.  Grow when average occupancy exceeds 1; otherwise
 * reseed (same size) if this chain is unusually long, at most once per
 * rehash_time window.  Returns 1 if a rehash was attempted.
 */
static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
{
	int avg = hash->entries >> hash->shift;

	if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1))
		return 1;

	if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) {
		nl_pid_hash_rehash(hash, 0);
		return 1;
	}

	return 0;
}
298
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800299static const struct proto_ops netlink_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300
/*
 * Recompute the protocol-wide listeners bitmap as the OR of every bound
 * socket's group bitmap.  Caller must hold the table grabbed.
 */
static void
netlink_update_listeners(struct sock *sk)
{
	struct netlink_table *tbl = &nl_table[sk->sk_protocol];
	struct hlist_node *node;
	unsigned long mask;
	unsigned int i;

	for (i = 0; i < NLGRPSZ(tbl->groups)/sizeof(unsigned long); i++) {
		mask = 0;
		sk_for_each_bound(sk, node, &tbl->mc_list)
			mask |= nlk_sk(sk)->groups[i];
		tbl->listeners[i] = mask;
	}
	/* this function is only called with the netlink table "grabbed", which
	 * makes sure updates are visible before bind or setsockopt return. */
}
318
/*
 * Bind @sk to @pid in its protocol's hash.  Returns 0 on success,
 * -EADDRINUSE if another socket owns the pid, -EBUSY if @sk is already
 * bound, -ENOMEM if the entry counter would overflow a u32.
 */
static int netlink_insert(struct sock *sk, u32 pid)
{
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	int err = -EADDRINUSE;
	struct sock *osk;
	struct hlist_node *node;
	int len;

	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	len = 0;
	sk_for_each(osk, node, head) {
		if (nlk_sk(osk)->pid == pid)
			break;
		len++;
	}
	if (node)
		goto err;	/* pid already taken */

	err = -EBUSY;
	if (nlk_sk(sk)->pid)
		goto err;	/* socket already bound */

	err = -ENOMEM;
	if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX))
		goto err;

	/* long chain found: maybe grow/reseed, then pick the bucket again */
	if (len && nl_pid_hash_dilute(hash, len))
		head = nl_pid_hashfn(hash, pid);
	hash->entries++;
	nlk_sk(sk)->pid = pid;
	sk_add_node(sk, head);
	err = 0;

err:
	netlink_table_ungrab();
	return err;
}
358
/* Unhash @sk from the pid table and from the multicast bound list. */
static void netlink_remove(struct sock *sk)
{
	netlink_table_grab();
	if (sk_del_node_init(sk))
		nl_table[sk->sk_protocol].hash.entries--;
	if (nlk_sk(sk)->subscriptions)
		__sk_del_bind_node(sk);
	netlink_table_ungrab();
}
368
369static struct proto netlink_proto = {
370 .name = "NETLINK",
371 .owner = THIS_MODULE,
372 .obj_size = sizeof(struct netlink_sock),
373};
374
/*
 * Common socket construction for both userspace and kernel sockets:
 * allocate the sock, wire up ops/destructor, init per-socket state.
 * Returns 0 or -ENOMEM.
 */
static int __netlink_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct netlink_sock *nlk;

	sock->ops = &netlink_ops;

	sk = sk_alloc(PF_NETLINK, GFP_KERNEL, &netlink_proto, 1);
	if (!sk)
		return -ENOMEM;

	sock_init_data(sock, sk);

	nlk = nlk_sk(sk);
	spin_lock_init(&nlk->cb_lock);
	init_waitqueue_head(&nlk->wait);

	sk->sk_destruct = netlink_sock_destruct;
	sk->sk_protocol = protocol;
	return 0;
}
396
397static int netlink_create(struct socket *sock, int protocol)
398{
399 struct module *module = NULL;
Patrick McHardyf7fa9b12005-08-15 12:29:13 -0700400 struct netlink_sock *nlk;
401 unsigned int groups;
Patrick McHardyab33a172005-08-14 19:31:36 -0700402 int err = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403
404 sock->state = SS_UNCONNECTED;
405
406 if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM)
407 return -ESOCKTNOSUPPORT;
408
409 if (protocol<0 || protocol >= MAX_LINKS)
410 return -EPROTONOSUPPORT;
411
Patrick McHardy77247bb2005-08-14 19:27:13 -0700412 netlink_lock_table();
Harald Welte4fdb3bb2005-08-09 19:40:55 -0700413#ifdef CONFIG_KMOD
Patrick McHardyab33a172005-08-14 19:31:36 -0700414 if (!nl_table[protocol].registered) {
Patrick McHardy77247bb2005-08-14 19:27:13 -0700415 netlink_unlock_table();
Harald Welte4fdb3bb2005-08-09 19:40:55 -0700416 request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol);
Patrick McHardy77247bb2005-08-14 19:27:13 -0700417 netlink_lock_table();
Harald Welte4fdb3bb2005-08-09 19:40:55 -0700418 }
Patrick McHardyab33a172005-08-14 19:31:36 -0700419#endif
420 if (nl_table[protocol].registered &&
421 try_module_get(nl_table[protocol].module))
422 module = nl_table[protocol].module;
Patrick McHardyf7fa9b12005-08-15 12:29:13 -0700423 groups = nl_table[protocol].groups;
Patrick McHardy77247bb2005-08-14 19:27:13 -0700424 netlink_unlock_table();
Harald Welte4fdb3bb2005-08-09 19:40:55 -0700425
Kirill Korotaev14591de2006-01-09 17:42:42 +0300426 if ((err = __netlink_create(sock, protocol)) < 0)
Patrick McHardyab33a172005-08-14 19:31:36 -0700427 goto out_module;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428
Patrick McHardyf7fa9b12005-08-15 12:29:13 -0700429 nlk = nlk_sk(sock->sk);
Patrick McHardyf7fa9b12005-08-15 12:29:13 -0700430 nlk->module = module;
Patrick McHardyab33a172005-08-14 19:31:36 -0700431out:
432 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700433
Patrick McHardyab33a172005-08-14 19:31:36 -0700434out_module:
435 module_put(module);
436 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437}
438
/*
 * close(2) path: unhash the socket, cancel any in-flight dump, notify
 * URELEASE listeners, drop the module pin, and tear down per-protocol
 * state if this was the kernel socket.
 */
static int netlink_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk;

	if (!sk)
		return 0;

	netlink_remove(sk);
	nlk = nlk_sk(sk);

	/* finish and free a pending dump callback, if one is running */
	spin_lock(&nlk->cb_lock);
	if (nlk->cb) {
		if (nlk->cb->done)
			nlk->cb->done(nlk->cb);
		netlink_destroy_callback(nlk->cb);
		nlk->cb = NULL;
	}
	spin_unlock(&nlk->cb_lock);

	/* OK. Socket is unlinked, and, therefore,
	   no new packets will arrive */

	sock_orphan(sk);
	sock->sk = NULL;
	wake_up_interruptible_all(&nlk->wait);	/* release blocked senders */

	skb_queue_purge(&sk->sk_write_queue);

	/* tell interested subsystems that this pid went away */
	if (nlk->pid && !nlk->subscriptions) {
		struct netlink_notify n = {
						.protocol = sk->sk_protocol,
						.pid = nlk->pid,
					  };
		atomic_notifier_call_chain(&netlink_chain,
				NETLINK_URELEASE, &n);
	}

	if (nlk->module)
		module_put(nlk->module);

	netlink_table_grab();
	if (nlk->flags & NETLINK_KERNEL_SOCKET) {
		/* kernel socket going away unregisters the whole protocol */
		kfree(nl_table[sk->sk_protocol].listeners);
		nl_table[sk->sk_protocol].module = NULL;
		nl_table[sk->sk_protocol].registered = 0;
	} else if (nlk->subscriptions)
		netlink_update_listeners(sk);
	netlink_table_ungrab();

	kfree(nlk->groups);
	nlk->groups = NULL;

	sock_put(sk);
	return 0;
}
495
/*
 * Pick a pid automatically: try the caller's tgid first, then fall back
 * to negative values handed out by a rolling static counter.  Races with
 * other binders are resolved by retrying on -EADDRINUSE; a concurrent
 * autobind of the same socket (-EBUSY) is treated as success.
 */
static int netlink_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash;
	struct hlist_head *head;
	struct sock *osk;
	struct hlist_node *node;
	s32 pid = current->tgid;
	int err;
	static s32 rover = -4097;	/* shared; benign races only move it faster */

retry:
	cond_resched();
	netlink_table_grab();
	head = nl_pid_hashfn(hash, pid);
	sk_for_each(osk, node, head) {
		if (nlk_sk(osk)->pid == pid) {
			/* Bind collision, search negative pid values. */
			pid = rover--;
			if (rover > -4097)
				rover = -4097;	/* wrapped past s32 min: restart range */
			netlink_table_ungrab();
			goto retry;
		}
	}
	netlink_table_ungrab();

	err = netlink_insert(sk, pid);
	if (err == -EADDRINUSE)
		goto retry;

	/* If 2 threads race to autobind, that is fine. */
	if (err == -EBUSY)
		err = 0;

	return err;
}
533
534static inline int netlink_capable(struct socket *sock, unsigned int flag)
535{
536 return (nl_table[sock->sk->sk_protocol].nl_nonroot & flag) ||
537 capable(CAP_NET_ADMIN);
538}
539
/*
 * Set the socket's subscription count and keep it on the protocol's
 * mc_list exactly when that count is non-zero.  Caller holds the table
 * grabbed.
 */
static void
netlink_update_subscriptions(struct sock *sk, unsigned int subscriptions)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (nlk->subscriptions && !subscriptions)
		__sk_del_bind_node(sk);		/* dropped last group */
	else if (!nlk->subscriptions && subscriptions)
		sk_add_bind_node(sk, &nl_table[sk->sk_protocol].mc_list);
	nlk->subscriptions = subscriptions;
}
551
Patrick McHardy513c2502005-09-06 15:43:59 -0700552static int netlink_alloc_groups(struct sock *sk)
553{
554 struct netlink_sock *nlk = nlk_sk(sk);
555 unsigned int groups;
556 int err = 0;
557
558 netlink_lock_table();
559 groups = nl_table[sk->sk_protocol].groups;
560 if (!nl_table[sk->sk_protocol].registered)
561 err = -ENOENT;
562 netlink_unlock_table();
563
564 if (err)
565 return err;
566
567 nlk->groups = kmalloc(NLGRPSZ(groups), GFP_KERNEL);
568 if (nlk->groups == NULL)
569 return -ENOMEM;
570 memset(nlk->groups, 0, NLGRPSZ(groups));
571 nlk->ngroups = groups;
572 return 0;
573}
574
/*
 * bind(2): attach the socket to nladdr->nl_pid (or autobind when 0) and
 * install the first 32 multicast group bits from nladdr->nl_groups.
 * Group listening requires NL_NONROOT_RECV or CAP_NET_ADMIN.
 * NOTE(review): addr_len is not validated here — presumably guaranteed
 * >= sizeof(struct sockaddr_nl) by the caller; confirm against af layer.
 */
static int netlink_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr;
	int err;

	if (nladdr->nl_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to listen multicasts */
	if (nladdr->nl_groups) {
		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		if (nlk->groups == NULL) {
			err = netlink_alloc_groups(sk);
			if (err)
				return err;
		}
	}

	if (nlk->pid) {
		/* already bound: rebinding to a different pid is an error */
		if (nladdr->nl_pid != nlk->pid)
			return -EINVAL;
	} else {
		err = nladdr->nl_pid ?
			netlink_insert(sk, nladdr->nl_pid) :
			netlink_autobind(sock);
		if (err)
			return err;
	}

	/* nothing to change if no groups requested and none currently set */
	if (!nladdr->nl_groups && (nlk->groups == NULL || !(u32)nlk->groups[0]))
		return 0;

	netlink_table_grab();
	/* adjust subscription count by the delta in set bits */
	netlink_update_subscriptions(sk, nlk->subscriptions +
	                             hweight32(nladdr->nl_groups) -
	                             hweight32(nlk->groups[0]));
	/* replace only the low 32 group bits; higher groups keep their state */
	nlk->groups[0] = (nlk->groups[0] & ~0xffffffffUL) | nladdr->nl_groups;
	netlink_update_listeners(sk);
	netlink_table_ungrab();

	return 0;
}
620
/*
 * connect(2): record a default destination pid/group for send().
 * AF_UNSPEC disconnects.  Sending to a multicast group requires
 * NL_NONROOT_SEND or CAP_NET_ADMIN.  Autobinds an unbound socket.
 */
static int netlink_connect(struct socket *sock, struct sockaddr *addr,
			   int alen, int flags)
{
	int err = 0;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr=(struct sockaddr_nl*)addr;

	if (addr->sa_family == AF_UNSPEC) {
		sk->sk_state = NETLINK_UNCONNECTED;
		nlk->dst_pid = 0;
		nlk->dst_group = 0;
		return 0;
	}
	if (addr->sa_family != AF_NETLINK)
		return -EINVAL;

	/* Only superuser is allowed to send multicasts */
	if (nladdr->nl_groups && !netlink_capable(sock, NL_NONROOT_SEND))
		return -EPERM;

	if (!nlk->pid)
		err = netlink_autobind(sock);

	if (err == 0) {
		sk->sk_state = NETLINK_CONNECTED;
		nlk->dst_pid = nladdr->nl_pid;
		/* store lowest requested group as 1-based index (0 = none) */
		nlk->dst_group = ffs(nladdr->nl_groups);
	}

	return err;
}
653
/*
 * getsockname/getpeername: report our pid and first-32 group mask, or the
 * connected peer's pid/group when @peer is set.
 */
static int netlink_getname(struct socket *sock, struct sockaddr *addr, int *addr_len, int peer)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	struct sockaddr_nl *nladdr=(struct sockaddr_nl *)addr;

	nladdr->nl_family = AF_NETLINK;
	nladdr->nl_pad = 0;
	*addr_len = sizeof(*nladdr);

	if (peer) {
		nladdr->nl_pid = nlk->dst_pid;
		/* expand 1-based group index back into a bitmask */
		nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
	} else {
		nladdr->nl_pid = nlk->pid;
		nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
	}
	return 0;
}
673
674static void netlink_overrun(struct sock *sk)
675{
676 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
677 sk->sk_err = ENOBUFS;
678 sk->sk_error_report(sk);
679 }
680}
681
/*
 * Resolve a destination pid to a referenced socket for unicast from @ssk.
 * Refuses kernel sockets with no input handler, and sockets connected to
 * a different peer.  Returns ERR_PTR(-ECONNREFUSED) on failure.
 */
static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
{
	int protocol = ssk->sk_protocol;
	struct sock *sock;
	struct netlink_sock *nlk;

	sock = netlink_lookup(protocol, pid);
	if (!sock)
		return ERR_PTR(-ECONNREFUSED);

	/* Don't bother queuing skb if kernel socket has no input function */
	nlk = nlk_sk(sock);
	if ((nlk->pid == 0 && !nlk->data_ready) ||
	    (sock->sk_state == NETLINK_CONNECTED &&
	     nlk->dst_pid != nlk_sk(ssk)->pid)) {
		sock_put(sock);
		return ERR_PTR(-ECONNREFUSED);
	}
	return sock;
}
702
/*
 * Map an open file to its netlink socket, taking a reference.  Returns
 * ERR_PTR(-ENOTSOCK) for non-sockets and -EINVAL for non-netlink sockets.
 */
struct sock *netlink_getsockbyfilp(struct file *filp)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct sock *sock;

	if (!S_ISSOCK(inode->i_mode))
		return ERR_PTR(-ENOTSOCK);

	sock = SOCKET_I(inode)->sk;
	if (sock->sk_family != AF_NETLINK)
		return ERR_PTR(-EINVAL);

	sock_hold(sock);
	return sock;
}
718
719/*
720 * Attach a skb to a netlink socket.
721 * The caller must hold a reference to the destination socket. On error, the
722 * reference is dropped. The skb is not send to the destination, just all
723 * all error checks are performed and memory in the queue is reserved.
724 * Return values:
725 * < 0: error. skb freed, reference to sock dropped.
726 * 0: continue
727 * 1: repeat lookup - reference dropped while waiting for socket memory.
728 */
/*
 * Reserve receive-queue room on @sk for @skb (see the contract comment
 * above this function: <0 error, 0 attached, 1 retry).  If the queue is
 * full or the overrun bit is set, either fail immediately (no timeout) or
 * sleep until woken/timeout, then ask the caller to re-do the lookup.
 */
int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock,
		long timeo, struct sock *ssk)
{
	struct netlink_sock *nlk;

	nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
	    test_bit(0, &nlk->state)) {
		DECLARE_WAITQUEUE(wait, current);
		if (!timeo) {
			/* only flag overrun for kernel-originated traffic */
			if (!ssk || nlk_sk(ssk)->pid == 0)
				netlink_overrun(sk);
			sock_put(sk);
			kfree_skb(skb);
			return -EAGAIN;
		}

		__set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&nlk->wait, &wait);

		/* re-check under the queued waiter to avoid a lost wakeup */
		if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
		     test_bit(0, &nlk->state)) &&
		    !sock_flag(sk, SOCK_DEAD))
			timeo = schedule_timeout(timeo);

		__set_current_state(TASK_RUNNING);
		remove_wait_queue(&nlk->wait, &wait);
		sock_put(sk);	/* reference dropped: caller must re-lookup */

		if (signal_pending(current)) {
			kfree_skb(skb);
			return sock_intr_errno(timeo);
		}
		return 1;
	}
	skb_set_owner_r(skb, sk);	/* charge skb to sk's rmem */
	return 0;
}
768
769int netlink_sendskb(struct sock *sk, struct sk_buff *skb, int protocol)
770{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 int len = skb->len;
772
Linus Torvalds1da177e2005-04-16 15:20:36 -0700773 skb_queue_tail(&sk->sk_receive_queue, skb);
774 sk->sk_data_ready(sk, len);
775 sock_put(sk);
776 return len;
777}
778
/*
 * Undo a successful netlink_attachskb(): free the skb (its destructor
 * uncharges sk's rmem) and drop the socket reference.
 */
void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
{
	kfree_skb(skb);
	sock_put(sk);
}
784
/*
 * Shrink an over-allocated skb so queued messages don't pin excessive
 * truesize.  Only acts when more than half the buffer is tail room;
 * clones shared skbs first so the trim is safe.  Returns the (possibly
 * replaced) skb; on any allocation failure the original is kept as-is.
 */
static inline struct sk_buff *netlink_trim(struct sk_buff *skb,
					   gfp_t allocation)
{
	int delta;

	skb_orphan(skb);	/* drop any previous owner's rmem charge */

	delta = skb->end - skb->tail;
	if (delta * 2 < skb->truesize)
		return skb;	/* not worth reallocating */

	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, allocation);
		if (!nskb)
			return skb;
		kfree_skb(skb);
		skb = nskb;
	}

	if (!pskb_expand_head(skb, 0, -delta, allocation))
		skb->truesize -= delta;

	return skb;
}
809
/*
 * Send @skb to the socket bound to @pid on @ssk's protocol.  Retries the
 * lookup when netlink_attachskb() slept and dropped the reference.
 * Returns the delivered length or a negative errno; consumes @skb.
 */
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
{
	struct sock *sk;
	int err;
	long timeo;

	skb = netlink_trim(skb, gfp_any());

	timeo = sock_sndtimeo(ssk, nonblock);
retry:
	sk = netlink_getsockbypid(ssk, pid);
	if (IS_ERR(sk)) {
		kfree_skb(skb);
		return PTR_ERR(sk);
	}
	err = netlink_attachskb(sk, skb, nonblock, timeo, ssk);
	if (err == 1)
		goto retry;	/* slept; destination may have changed */
	if (err)
		return err;

	return netlink_sendskb(sk, skb, ssk->sk_protocol);
}
833
/*
 * Cheap test (for kernel sockets only) whether any socket currently
 * listens on multicast @group, via the precomputed listeners bitmap.
 * The unsigned "group - 1 < groups" comparison also rejects group 0.
 */
int netlink_has_listeners(struct sock *sk, unsigned int group)
{
	int res = 0;

	BUG_ON(!(nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET));
	if (group - 1 < nl_table[sk->sk_protocol].groups)
		res = test_bit(group - 1, nl_table[sk->sk_protocol].listeners);
	return res;
}
EXPORT_SYMBOL_GPL(netlink_has_listeners);
844
/*
 * Queue one multicast skb on @sk if there is room and no pending overrun.
 * Returns 1 when the queue crossed rcvbuf (congested), 0 when delivered
 * with room to spare, -1 when the skb could not be queued.
 */
static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
	    !test_bit(0, &nlk->state)) {
		skb_set_owner_r(skb, sk);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, skb->len);
		return atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf;
	}
	return -1;
}
858
/* Shared state threaded through do_one_broadcast() for each member socket. */
struct netlink_broadcast_data {
	struct sock *exclude_sk;	/* sender; never delivered to itself */
	u32 pid;			/* skip sockets bound to this pid */
	u32 group;			/* target multicast group (1-based) */
	int failure;			/* set once a clone failed: overrun everyone */
	int congested;			/* any receiver crossed its rcvbuf */
	int delivered;			/* at least one successful delivery */
	gfp_t allocation;
	struct sk_buff *skb, *skb2;	/* original and per-delivery clone */
};
869
/*
 * Deliver the broadcast to one candidate socket.  Skips the sender, the
 * originating pid, and non-members of the group.  Maintains a lazily
 * created clone in p->skb2 that is handed off on successful delivery.
 */
static inline int do_one_broadcast(struct sock *sk,
				   struct netlink_broadcast_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	int val;

	if (p->exclude_sk == sk)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	if (p->failure) {
		/* earlier clone failure: everyone gets an overrun instead */
		netlink_overrun(sk);
		goto out;
	}

	sock_hold(sk);
	if (p->skb2 == NULL) {
		if (skb_shared(p->skb)) {
			p->skb2 = skb_clone(p->skb, p->allocation);
		} else {
			p->skb2 = skb_get(p->skb);
			/*
			 * skb ownership may have been set when
			 * delivered to a previous socket.
			 */
			skb_orphan(p->skb2);
		}
	}
	if (p->skb2 == NULL) {
		netlink_overrun(sk);
		/* Clone failed. Notify ALL listeners. */
		p->failure = 1;
	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
		netlink_overrun(sk);
	} else {
		p->congested |= val;
		p->delivered = 1;
		p->skb2 = NULL;	/* consumed; next socket gets a fresh copy */
	}
	sock_put(sk);

out:
	return 0;
}
917
/*
 * Deliver @skb to every member of multicast @group on @ssk's protocol,
 * excluding @ssk and any socket bound to @pid.  Consumes @skb.  Returns 0
 * if anyone received it, -ENOBUFS if a clone failed, -ESRCH otherwise.
 */
int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
		      u32 group, gfp_t allocation)
{
	struct netlink_broadcast_data info;
	struct hlist_node *node;
	struct sock *sk;

	skb = netlink_trim(skb, allocation);

	info.exclude_sk = ssk;
	info.pid = pid;
	info.group = group;
	info.failure = 0;
	info.congested = 0;
	info.delivered = 0;
	info.allocation = allocation;
	info.skb = skb;
	info.skb2 = NULL;

	/* While we sleep in clone, do not allow to change socket list */

	netlink_lock_table();

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_broadcast(sk, &info);

	kfree_skb(skb);		/* drop our hold on the original */

	netlink_unlock_table();

	if (info.skb2)
		kfree_skb(info.skb2);	/* leftover clone never delivered */

	if (info.delivered) {
		/* give congested receivers a chance to drain */
		if (info.congested && (allocation & __GFP_WAIT))
			yield();
		return 0;
	}
	if (info.failure)
		return -ENOBUFS;
	return -ESRCH;
}
960
/* Arguments threaded through do_one_set_err() for each member socket. */
struct netlink_set_err_data {
	struct sock *exclude_sk;	/* originator; not signalled */
	u32 pid;			/* skip sockets bound to this pid */
	u32 group;			/* target multicast group (1-based) */
	int code;			/* errno value to raise on members */
};
967
/* Raise p->code as sk_err on one socket if it is a member of the group. */
static inline int do_one_set_err(struct sock *sk,
				 struct netlink_set_err_data *p)
{
	struct netlink_sock *nlk = nlk_sk(sk);

	if (sk == p->exclude_sk)
		goto out;

	if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups ||
	    !test_bit(p->group - 1, nlk->groups))
		goto out;

	sk->sk_err = p->code;
	sk->sk_error_report(sk);
out:
	return 0;
}
985
/*
 * Report error @code to every member of multicast @group on @ssk's
 * protocol, excluding @ssk and pid @pid.
 */
void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
{
	struct netlink_set_err_data info;
	struct hlist_node *node;
	struct sock *sk;

	info.exclude_sk = ssk;
	info.pid = pid;
	info.group = group;
	info.code = code;

	read_lock(&nl_table_lock);

	sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
		do_one_set_err(sk, &info);

	read_unlock(&nl_table_lock);
}
1004
/*
 * setsockopt(2) at SOL_NETLINK: toggle NETLINK_PKTINFO, or join/leave a
 * single multicast group (NETLINK_ADD/DROP_MEMBERSHIP, 1-based group in
 * the int value; requires NL_NONROOT_RECV or CAP_NET_ADMIN).
 */
static int netlink_setsockopt(struct socket *sock, int level, int optname,
			      char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int val = 0, err;

	if (level != SOL_NETLINK)
		return -ENOPROTOOPT;

	if (optlen >= sizeof(int) &&
	    get_user(val, (int __user *)optval))
		return -EFAULT;

	switch (optname) {
	case NETLINK_PKTINFO:
		if (val)
			nlk->flags |= NETLINK_RECV_PKTINFO;
		else
			nlk->flags &= ~NETLINK_RECV_PKTINFO;
		err = 0;
		break;
	case NETLINK_ADD_MEMBERSHIP:
	case NETLINK_DROP_MEMBERSHIP: {
		unsigned int subscriptions;
		int old, new = optname == NETLINK_ADD_MEMBERSHIP ? 1 : 0;

		if (!netlink_capable(sock, NL_NONROOT_RECV))
			return -EPERM;
		/* first group join allocates the membership bitmap */
		if (nlk->groups == NULL) {
			err = netlink_alloc_groups(sk);
			if (err)
				return err;
		}
		if (!val || val - 1 >= nlk->ngroups)
			return -EINVAL;
		netlink_table_grab();
		old = test_bit(val - 1, nlk->groups);
		subscriptions = nlk->subscriptions - old + new;
		if (new)
			__set_bit(val - 1, nlk->groups);
		else
			__clear_bit(val - 1, nlk->groups);
		netlink_update_subscriptions(sk, subscriptions);
		netlink_update_listeners(sk);
		netlink_table_ungrab();
		err = 0;
		break;
	}
	default:
		err = -ENOPROTOOPT;
	}
	return err;
}
1059
1060static int netlink_getsockopt(struct socket *sock, int level, int optname,
1061 char __user *optval, int __user *optlen)
1062{
1063 struct sock *sk = sock->sk;
1064 struct netlink_sock *nlk = nlk_sk(sk);
1065 int len, val, err;
1066
1067 if (level != SOL_NETLINK)
1068 return -ENOPROTOOPT;
1069
1070 if (get_user(len, optlen))
1071 return -EFAULT;
1072 if (len < 0)
1073 return -EINVAL;
1074
1075 switch (optname) {
1076 case NETLINK_PKTINFO:
1077 if (len < sizeof(int))
1078 return -EINVAL;
1079 len = sizeof(int);
1080 val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
1081 put_user(len, optlen);
1082 put_user(val, optval);
1083 err = 0;
1084 break;
1085 default:
1086 err = -ENOPROTOOPT;
1087 }
1088 return err;
1089}
1090
1091static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
1092{
1093 struct nl_pktinfo info;
1094
1095 info.group = NETLINK_CB(skb).dst_group;
1096 put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
1097}
1098
Linus Torvalds1da177e2005-04-16 15:20:36 -07001099static inline void netlink_rcv_wake(struct sock *sk)
1100{
1101 struct netlink_sock *nlk = nlk_sk(sk);
1102
David S. Millerb03efcf2005-07-08 14:57:23 -07001103 if (skb_queue_empty(&sk->sk_receive_queue))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 clear_bit(0, &nlk->state);
1105 if (!test_bit(0, &nlk->state))
1106 wake_up_interruptible(&nlk->wait);
1107}
1108
1109static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
1110 struct msghdr *msg, size_t len)
1111{
1112 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1113 struct sock *sk = sock->sk;
1114 struct netlink_sock *nlk = nlk_sk(sk);
1115 struct sockaddr_nl *addr=msg->msg_name;
1116 u32 dst_pid;
Patrick McHardyd629b832005-08-14 19:27:50 -07001117 u32 dst_group;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001118 struct sk_buff *skb;
1119 int err;
1120 struct scm_cookie scm;
1121
1122 if (msg->msg_flags&MSG_OOB)
1123 return -EOPNOTSUPP;
1124
1125 if (NULL == siocb->scm)
1126 siocb->scm = &scm;
1127 err = scm_send(sock, msg, siocb->scm);
1128 if (err < 0)
1129 return err;
1130
1131 if (msg->msg_namelen) {
1132 if (addr->nl_family != AF_NETLINK)
1133 return -EINVAL;
1134 dst_pid = addr->nl_pid;
Patrick McHardyd629b832005-08-14 19:27:50 -07001135 dst_group = ffs(addr->nl_groups);
1136 if (dst_group && !netlink_capable(sock, NL_NONROOT_SEND))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001137 return -EPERM;
1138 } else {
1139 dst_pid = nlk->dst_pid;
Patrick McHardyd629b832005-08-14 19:27:50 -07001140 dst_group = nlk->dst_group;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001141 }
1142
1143 if (!nlk->pid) {
1144 err = netlink_autobind(sock);
1145 if (err)
1146 goto out;
1147 }
1148
1149 err = -EMSGSIZE;
1150 if (len > sk->sk_sndbuf - 32)
1151 goto out;
1152 err = -ENOBUFS;
1153 skb = alloc_skb(len, GFP_KERNEL);
1154 if (skb==NULL)
1155 goto out;
1156
1157 NETLINK_CB(skb).pid = nlk->pid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001158 NETLINK_CB(skb).dst_pid = dst_pid;
Patrick McHardyd629b832005-08-14 19:27:50 -07001159 NETLINK_CB(skb).dst_group = dst_group;
Serge Hallync94c2572005-04-29 16:27:17 +01001160 NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context);
Steve Grubbe7c34972006-04-03 09:08:13 -04001161 selinux_get_task_sid(current, &(NETLINK_CB(skb).sid));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1163
1164 /* What can I do? Netlink is asynchronous, so that
1165 we will have to save current capabilities to
1166 check them, when this message will be delivered
1167 to corresponding kernel module. --ANK (980802)
1168 */
1169
1170 err = -EFAULT;
1171 if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len)) {
1172 kfree_skb(skb);
1173 goto out;
1174 }
1175
1176 err = security_netlink_send(sk, skb);
1177 if (err) {
1178 kfree_skb(skb);
1179 goto out;
1180 }
1181
Patrick McHardyd629b832005-08-14 19:27:50 -07001182 if (dst_group) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001183 atomic_inc(&skb->users);
Patrick McHardyd629b832005-08-14 19:27:50 -07001184 netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 }
1186 err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT);
1187
1188out:
1189 return err;
1190}
1191
/*
 * recvmsg() handler for netlink sockets.
 *
 * Dequeues one datagram, copies it (possibly truncated, MSG_TRUNC set)
 * into the caller's iovec, fills in the source address and credentials,
 * optionally attaches NETLINK_PKTINFO, and resumes an in-progress dump
 * once the receive buffer has drained to half capacity.
 * Returns bytes copied or a negative errno.
 */
static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
			   struct msghdr *msg, size_t len,
			   int flags)
{
	struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct netlink_sock *nlk = nlk_sk(sk);
	int noblock = flags & MSG_DONTWAIT;
	size_t copied;
	struct sk_buff *skb;
	int err;

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	copied = 0;

	skb = skb_recv_datagram(sk, flags, noblock, &err);
	if (skb == NULL)
		goto out;

	msg->msg_namelen = 0;

	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb->h.raw = skb->data;
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);

	if (msg->msg_name) {
		struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name;
		addr->nl_family = AF_NETLINK;
		addr->nl_pad = 0;
		addr->nl_pid = NETLINK_CB(skb).pid;
		addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group);
		msg->msg_namelen = sizeof(*addr);
	}

	if (nlk->flags & NETLINK_RECV_PKTINFO)
		netlink_cmsg_recv_pktinfo(msg, skb);

	if (NULL == siocb->scm) {
		memset(&scm, 0, sizeof(scm));
		siocb->scm = &scm;
	}
	siocb->scm->creds = *NETLINK_CREDS(skb);
	skb_free_datagram(sk, skb);

	/* continue a pending dump once there is room for more messages */
	if (nlk->cb && atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2)
		netlink_dump(sk);

	scm_recv(sock, msg, siocb->scm, flags);

out:
	/* may wake senders blocked on a previously-congested queue */
	netlink_rcv_wake(sk);
	return err ? : copied;
}
1253
1254static void netlink_data_ready(struct sock *sk, int len)
1255{
1256 struct netlink_sock *nlk = nlk_sk(sk);
1257
1258 if (nlk->data_ready)
1259 nlk->data_ready(sk, len);
1260 netlink_rcv_wake(sk);
1261}
1262
1263/*
1264 * We export these functions to other modules. They provide a
1265 * complete set of kernel non-blocking support for message
1266 * queueing.
1267 */
1268
/*
 * Create the kernel-side socket for netlink protocol @unit.
 *
 * @groups:  number of multicast groups (rounded up to at least 32)
 * @input:   optional handler invoked when userspace sends to the kernel
 * @module:  owner module, ref-counted when userspace opens this protocol
 *
 * Registers the protocol in nl_table under the table lock and marks it
 * as available.  Returns the kernel struct sock, or NULL on failure.
 */
struct sock *
netlink_kernel_create(int unit, unsigned int groups,
		      void (*input)(struct sock *sk, int len),
		      struct module *module)
{
	struct socket *sock;
	struct sock *sk;
	struct netlink_sock *nlk;
	unsigned long *listeners = NULL;

	/* called before netlink_proto_init()? nothing to attach to */
	if (!nl_table)
		return NULL;

	if (unit < 0 || unit >= MAX_LINKS)
		return NULL;

	if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
		return NULL;

	if (__netlink_create(sock, unit) < 0)
		goto out_sock_release;

	if (groups < 32)
		groups = 32;

	/* per-group listener bitmap consulted on broadcast fast path */
	listeners = kzalloc(NLGRPSZ(groups), GFP_KERNEL);
	if (!listeners)
		goto out_sock_release;

	sk = sock->sk;
	sk->sk_data_ready = netlink_data_ready;
	if (input)
		nlk_sk(sk)->data_ready = input;

	/* kernel socket always occupies pid 0 of its protocol */
	if (netlink_insert(sk, 0))
		goto out_sock_release;

	nlk = nlk_sk(sk);
	nlk->flags |= NETLINK_KERNEL_SOCKET;

	netlink_table_grab();
	nl_table[unit].groups = groups;
	nl_table[unit].listeners = listeners;
	nl_table[unit].module = module;
	nl_table[unit].registered = 1;
	netlink_table_ungrab();

	return sk;

out_sock_release:
	kfree(listeners);
	sock_release(sock);
	return NULL;
}
1323
1324void netlink_set_nonroot(int protocol, unsigned int flags)
1325{
1326 if ((unsigned int)protocol < MAX_LINKS)
1327 nl_table[protocol].nl_nonroot = flags;
1328}
1329
1330static void netlink_destroy_callback(struct netlink_callback *cb)
1331{
1332 if (cb->skb)
1333 kfree_skb(cb->skb);
1334 kfree(cb);
1335}
1336
1337/*
1338 * It looks a bit ugly.
1339 * It would be better to create kernel thread.
1340 */
1341
/*
 * Produce the next chunk of an in-progress dump for @sk.
 *
 * Allocates a receive skb, calls the registered dump callback under
 * nlk->cb_lock, and either queues a partial chunk (callback returned
 * > 0, more to come) or appends the NLMSG_DONE trailer and tears the
 * callback down.  Returns 0 on progress, negative errno otherwise.
 */
static int netlink_dump(struct sock *sk)
{
	struct netlink_sock *nlk = nlk_sk(sk);
	struct netlink_callback *cb;
	struct sk_buff *skb;
	struct nlmsghdr *nlh;
	int len;

	skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	spin_lock(&nlk->cb_lock);

	cb = nlk->cb;
	if (cb == NULL) {
		/* dump already completed or never started */
		spin_unlock(&nlk->cb_lock);
		kfree_skb(skb);
		return -EINVAL;
	}

	len = cb->dump(skb, cb);

	if (len > 0) {
		/* more data pending; deliver this chunk and return */
		spin_unlock(&nlk->cb_lock);
		skb_queue_tail(&sk->sk_receive_queue, skb);
		sk->sk_data_ready(sk, len);
		return 0;
	}

	/* dump finished: append NLMSG_DONE carrying the final length */
	nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
	memcpy(NLMSG_DATA(nlh), &len, sizeof(len));
	skb_queue_tail(&sk->sk_receive_queue, skb);
	sk->sk_data_ready(sk, skb->len);

	if (cb->done)
		cb->done(cb);
	nlk->cb = NULL;
	spin_unlock(&nlk->cb_lock);

	netlink_destroy_callback(cb);
	return 0;

	/* jumped to by NLMSG_NEW_ANSWER when the skb lacks tailroom */
nlmsg_failure:
	return -ENOBUFS;
}
1388
/*
 * Begin a multi-part dump in response to request @nlh received on @skb.
 *
 * @dump: callback producing one chunk per invocation (>0 = more data)
 * @done: optional completion callback
 *
 * Only one dump may be active per socket; a second request gets -EBUSY.
 * Returns 0 and kicks off the first chunk on success.
 *
 * Improvement: kzalloc() replaces the kmalloc()+memset() pair, matching
 * the allocation style used elsewhere in this file.
 */
int netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
		       struct nlmsghdr *nlh,
		       int (*dump)(struct sk_buff *skb, struct netlink_callback*),
		       int (*done)(struct netlink_callback*))
{
	struct netlink_callback *cb;
	struct sock *sk;
	struct netlink_sock *nlk;

	cb = kzalloc(sizeof(*cb), GFP_KERNEL);
	if (cb == NULL)
		return -ENOBUFS;

	cb->dump = dump;
	cb->done = done;
	cb->nlh = nlh;
	/* hold the request skb for the lifetime of the dump */
	atomic_inc(&skb->users);
	cb->skb = skb;

	sk = netlink_lookup(ssk->sk_protocol, NETLINK_CB(skb).pid);
	if (sk == NULL) {
		netlink_destroy_callback(cb);
		return -ECONNREFUSED;
	}
	nlk = nlk_sk(sk);
	/* A dump is in progress... */
	spin_lock(&nlk->cb_lock);
	if (nlk->cb) {
		spin_unlock(&nlk->cb_lock);
		netlink_destroy_callback(cb);
		sock_put(sk);
		return -EBUSY;
	}
	nlk->cb = cb;
	spin_unlock(&nlk->cb_lock);

	netlink_dump(sk);
	sock_put(sk);
	return 0;
}
1430
/*
 * Send an NLMSG_ERROR acknowledgement for @nlh back to its sender.
 *
 * On success (@err == 0) only the original header is echoed; on failure
 * the reply additionally carries the start of the offending message.
 * If the ack skb cannot be allocated, ENOBUFS is raised on the
 * requesting socket instead.
 */
void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
{
	struct sk_buff *skb;
	struct nlmsghdr *rep;
	struct nlmsgerr *errmsg;
	int size;

	if (err == 0)
		/* ack payload: error code + echoed header only */
		size = NLMSG_SPACE(sizeof(struct nlmsgerr));
	else
		/* nack payload: error code + copy of the failed message */
		size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len));

	skb = alloc_skb(size, GFP_KERNEL);
	if (!skb) {
		struct sock *sk;

		/* cannot build the ack; signal the sender via sk_err */
		sk = netlink_lookup(in_skb->sk->sk_protocol,
				    NETLINK_CB(in_skb).pid);
		if (sk) {
			sk->sk_err = ENOBUFS;
			sk->sk_error_report(sk);
			sock_put(sk);
		}
		return;
	}

	rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
			  NLMSG_ERROR, sizeof(struct nlmsgerr), 0);
	errmsg = NLMSG_DATA(rep);
	errmsg->error = err;
	memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr));
	netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
}
1464
Thomas Graf82ace472005-11-10 02:25:53 +01001465static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *,
1466 struct nlmsghdr *, int *))
1467{
1468 unsigned int total_len;
1469 struct nlmsghdr *nlh;
1470 int err;
1471
1472 while (skb->len >= nlmsg_total_size(0)) {
1473 nlh = (struct nlmsghdr *) skb->data;
1474
Martin Murrayad8e4b72006-01-10 13:02:29 -08001475 if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len)
Thomas Graf82ace472005-11-10 02:25:53 +01001476 return 0;
1477
1478 total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len);
1479
1480 if (cb(skb, nlh, &err) < 0) {
1481 /* Not an error, but we have to interrupt processing
1482 * here. Note: that in this case we do not pull
1483 * message from skb, it will be processed later.
1484 */
1485 if (err == 0)
1486 return -1;
1487 netlink_ack(skb, nlh, err);
1488 } else if (nlh->nlmsg_flags & NLM_F_ACK)
1489 netlink_ack(skb, nlh, 0);
1490
1491 skb_pull(skb, total_len);
1492 }
1493
1494 return 0;
1495}
1496
/**
 * netlink_run_queue - Process netlink receive queue.
 * @sk: Netlink socket containing the queue
 * @qlen: Place to store queue length upon entry
 * @cb: Callback function invoked for each netlink message found
 *
 * Processes as much as there was in the queue upon entry and invokes
 * a callback function for each netlink message found. The callback
 * function may refuse a message by returning a negative error code
 * but setting the error pointer to 0 in which case this function
 * returns with a qlen != 0.
 *
 * qlen must be initialized to 0 before the initial entry, afterwards
 * the function may be called repeatedly until qlen reaches 0.
 */
void netlink_run_queue(struct sock *sk, unsigned int *qlen,
		       int (*cb)(struct sk_buff *, struct nlmsghdr *, int *))
{
	struct sk_buff *skb;

	/* snapshot the queue length so messages arriving while we run
	 * are deferred to the next invocation */
	if (!*qlen || *qlen > skb_queue_len(&sk->sk_receive_queue))
		*qlen = skb_queue_len(&sk->sk_receive_queue);

	for (; *qlen; (*qlen)--) {
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (netlink_rcv_skb(skb, cb)) {
			/* callback asked to stop: requeue any unconsumed
			 * remainder at the head for later processing */
			if (skb->len)
				skb_queue_head(&sk->sk_receive_queue, skb);
			else {
				kfree_skb(skb);
				(*qlen)--;
			}
			break;
		}

		kfree_skb(skb);
	}
}
1535
/**
 * netlink_queue_skip - Skip netlink message while processing queue.
 * @nlh: Netlink message to be skipped
 * @skb: Socket buffer containing the netlink messages.
 *
 * Pulls the given netlink message off the socket buffer so the next
 * call to netlink_run_queue() will not reconsider the message.
 */
1544void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb)
1545{
1546 int msglen = NLMSG_ALIGN(nlh->nlmsg_len);
1547
1548 if (msglen > skb->len)
1549 msglen = skb->len;
1550
1551 skb_pull(skb, msglen);
1552}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001553
1554#ifdef CONFIG_PROC_FS
/* /proc/net/netlink iterator state: current protocol and hash bucket. */
struct nl_seq_iter {
	int link;	/* index into nl_table (protocol) */
	int hash_idx;	/* bucket within that protocol's pid hash */
};
1559
/*
 * Locate the @pos'th socket (0-based) across all protocols' pid hashes,
 * recording its protocol and bucket in the iterator.  Returns NULL when
 * @pos is past the last socket.  Caller holds nl_table_lock for reading.
 */
static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos)
{
	struct nl_seq_iter *iter = seq->private;
	int i, j;
	struct sock *s;
	struct hlist_node *node;
	loff_t off = 0;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (j = 0; j <= hash->mask; j++) {
			sk_for_each(s, node, &hash->table[j]) {
				if (off == pos) {
					iter->link = i;
					iter->hash_idx = j;
					return s;
				}
				++off;
			}
		}
	}
	return NULL;
}
1584
/*
 * seq_file start: take the table read lock (released in ->stop) and
 * position on the requested socket, or return the header token at pos 0.
 */
static void *netlink_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&nl_table_lock);
	return *pos ? netlink_seq_socket_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}
1590
/*
 * seq_file next: advance to the following socket — first along the
 * current hash chain, then through later buckets and protocols, using
 * the position cached in the iterator.  Returns NULL at the end.
 */
static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct sock *s;
	struct nl_seq_iter *iter;
	int i, j;

	++*pos;

	if (v == SEQ_START_TOKEN)
		return netlink_seq_socket_idx(seq, 0);

	/* next entry on the same chain, if any */
	s = sk_next(v);
	if (s)
		return s;

	iter = seq->private;
	i = iter->link;
	j = iter->hash_idx + 1;

	do {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		for (; j <= hash->mask; j++) {
			s = sk_head(&hash->table[j]);
			if (s) {
				iter->link = i;
				iter->hash_idx = j;
				return s;
			}
		}

		j = 0;
	} while (++i < MAX_LINKS);

	return NULL;
}
1627
/* seq_file stop: drop the lock taken in netlink_seq_start(). */
static void netlink_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&nl_table_lock);
}
1632
1633
/*
 * seq_file show: emit the column header for the start token, otherwise
 * one line per socket with protocol, pid, first 32 groups, buffer
 * usage, pending dump callback and refcount.
 */
static int netlink_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq,
			 "sk Eth Pid Groups "
			 "Rmem Wmem Dump Locks\n");
	else {
		struct sock *s = v;
		struct netlink_sock *nlk = nlk_sk(s);

		seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n",
			   s,
			   s->sk_protocol,
			   nlk->pid,
			   /* groups bitmap is allocated lazily; may be NULL */
			   nlk->groups ? (u32)nlk->groups[0] : 0,
			   atomic_read(&s->sk_rmem_alloc),
			   atomic_read(&s->sk_wmem_alloc),
			   nlk->cb,
			   atomic_read(&s->sk_refcnt)
			   );

	}
	return 0;
}
1658
/* seq_file operations backing /proc/net/netlink. */
static struct seq_operations netlink_seq_ops = {
	.start  = netlink_seq_start,
	.next   = netlink_seq_next,
	.stop   = netlink_seq_stop,
	.show   = netlink_seq_show,
};
1665
1666
1667static int netlink_seq_open(struct inode *inode, struct file *file)
1668{
1669 struct seq_file *seq;
1670 struct nl_seq_iter *iter;
1671 int err;
1672
1673 iter = kmalloc(sizeof(*iter), GFP_KERNEL);
1674 if (!iter)
1675 return -ENOMEM;
1676
1677 err = seq_open(file, &netlink_seq_ops);
1678 if (err) {
1679 kfree(iter);
1680 return err;
1681 }
1682
1683 memset(iter, 0, sizeof(*iter));
1684 seq = file->private_data;
1685 seq->private = iter;
1686 return 0;
1687}
1688
/* file_operations for /proc/net/netlink (iterator freed on release). */
static struct file_operations netlink_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= netlink_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};
1696
1697#endif
1698
/* Subscribe @nb to netlink socket lifecycle events (e.g. NETLINK_URELEASE). */
int netlink_register_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_register(&netlink_chain, nb);
}
1703
/* Remove @nb from the netlink notifier chain. */
int netlink_unregister_notifier(struct notifier_block *nb)
{
	return atomic_notifier_chain_unregister(&netlink_chain, nb);
}
1708
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001709static const struct proto_ops netlink_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001710 .family = PF_NETLINK,
1711 .owner = THIS_MODULE,
1712 .release = netlink_release,
1713 .bind = netlink_bind,
1714 .connect = netlink_connect,
1715 .socketpair = sock_no_socketpair,
1716 .accept = sock_no_accept,
1717 .getname = netlink_getname,
1718 .poll = datagram_poll,
1719 .ioctl = sock_no_ioctl,
1720 .listen = sock_no_listen,
1721 .shutdown = sock_no_shutdown,
Patrick McHardy9a4595b2005-08-15 12:32:15 -07001722 .setsockopt = netlink_setsockopt,
1723 .getsockopt = netlink_getsockopt,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 .sendmsg = netlink_sendmsg,
1725 .recvmsg = netlink_recvmsg,
1726 .mmap = sock_no_mmap,
1727 .sendpage = sock_no_sendpage,
1728};
1729
/* Address-family registration entry: socket(PF_NETLINK, ...) dispatch. */
static struct net_proto_family netlink_family_ops = {
	.family = PF_NETLINK,
	.create = netlink_create,
	.owner	= THIS_MODULE,	/* for consistency 8) */
};
1735
/* Deliberately undefined: referencing it fails the link if the size
 * check in netlink_proto_init() ever becomes true at compile time. */
extern void netlink_skb_parms_too_large(void);
1737
/*
 * Boot-time initialization of the netlink subsystem: register the
 * proto, allocate nl_table with a minimal (one-bucket) pid hash per
 * protocol, size the maximum hash growth from available memory,
 * register the address family and /proc entry, then bring up
 * rtnetlink.  Runs at core_initcall so it precedes users.
 */
static int __init netlink_proto_init(void)
{
	struct sk_buff *dummy_skb;
	int i;
	unsigned long max;
	unsigned int order;
	int err = proto_register(&netlink_proto, 0);

	if (err != 0)
		goto out;

	/* link-time assertion: netlink_skb_parms must fit in skb->cb */
	if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb))
		netlink_skb_parms_too_large();

	nl_table = kmalloc(sizeof(*nl_table) * MAX_LINKS, GFP_KERNEL);
	if (!nl_table) {
enomem:
		printk(KERN_CRIT "netlink_init: Cannot allocate nl_table\n");
		return -ENOMEM;
	}

	memset(nl_table, 0, sizeof(*nl_table) * MAX_LINKS);

	/* scale the hash growth ceiling with system memory */
	if (num_physpages >= (128 * 1024))
		max = num_physpages >> (21 - PAGE_SHIFT);
	else
		max = num_physpages >> (23 - PAGE_SHIFT);

	order = get_bitmask_order(max) - 1 + PAGE_SHIFT;
	max = (1UL << order) / sizeof(struct hlist_head);
	order = get_bitmask_order(max > UINT_MAX ? UINT_MAX : max) - 1;

	for (i = 0; i < MAX_LINKS; i++) {
		struct nl_pid_hash *hash = &nl_table[i].hash;

		/* start with a single bucket; grows on demand */
		hash->table = nl_pid_hash_alloc(1 * sizeof(*hash->table));
		if (!hash->table) {
			while (i-- > 0)
				nl_pid_hash_free(nl_table[i].hash.table,
						 1 * sizeof(*hash->table));
			kfree(nl_table);
			goto enomem;
		}
		memset(hash->table, 0, 1 * sizeof(*hash->table));
		hash->max_shift = order;
		hash->shift = 0;
		hash->mask = 0;
		hash->rehash_time = jiffies;
	}

	sock_register(&netlink_family_ops);
#ifdef CONFIG_PROC_FS
	proc_net_fops_create("netlink", 0, &netlink_seq_fops);
#endif
	/* The netlink device handler may be needed early. */
	rtnetlink_init();
out:
	return err;
}
1797
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798core_initcall(netlink_proto_init);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001799
1800EXPORT_SYMBOL(netlink_ack);
Thomas Graf82ace472005-11-10 02:25:53 +01001801EXPORT_SYMBOL(netlink_run_queue);
1802EXPORT_SYMBOL(netlink_queue_skip);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001803EXPORT_SYMBOL(netlink_broadcast);
1804EXPORT_SYMBOL(netlink_dump_start);
1805EXPORT_SYMBOL(netlink_kernel_create);
1806EXPORT_SYMBOL(netlink_register_notifier);
1807EXPORT_SYMBOL(netlink_set_err);
1808EXPORT_SYMBOL(netlink_set_nonroot);
1809EXPORT_SYMBOL(netlink_unicast);
1810EXPORT_SYMBOL(netlink_unregister_notifier);
1811