blob: da156015d827d6eeeadaae006c61306eefb8aaa3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * ip6_flowlabel.c IPv6 flowlabel manager.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 */
11
Randy Dunlap4fc268d2006-01-11 12:17:47 -080012#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070013#include <linux/errno.h>
14#include <linux/types.h>
15#include <linux/socket.h>
16#include <linux/net.h>
17#include <linux/netdevice.h>
18#include <linux/if_arp.h>
19#include <linux/in6.h>
20#include <linux/route.h>
21#include <linux/proc_fs.h>
22#include <linux/seq_file.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090023#include <linux/slab.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040024#include <linux/export.h>
Eric W. Biederman4f82f452012-05-24 10:37:59 -060025#include <linux/pid_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070026
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020027#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include <net/sock.h>
29
30#include <net/ipv6.h>
31#include <net/ndisc.h>
32#include <net/protocol.h>
33#include <net/ip6_route.h>
34#include <net/addrconf.h>
35#include <net/rawv6.h>
36#include <net/icmp.h>
37#include <net/transp_v6.h>
38
39#include <asm/uaccess.h>
40
/*
 * Linger bounds, in seconds; check_linger() scales them by HZ.
 * The minimum of 6 seconds comes from an old IPv6 RFC and is still a
 * reasonable value.
 */
#define FL_MIN_LINGER	6	/* Minimal linger */
#define FL_MAX_LINGER	60	/* Maximal linger timeout */

/* FL hash table */

#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)
#define FL_MAX_SIZE	4096
#define FL_MAX_PER_SOCK	32
52
53static atomic_t fl_size = ATOMIC_INIT(0);
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +000054static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -070055
56static void ip6_fl_gc(unsigned long dummy);
Ingo Molnar8d06afa2005-09-09 13:10:40 -070057static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -070058
59/* FL hash table lock: it protects only of GC */
60
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +000061static DEFINE_SPINLOCK(ip6_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070062
63/* Big socket sock */
64
65static DEFINE_RWLOCK(ip6_sk_fl_lock);
66
/*
 * Walk one hash chain of fl_ht.
 *
 * Every user of these iterators runs either inside
 * rcu_read_lock_bh()/rcu_read_unlock_bh() or with ip6_fl_lock taken via
 * spin_lock_bh(), i.e. with bottom halves disabled, so the matching
 * accessor is rcu_dereference_bh() rather than plain rcu_dereference().
 */
#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference_bh(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference_bh(fl->next))

/* Continue a chain walk from the entry following @fl. */
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference_bh(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference_bh(fl->next))
Linus Torvalds1da177e2005-04-16 15:20:36 -070075
Benjamin Thery60e8fbc2008-03-26 16:53:08 -070076static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
Linus Torvalds1da177e2005-04-16 15:20:36 -070077{
78 struct ip6_flowlabel *fl;
79
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +000080 for_each_fl_rcu(FL_HASH(label), fl) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -080081 if (fl->label == label && net_eq(fl->fl_net, net))
Linus Torvalds1da177e2005-04-16 15:20:36 -070082 return fl;
83 }
84 return NULL;
85}
86
Benjamin Thery60e8fbc2008-03-26 16:53:08 -070087static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
Linus Torvalds1da177e2005-04-16 15:20:36 -070088{
89 struct ip6_flowlabel *fl;
90
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +000091 rcu_read_lock_bh();
Benjamin Thery60e8fbc2008-03-26 16:53:08 -070092 fl = __fl_lookup(net, label);
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +000093 if (fl && !atomic_inc_not_zero(&fl->users))
94 fl = NULL;
95 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -070096 return fl;
97}
98
99
100static void fl_free(struct ip6_flowlabel *fl)
101{
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700102 if (fl) {
Dan Carpenter898132a2012-08-16 16:15:02 +0300103 if (fl->share == IPV6_FL_S_PROCESS)
104 put_pid(fl->owner.pid);
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700105 release_net(fl->fl_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106 kfree(fl->opt);
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000107 kfree_rcu(fl, rcu);
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700108 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109}
110
111static void fl_release(struct ip6_flowlabel *fl)
112{
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000113 spin_lock_bh(&ip6_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115 fl->lastuse = jiffies;
116 if (atomic_dec_and_test(&fl->users)) {
117 unsigned long ttd = fl->lastuse + fl->linger;
118 if (time_after(ttd, fl->expires))
119 fl->expires = ttd;
120 ttd = fl->expires;
121 if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
122 struct ipv6_txoptions *opt = fl->opt;
123 fl->opt = NULL;
124 kfree(opt);
125 }
126 if (!timer_pending(&ip6_fl_gc_timer) ||
127 time_after(ip6_fl_gc_timer.expires, ttd))
128 mod_timer(&ip6_fl_gc_timer, ttd);
129 }
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000130 spin_unlock_bh(&ip6_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700131}
132
133static void ip6_fl_gc(unsigned long dummy)
134{
135 int i;
136 unsigned long now = jiffies;
137 unsigned long sched = 0;
138
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000139 spin_lock(&ip6_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140
141 for (i=0; i<=FL_HASH_MASK; i++) {
142 struct ip6_flowlabel *fl, **flp;
143 flp = &fl_ht[i];
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000144 while ((fl = rcu_dereference_protected(*flp,
145 lockdep_is_held(&ip6_fl_lock))) != NULL) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 if (atomic_read(&fl->users) == 0) {
147 unsigned long ttd = fl->lastuse + fl->linger;
148 if (time_after(ttd, fl->expires))
149 fl->expires = ttd;
150 ttd = fl->expires;
151 if (time_after_eq(now, ttd)) {
152 *flp = fl->next;
153 fl_free(fl);
154 atomic_dec(&fl_size);
155 continue;
156 }
157 if (!sched || time_before(ttd, sched))
158 sched = ttd;
159 }
160 flp = &fl->next;
161 }
162 }
163 if (!sched && atomic_read(&fl_size))
164 sched = now + FL_MAX_LINGER;
165 if (sched) {
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700166 mod_timer(&ip6_fl_gc_timer, sched);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 }
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000168 spin_unlock(&ip6_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169}
170
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000171static void __net_exit ip6_fl_purge(struct net *net)
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700172{
173 int i;
174
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000175 spin_lock(&ip6_fl_lock);
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700176 for (i = 0; i <= FL_HASH_MASK; i++) {
177 struct ip6_flowlabel *fl, **flp;
178 flp = &fl_ht[i];
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000179 while ((fl = rcu_dereference_protected(*flp,
180 lockdep_is_held(&ip6_fl_lock))) != NULL) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800181 if (net_eq(fl->fl_net, net) &&
182 atomic_read(&fl->users) == 0) {
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700183 *flp = fl->next;
184 fl_free(fl);
185 atomic_dec(&fl_size);
186 continue;
187 }
188 flp = &fl->next;
189 }
190 }
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000191 spin_unlock(&ip6_fl_lock);
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700192}
193
194static struct ip6_flowlabel *fl_intern(struct net *net,
195 struct ip6_flowlabel *fl, __be32 label)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196{
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700197 struct ip6_flowlabel *lfl;
198
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 fl->label = label & IPV6_FLOWLABEL_MASK;
200
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000201 spin_lock_bh(&ip6_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202 if (label == 0) {
203 for (;;) {
204 fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK;
205 if (fl->label) {
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700206 lfl = __fl_lookup(net, fl->label);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 if (lfl == NULL)
208 break;
209 }
210 }
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700211 } else {
212 /*
213 * we dropper the ip6_fl_lock, so this entry could reappear
214 * and we need to recheck with it.
215 *
216 * OTOH no need to search the active socket first, like it is
217 * done in ipv6_flowlabel_opt - sock is locked, so new entry
218 * with the same label can only appear on another sock
219 */
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700220 lfl = __fl_lookup(net, fl->label);
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700221 if (lfl != NULL) {
222 atomic_inc(&lfl->users);
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000223 spin_unlock_bh(&ip6_fl_lock);
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700224 return lfl;
225 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700226 }
227
228 fl->lastuse = jiffies;
229 fl->next = fl_ht[FL_HASH(fl->label)];
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000230 rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700231 atomic_inc(&fl_size);
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000232 spin_unlock_bh(&ip6_fl_lock);
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700233 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234}
235
236
237
238/* Socket flowlabel lists */
239
Al Viro90bcaf72006-11-08 00:25:17 -0800240struct ip6_flowlabel * fl6_sock_lookup(struct sock *sk, __be32 label)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241{
242 struct ipv6_fl_socklist *sfl;
243 struct ipv6_pinfo *np = inet6_sk(sk);
244
245 label &= IPV6_FLOWLABEL_MASK;
246
Pavel Emelyanovbd0bf572007-10-18 05:15:57 -0700247 read_lock_bh(&ip6_sk_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248 for (sfl=np->ipv6_fl_list; sfl; sfl = sfl->next) {
249 struct ip6_flowlabel *fl = sfl->fl;
250 if (fl->label == label) {
251 fl->lastuse = jiffies;
252 atomic_inc(&fl->users);
Pavel Emelyanov52f095e2007-10-18 05:38:48 -0700253 read_unlock_bh(&ip6_sk_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254 return fl;
255 }
256 }
Pavel Emelyanovbd0bf572007-10-18 05:15:57 -0700257 read_unlock_bh(&ip6_sk_fl_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258 return NULL;
259}
260
Arnaldo Carvalho de Melo3cf3dc62005-12-13 23:23:20 -0800261EXPORT_SYMBOL_GPL(fl6_sock_lookup);
262
Linus Torvalds1da177e2005-04-16 15:20:36 -0700263void fl6_free_socklist(struct sock *sk)
264{
265 struct ipv6_pinfo *np = inet6_sk(sk);
266 struct ipv6_fl_socklist *sfl;
267
YOSHIFUJI Hideaki / 吉藤英明f256dc52013-01-30 09:26:42 +0000268 if (!np->ipv6_fl_list)
269 return;
270
271 write_lock_bh(&ipv6_sk_fl_lock);
272 sfl = np->ipv6_fl_list;
273 np->ipv6_fl_list = NULL;
274 write_unlock_bh(&ipv6_sk_fl_lock);
275
276 while (sfl) {
277 struct ipv6_fl_socklist *next = sfl->next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700278 fl_release(sfl->fl);
279 kfree(sfl);
YOSHIFUJI Hideaki / 吉藤英明f256dc52013-01-30 09:26:42 +0000280 sfl = next;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700281 }
282}
283
284/* Service routines */
285
286
287/*
288 It is the only difficult place. flowlabel enforces equal headers
289 before and including routing header, however user may supply options
290 following rthdr.
291 */
292
293struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
294 struct ip6_flowlabel * fl,
295 struct ipv6_txoptions * fopt)
296{
YOSHIFUJI Hideakidf9890c2005-11-20 12:23:18 +0900297 struct ipv6_txoptions * fl_opt = fl->opt;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900298
YOSHIFUJI Hideakidf9890c2005-11-20 12:23:18 +0900299 if (fopt == NULL || fopt->opt_flen == 0)
300 return fl_opt;
YOSHIFUJI Hideaki1ab14572007-02-09 23:24:49 +0900301
Linus Torvalds1da177e2005-04-16 15:20:36 -0700302 if (fl_opt != NULL) {
303 opt_space->hopopt = fl_opt->hopopt;
YOSHIFUJI Hideakidf9890c2005-11-20 12:23:18 +0900304 opt_space->dst0opt = fl_opt->dst0opt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305 opt_space->srcrt = fl_opt->srcrt;
306 opt_space->opt_nflen = fl_opt->opt_nflen;
307 } else {
308 if (fopt->opt_nflen == 0)
309 return fopt;
310 opt_space->hopopt = NULL;
311 opt_space->dst0opt = NULL;
312 opt_space->srcrt = NULL;
313 opt_space->opt_nflen = 0;
314 }
315 opt_space->dst1opt = fopt->dst1opt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316 opt_space->opt_flen = fopt->opt_flen;
317 return opt_space;
318}
Chris Elstona495f832012-04-29 21:48:53 +0000319EXPORT_SYMBOL_GPL(fl6_merge_options);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700320
321static unsigned long check_linger(unsigned long ttl)
322{
323 if (ttl < FL_MIN_LINGER)
324 return FL_MIN_LINGER*HZ;
325 if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
326 return 0;
327 return ttl*HZ;
328}
329
330static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
331{
332 linger = check_linger(linger);
333 if (!linger)
334 return -EPERM;
335 expires = check_linger(expires);
336 if (!expires)
337 return -EPERM;
338 fl->lastuse = jiffies;
339 if (time_before(fl->linger, linger))
340 fl->linger = linger;
341 if (time_before(expires, fl->linger))
342 expires = fl->linger;
343 if (time_before(fl->expires, fl->lastuse + expires))
344 fl->expires = fl->lastuse + expires;
345 return 0;
346}
347
348static struct ip6_flowlabel *
Maciej Żenczykowskiec0506d2011-08-28 12:35:31 +0000349fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
350 char __user *optval, int optlen, int *err_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351{
David S. Miller684de402009-02-06 00:49:55 -0800352 struct ip6_flowlabel *fl = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 int olen;
354 int addr_type;
355 int err;
356
David S. Miller684de402009-02-06 00:49:55 -0800357 olen = optlen - CMSG_ALIGN(sizeof(*freq));
358 err = -EINVAL;
359 if (olen > 64 * 1024)
360 goto done;
361
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 err = -ENOMEM;
Ingo Oeser0c600ed2006-03-20 23:01:32 -0800363 fl = kzalloc(sizeof(*fl), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 if (fl == NULL)
365 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 if (olen > 0) {
368 struct msghdr msg;
David S. Miller4c9483b2011-03-12 16:22:43 -0500369 struct flowi6 flowi6;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700370 int junk;
371
372 err = -ENOMEM;
373 fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
374 if (fl->opt == NULL)
375 goto done;
376
377 memset(fl->opt, 0, sizeof(*fl->opt));
378 fl->opt->tot_len = sizeof(*fl->opt) + olen;
379 err = -EFAULT;
380 if (copy_from_user(fl->opt+1, optval+CMSG_ALIGN(sizeof(*freq)), olen))
381 goto done;
382
383 msg.msg_controllen = olen;
384 msg.msg_control = (void*)(fl->opt+1);
David S. Miller4c9483b2011-03-12 16:22:43 -0500385 memset(&flowi6, 0, sizeof(flowi6));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386
Maciej Żenczykowskiec0506d2011-08-28 12:35:31 +0000387 err = datagram_send_ctl(net, sk, &msg, &flowi6, fl->opt, &junk,
Brian Haley13b52cd2010-04-23 11:26:08 +0000388 &junk, &junk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 if (err)
390 goto done;
391 err = -EINVAL;
392 if (fl->opt->opt_flen)
393 goto done;
394 if (fl->opt->opt_nflen == 0) {
395 kfree(fl->opt);
396 fl->opt = NULL;
397 }
398 }
399
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700400 fl->fl_net = hold_net(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 fl->expires = jiffies;
402 err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
403 if (err)
404 goto done;
405 fl->share = freq->flr_share;
406 addr_type = ipv6_addr_type(&freq->flr_dst);
Joe Perches35700212009-11-24 14:52:52 -0800407 if ((addr_type & IPV6_ADDR_MAPPED) ||
408 addr_type == IPV6_ADDR_ANY) {
James Morrisc6817e42006-10-30 18:56:06 -0800409 err = -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410 goto done;
James Morrisc6817e42006-10-30 18:56:06 -0800411 }
Alexey Dobriyan4e3fd7a2011-11-21 03:39:03 +0000412 fl->dst = freq->flr_dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413 atomic_set(&fl->users, 1);
414 switch (fl->share) {
415 case IPV6_FL_S_EXCL:
416 case IPV6_FL_S_ANY:
417 break;
418 case IPV6_FL_S_PROCESS:
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600419 fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420 break;
421 case IPV6_FL_S_USER:
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600422 fl->owner.uid = current_euid();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700423 break;
424 default:
425 err = -EINVAL;
426 goto done;
427 }
428 return fl;
429
430done:
431 fl_free(fl);
432 *err_p = err;
433 return NULL;
434}
435
436static int mem_check(struct sock *sk)
437{
438 struct ipv6_pinfo *np = inet6_sk(sk);
439 struct ipv6_fl_socklist *sfl;
440 int room = FL_MAX_SIZE - atomic_read(&fl_size);
441 int count = 0;
442
443 if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
444 return 0;
445
446 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next)
447 count++;
448
449 if (room <= 0 ||
450 ((count >= FL_MAX_PER_SOCK ||
Joe Perches35700212009-11-24 14:52:52 -0800451 (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
452 !capable(CAP_NET_ADMIN)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453 return -ENOBUFS;
454
455 return 0;
456}
457
Eric Dumazeta50feda2012-05-18 18:57:34 +0000458static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459{
460 if (h1 == h2)
Eric Dumazeta50feda2012-05-18 18:57:34 +0000461 return false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 if (h1 == NULL || h2 == NULL)
Eric Dumazeta50feda2012-05-18 18:57:34 +0000463 return true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 if (h1->hdrlen != h2->hdrlen)
Eric Dumazeta50feda2012-05-18 18:57:34 +0000465 return true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1));
467}
468
Eric Dumazeta50feda2012-05-18 18:57:34 +0000469static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470{
471 if (o1 == o2)
Eric Dumazeta50feda2012-05-18 18:57:34 +0000472 return false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 if (o1 == NULL || o2 == NULL)
Eric Dumazeta50feda2012-05-18 18:57:34 +0000474 return true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700475 if (o1->opt_nflen != o2->opt_nflen)
Eric Dumazeta50feda2012-05-18 18:57:34 +0000476 return true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477 if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt))
Eric Dumazeta50feda2012-05-18 18:57:34 +0000478 return true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700479 if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt))
Eric Dumazeta50feda2012-05-18 18:57:34 +0000480 return true;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700481 if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt))
Eric Dumazeta50feda2012-05-18 18:57:34 +0000482 return true;
483 return false;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700484}
485
Pavel Emelyanov04028042007-10-18 05:14:58 -0700486static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
487 struct ip6_flowlabel *fl)
488{
489 write_lock_bh(&ip6_sk_fl_lock);
490 sfl->fl = fl;
491 sfl->next = np->ipv6_fl_list;
492 np->ipv6_fl_list = sfl;
493 write_unlock_bh(&ip6_sk_fl_lock);
494}
495
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)
497{
Ingo Molnar55205d42008-11-25 16:50:30 -0800498 int uninitialized_var(err);
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700499 struct net *net = sock_net(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700500 struct ipv6_pinfo *np = inet6_sk(sk);
501 struct in6_flowlabel_req freq;
502 struct ipv6_fl_socklist *sfl1=NULL;
503 struct ipv6_fl_socklist *sfl, **sflp;
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700504 struct ip6_flowlabel *fl, *fl1 = NULL;
505
Linus Torvalds1da177e2005-04-16 15:20:36 -0700506
507 if (optlen < sizeof(freq))
508 return -EINVAL;
509
510 if (copy_from_user(&freq, optval, sizeof(freq)))
511 return -EFAULT;
512
513 switch (freq.flr_action) {
514 case IPV6_FL_A_PUT:
515 write_lock_bh(&ip6_sk_fl_lock);
516 for (sflp = &np->ipv6_fl_list; (sfl=*sflp)!=NULL; sflp = &sfl->next) {
517 if (sfl->fl->label == freq.flr_label) {
518 if (freq.flr_label == (np->flow_label&IPV6_FLOWLABEL_MASK))
519 np->flow_label &= ~IPV6_FLOWLABEL_MASK;
520 *sflp = sfl->next;
521 write_unlock_bh(&ip6_sk_fl_lock);
522 fl_release(sfl->fl);
523 kfree(sfl);
524 return 0;
525 }
526 }
527 write_unlock_bh(&ip6_sk_fl_lock);
528 return -ESRCH;
529
530 case IPV6_FL_A_RENEW:
531 read_lock_bh(&ip6_sk_fl_lock);
532 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
533 if (sfl->fl->label == freq.flr_label) {
534 err = fl6_renew(sfl->fl, freq.flr_linger, freq.flr_expires);
535 read_unlock_bh(&ip6_sk_fl_lock);
536 return err;
537 }
538 }
539 read_unlock_bh(&ip6_sk_fl_lock);
540
Eric W. Biedermanaf31f412012-11-16 03:03:06 +0000541 if (freq.flr_share == IPV6_FL_S_NONE &&
542 ns_capable(net->user_ns, CAP_NET_ADMIN)) {
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700543 fl = fl_lookup(net, freq.flr_label);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700544 if (fl) {
545 err = fl6_renew(fl, freq.flr_linger, freq.flr_expires);
546 fl_release(fl);
547 return err;
548 }
549 }
550 return -ESRCH;
551
552 case IPV6_FL_A_GET:
553 if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)
554 return -EINVAL;
555
Maciej Żenczykowskiec0506d2011-08-28 12:35:31 +0000556 fl = fl_create(net, sk, &freq, optval, optlen, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700557 if (fl == NULL)
558 return err;
559 sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);
560
561 if (freq.flr_label) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700562 err = -EEXIST;
563 read_lock_bh(&ip6_sk_fl_lock);
564 for (sfl = np->ipv6_fl_list; sfl; sfl = sfl->next) {
565 if (sfl->fl->label == freq.flr_label) {
566 if (freq.flr_flags&IPV6_FL_F_EXCL) {
567 read_unlock_bh(&ip6_sk_fl_lock);
568 goto done;
569 }
570 fl1 = sfl->fl;
Yan Zheng4ea6a802005-10-24 19:55:23 +0800571 atomic_inc(&fl1->users);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 break;
573 }
574 }
575 read_unlock_bh(&ip6_sk_fl_lock);
576
577 if (fl1 == NULL)
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700578 fl1 = fl_lookup(net, freq.flr_label);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 if (fl1) {
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700580recheck:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 err = -EEXIST;
582 if (freq.flr_flags&IPV6_FL_F_EXCL)
583 goto release;
584 err = -EPERM;
585 if (fl1->share == IPV6_FL_S_EXCL ||
586 fl1->share != fl->share ||
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600587 ((fl1->share == IPV6_FL_S_PROCESS) &&
588 (fl1->owner.pid == fl->owner.pid)) ||
589 ((fl1->share == IPV6_FL_S_USER) &&
590 uid_eq(fl1->owner.uid, fl->owner.uid)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591 goto release;
592
593 err = -EINVAL;
594 if (!ipv6_addr_equal(&fl1->dst, &fl->dst) ||
595 ipv6_opt_cmp(fl1->opt, fl->opt))
596 goto release;
597
598 err = -ENOMEM;
599 if (sfl1 == NULL)
600 goto release;
601 if (fl->linger > fl1->linger)
602 fl1->linger = fl->linger;
603 if ((long)(fl->expires - fl1->expires) > 0)
604 fl1->expires = fl->expires;
Pavel Emelyanov04028042007-10-18 05:14:58 -0700605 fl_link(np, sfl1, fl1);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606 fl_free(fl);
607 return 0;
608
609release:
610 fl_release(fl1);
611 goto done;
612 }
613 }
614 err = -ENOENT;
615 if (!(freq.flr_flags&IPV6_FL_F_CREATE))
616 goto done;
617
618 err = -ENOMEM;
619 if (sfl1 == NULL || (err = mem_check(sk)) != 0)
620 goto done;
621
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700622 fl1 = fl_intern(net, fl, freq.flr_label);
Pavel Emelyanov78c2e502007-10-18 05:18:56 -0700623 if (fl1 != NULL)
624 goto recheck;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700625
David S. Miller6c94d362005-05-29 20:28:01 -0700626 if (!freq.flr_label) {
627 if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label,
628 &fl->label, sizeof(fl->label))) {
629 /* Intentionally ignore fault. */
630 }
631 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
Pavel Emelyanov04028042007-10-18 05:14:58 -0700633 fl_link(np, sfl1, fl);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700634 return 0;
635
636 default:
637 return -EINVAL;
638 }
639
640done:
641 fl_free(fl);
642 kfree(sfl1);
643 return err;
644}
645
646#ifdef CONFIG_PROC_FS
647
648struct ip6fl_iter_state {
Benjamin Thery5983a3d2008-03-26 16:53:30 -0700649 struct seq_net_private p;
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600650 struct pid_namespace *pid_ns;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651 int bucket;
652};
653
654#define ip6fl_seq_private(seq) ((struct ip6fl_iter_state *)(seq)->private)
655
656static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
657{
658 struct ip6_flowlabel *fl = NULL;
659 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
Benjamin Thery5983a3d2008-03-26 16:53:30 -0700660 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700661
662 for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000663 for_each_fl_rcu(state->bucket, fl) {
664 if (net_eq(fl->fl_net, net))
665 goto out;
666 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700667 }
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000668 fl = NULL;
669out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700670 return fl;
671}
672
673static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
674{
675 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
Benjamin Thery5983a3d2008-03-26 16:53:30 -0700676 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000678 for_each_fl_continue_rcu(fl) {
679 if (net_eq(fl->fl_net, net))
680 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700681 }
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000682
683try_again:
684 if (++state->bucket <= FL_HASH_MASK) {
685 for_each_fl_rcu(state->bucket, fl) {
686 if (net_eq(fl->fl_net, net))
687 goto out;
688 }
689 goto try_again;
690 }
691 fl = NULL;
692
693out:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 return fl;
695}
696
697static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
698{
699 struct ip6_flowlabel *fl = ip6fl_get_first(seq);
700 if (fl)
701 while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
702 --pos;
703 return pos ? NULL : fl;
704}
705
706static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000707 __acquires(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700708{
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000709 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
711}
712
713static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
714{
715 struct ip6_flowlabel *fl;
716
717 if (v == SEQ_START_TOKEN)
718 fl = ip6fl_get_first(seq);
719 else
720 fl = ip6fl_get_next(seq, v);
721 ++*pos;
722 return fl;
723}
724
725static void ip6fl_seq_stop(struct seq_file *seq, void *v)
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000726 __releases(RCU)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727{
YOSHIFUJI Hideaki / 吉藤英明d3aedd52013-01-30 09:27:47 +0000728 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729}
730
James Morris1b7c2db2006-10-31 00:43:44 -0800731static int ip6fl_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732{
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600733 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
James Morris1b7c2db2006-10-31 00:43:44 -0800734 if (v == SEQ_START_TOKEN)
735 seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n",
736 "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt");
737 else {
738 struct ip6_flowlabel *fl = v;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 seq_printf(seq,
Harvey Harrison4b7a4272008-10-29 12:50:24 -0700740 "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
Eric Dumazet95c96172012-04-15 05:58:06 +0000741 (unsigned int)ntohl(fl->label),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700742 fl->share,
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600743 ((fl->share == IPV6_FL_S_PROCESS) ?
744 pid_nr_ns(fl->owner.pid, state->pid_ns) :
745 ((fl->share == IPV6_FL_S_USER) ?
746 from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
747 0)),
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 atomic_read(&fl->users),
749 fl->linger/HZ,
750 (long)(fl->expires - jiffies)/HZ,
Harvey Harrisonb0711952008-10-28 16:05:40 -0700751 &fl->dst,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 fl->opt ? fl->opt->opt_nflen : 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 return 0;
755}
756
Philippe De Muyter56b3d972007-07-10 23:07:31 -0700757static const struct seq_operations ip6fl_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 .start = ip6fl_seq_start,
759 .next = ip6fl_seq_next,
760 .stop = ip6fl_seq_stop,
761 .show = ip6fl_seq_show,
762};
763
764static int ip6fl_seq_open(struct inode *inode, struct file *file)
765{
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600766 struct seq_file *seq;
767 struct ip6fl_iter_state *state;
768 int err;
769
770 err = seq_open_net(inode, file, &ip6fl_seq_ops,
771 sizeof(struct ip6fl_iter_state));
772
773 if (!err) {
774 seq = file->private_data;
775 state = ip6fl_seq_private(seq);
776 rcu_read_lock();
777 state->pid_ns = get_pid_ns(task_active_pid_ns(current));
778 rcu_read_unlock();
779 }
780 return err;
781}
782
783static int ip6fl_seq_release(struct inode *inode, struct file *file)
784{
785 struct seq_file *seq = file->private_data;
786 struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
787 put_pid_ns(state->pid_ns);
788 return seq_release_net(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789}
790
Arjan van de Ven9a321442007-02-12 00:55:35 -0800791static const struct file_operations ip6fl_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 .owner = THIS_MODULE,
793 .open = ip6fl_seq_open,
794 .read = seq_read,
795 .llseek = seq_lseek,
Eric W. Biederman4f82f452012-05-24 10:37:59 -0600796 .release = ip6fl_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797};
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000799static int __net_init ip6_flowlabel_proc_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800{
Benjamin Thery5983a3d2008-03-26 16:53:30 -0700801 if (!proc_net_fops_create(net, "ip6_flowlabel",
802 S_IRUGO, &ip6fl_seq_fops))
Daniel Lezcano0a3e78a2007-12-11 02:23:18 -0800803 return -ENOMEM;
804 return 0;
805}
806
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000807static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
Daniel Lezcano0a3e78a2007-12-11 02:23:18 -0800808{
809 proc_net_remove(net, "ip6_flowlabel");
810}
811#else
/* Without CONFIG_PROC_FS there is no proc entry to create ... */
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}

/* ... and none to remove. */
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819#endif
Daniel Lezcano0a3e78a2007-12-11 02:23:18 -0800820
Alexey Dobriyan2c8c1e72010-01-17 03:35:32 +0000821static void __net_exit ip6_flowlabel_net_exit(struct net *net)
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700822{
823 ip6_fl_purge(net);
Benjamin Thery5983a3d2008-03-26 16:53:30 -0700824 ip6_flowlabel_proc_fini(net);
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700825}
826
827static struct pernet_operations ip6_flowlabel_net_ops = {
Benjamin Thery5983a3d2008-03-26 16:53:30 -0700828 .init = ip6_flowlabel_proc_init,
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700829 .exit = ip6_flowlabel_net_exit,
830};
831
Daniel Lezcano0a3e78a2007-12-11 02:23:18 -0800832int ip6_flowlabel_init(void)
833{
Benjamin Thery5983a3d2008-03-26 16:53:30 -0700834 return register_pernet_subsys(&ip6_flowlabel_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835}
836
837void ip6_flowlabel_cleanup(void)
838{
839 del_timer(&ip6_fl_gc_timer);
Benjamin Thery60e8fbc2008-03-26 16:53:08 -0700840 unregister_pernet_subsys(&ip6_flowlabel_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700841}