blob: 5dfdad5cbcd43ae66ed97badbc58fa7db5c995e6 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070046#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
48#include "fib_lookup.h"
49
50#define FSprintk(a...)
51
Stephen Hemminger832b4c52006-08-29 16:48:09 -070052static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070053static struct hlist_head *fib_info_hash;
54static struct hlist_head *fib_info_laddrhash;
55static unsigned int fib_hash_size;
56static unsigned int fib_info_cnt;
57
58#define DEVINDEX_HASHBITS 8
59#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) and change_nexthops(fi) open a brace scope that declares
 * 'nhsel' (the nexthop index) and 'nh' (the current nexthop; const for
 * for_nexthops, writable for change_nexthops) and then start a for loop.
 * The statement following the macro is the loop body.  The scope opened by
 * the macro must be closed with endfor_nexthops(fi).
 *
 * In the multipath build the loop walks all (fi)->fib_nhs nexthops.
 */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the loop body runs exactly once, with 'nh' pointing at
 * the first nexthop.  The hope is that gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops() / change_nexthops(). */
#define endfor_nexthops(fi) }
85
86
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -080087static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070088{
89 int error;
90 u8 scope;
91} fib_props[RTA_MAX + 1] = {
92 {
93 .error = 0,
94 .scope = RT_SCOPE_NOWHERE,
95 }, /* RTN_UNSPEC */
96 {
97 .error = 0,
98 .scope = RT_SCOPE_UNIVERSE,
99 }, /* RTN_UNICAST */
100 {
101 .error = 0,
102 .scope = RT_SCOPE_HOST,
103 }, /* RTN_LOCAL */
104 {
105 .error = 0,
106 .scope = RT_SCOPE_LINK,
107 }, /* RTN_BROADCAST */
108 {
109 .error = 0,
110 .scope = RT_SCOPE_LINK,
111 }, /* RTN_ANYCAST */
112 {
113 .error = 0,
114 .scope = RT_SCOPE_UNIVERSE,
115 }, /* RTN_MULTICAST */
116 {
117 .error = -EINVAL,
118 .scope = RT_SCOPE_UNIVERSE,
119 }, /* RTN_BLACKHOLE */
120 {
121 .error = -EHOSTUNREACH,
122 .scope = RT_SCOPE_UNIVERSE,
123 }, /* RTN_UNREACHABLE */
124 {
125 .error = -EACCES,
126 .scope = RT_SCOPE_UNIVERSE,
127 }, /* RTN_PROHIBIT */
128 {
129 .error = -EAGAIN,
130 .scope = RT_SCOPE_UNIVERSE,
131 }, /* RTN_THROW */
132 {
133 .error = -EINVAL,
134 .scope = RT_SCOPE_NOWHERE,
135 }, /* RTN_NAT */
136 {
137 .error = -EINVAL,
138 .scope = RT_SCOPE_NOWHERE,
139 }, /* RTN_XRESOLVE */
140};
141
142
143/* Release a nexthop info record */
144
145void free_fib_info(struct fib_info *fi)
146{
147 if (fi->fib_dead == 0) {
148 printk("Freeing alive fib_info %p\n", fi);
149 return;
150 }
151 change_nexthops(fi) {
152 if (nh->nh_dev)
153 dev_put(nh->nh_dev);
154 nh->nh_dev = NULL;
155 } endfor_nexthops(fi);
156 fib_info_cnt--;
157 kfree(fi);
158}
159
160void fib_release_info(struct fib_info *fi)
161{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700162 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 if (fi && --fi->fib_treeref == 0) {
164 hlist_del(&fi->fib_hash);
165 if (fi->fib_prefsrc)
166 hlist_del(&fi->fib_lhash);
167 change_nexthops(fi) {
168 if (!nh->nh_dev)
169 continue;
170 hlist_del(&nh->nh_hash);
171 } endfor_nexthops(fi)
172 fi->fib_dead = 1;
173 fib_info_put(fi);
174 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700175 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176}
177
178static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179{
180 const struct fib_nh *onh = ofi->fib_nh;
181
182 for_nexthops(fi) {
183 if (nh->nh_oif != onh->nh_oif ||
184 nh->nh_gw != onh->nh_gw ||
185 nh->nh_scope != onh->nh_scope ||
186#ifdef CONFIG_IP_ROUTE_MULTIPATH
187 nh->nh_weight != onh->nh_weight ||
188#endif
189#ifdef CONFIG_NET_CLS_ROUTE
190 nh->nh_tclassid != onh->nh_tclassid ||
191#endif
192 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193 return -1;
194 onh++;
195 } endfor_nexthops(fi);
196 return 0;
197}
198
199static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200{
201 unsigned int mask = (fib_hash_size - 1);
202 unsigned int val = fi->fib_nhs;
203
204 val ^= fi->fib_protocol;
205 val ^= fi->fib_prefsrc;
206 val ^= fi->fib_priority;
207
208 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209}
210
211static struct fib_info *fib_find_info(const struct fib_info *nfi)
212{
213 struct hlist_head *head;
214 struct hlist_node *node;
215 struct fib_info *fi;
216 unsigned int hash;
217
218 hash = fib_info_hashfn(nfi);
219 head = &fib_info_hash[hash];
220
221 hlist_for_each_entry(fi, node, head, fib_hash) {
222 if (fi->fib_nhs != nfi->fib_nhs)
223 continue;
224 if (nfi->fib_protocol == fi->fib_protocol &&
225 nfi->fib_prefsrc == fi->fib_prefsrc &&
226 nfi->fib_priority == fi->fib_priority &&
227 memcmp(nfi->fib_metrics, fi->fib_metrics,
228 sizeof(fi->fib_metrics)) == 0 &&
229 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231 return fi;
232 }
233
234 return NULL;
235}
236
237static inline unsigned int fib_devindex_hashfn(unsigned int val)
238{
239 unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241 return (val ^
242 (val >> DEVINDEX_HASHBITS) ^
243 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244}
245
246/* Check, that the gateway is already configured.
247 Used only by redirect accept routine.
248 */
249
250int ip_fib_check_default(u32 gw, struct net_device *dev)
251{
252 struct hlist_head *head;
253 struct hlist_node *node;
254 struct fib_nh *nh;
255 unsigned int hash;
256
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700257 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
259 hash = fib_devindex_hashfn(dev->ifindex);
260 head = &fib_info_devhash[hash];
261 hlist_for_each_entry(nh, node, head, nh_hash) {
262 if (nh->nh_dev == dev &&
263 nh->nh_gw == gw &&
264 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700265 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 return 0;
267 }
268 }
269
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700270 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
272 return -1;
273}
274
275void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
Patrick McHardy2dfe55b2006-08-10 23:08:33 -0700276 int z, u32 tb_id,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700277 struct nlmsghdr *n, struct netlink_skb_parms *req)
278{
279 struct sk_buff *skb;
Jamal Hadi Salim9ed19f32005-06-18 22:55:51 -0700280 u32 pid = req ? req->pid : n->nlmsg_pid;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700281 int payload = sizeof(struct rtmsg) + 256;
282 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283
Thomas Graff21c7bc2006-08-15 00:34:17 -0700284 skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
285 if (skb == NULL)
286 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700287
Thomas Graff21c7bc2006-08-15 00:34:17 -0700288 err = fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
289 fa->fa_type, fa->fa_scope, &key, z, fa->fa_tos,
290 fa->fa_info, 0);
291 if (err < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700292 kfree_skb(skb);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700293 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 }
Thomas Graff21c7bc2006-08-15 00:34:17 -0700295
296 err = rtnl_notify(skb, pid, RTNLGRP_IPV4_ROUTE, n, GFP_KERNEL);
297errout:
298 if (err < 0)
299 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700300}
301
302/* Return the first fib alias matching TOS with
303 * priority less than or equal to PRIO.
304 */
305struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
306{
307 if (fah) {
308 struct fib_alias *fa;
309 list_for_each_entry(fa, fah, fa_list) {
310 if (fa->fa_tos > tos)
311 continue;
312 if (fa->fa_info->fib_priority >= prio ||
313 fa->fa_tos < tos)
314 return fa;
315 }
316 }
317 return NULL;
318}
319
320int fib_detect_death(struct fib_info *fi, int order,
321 struct fib_info **last_resort, int *last_idx, int *dflt)
322{
323 struct neighbour *n;
324 int state = NUD_NONE;
325
326 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
327 if (n) {
328 state = n->nud_state;
329 neigh_release(n);
330 }
331 if (state==NUD_REACHABLE)
332 return 0;
333 if ((state&NUD_VALID) && order != *dflt)
334 return 0;
335 if ((state&NUD_VALID) ||
336 (*last_idx<0 && order > *dflt)) {
337 *last_resort = fi;
338 *last_idx = order;
339 }
340 return 1;
341}
342
343#ifdef CONFIG_IP_ROUTE_MULTIPATH
344
345static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
346{
347 while (RTA_OK(attr,attrlen)) {
348 if (attr->rta_type == type)
349 return *(u32*)RTA_DATA(attr);
350 attr = RTA_NEXT(attr, attrlen);
351 }
352 return 0;
353}
354
355static int
356fib_count_nexthops(struct rtattr *rta)
357{
358 int nhs = 0;
359 struct rtnexthop *nhp = RTA_DATA(rta);
360 int nhlen = RTA_PAYLOAD(rta);
361
362 while (nhlen >= (int)sizeof(struct rtnexthop)) {
363 if ((nhlen -= nhp->rtnh_len) < 0)
364 return 0;
365 nhs++;
366 nhp = RTNH_NEXT(nhp);
367 };
368 return nhs;
369}
370
371static int
372fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
373{
374 struct rtnexthop *nhp = RTA_DATA(rta);
375 int nhlen = RTA_PAYLOAD(rta);
376
377 change_nexthops(fi) {
378 int attrlen = nhlen - sizeof(struct rtnexthop);
379 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
380 return -EINVAL;
381 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
382 nh->nh_oif = nhp->rtnh_ifindex;
383 nh->nh_weight = nhp->rtnh_hops + 1;
384 if (attrlen) {
385 nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
386#ifdef CONFIG_NET_CLS_ROUTE
387 nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
388#endif
389 }
390 nhp = RTNH_NEXT(nhp);
391 } endfor_nexthops(fi);
392 return 0;
393}
394
395#endif
396
397int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
398 struct fib_info *fi)
399{
400#ifdef CONFIG_IP_ROUTE_MULTIPATH
401 struct rtnexthop *nhp;
402 int nhlen;
403#endif
404
405 if (rta->rta_priority &&
406 *rta->rta_priority != fi->fib_priority)
407 return 1;
408
409 if (rta->rta_oif || rta->rta_gw) {
410 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
411 (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
412 return 0;
413 return 1;
414 }
415
416#ifdef CONFIG_IP_ROUTE_MULTIPATH
417 if (rta->rta_mp == NULL)
418 return 0;
419 nhp = RTA_DATA(rta->rta_mp);
420 nhlen = RTA_PAYLOAD(rta->rta_mp);
421
422 for_nexthops(fi) {
423 int attrlen = nhlen - sizeof(struct rtnexthop);
424 u32 gw;
425
426 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
427 return -EINVAL;
428 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
429 return 1;
430 if (attrlen) {
431 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
432 if (gw && gw != nh->nh_gw)
433 return 1;
434#ifdef CONFIG_NET_CLS_ROUTE
435 gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
436 if (gw && gw != nh->nh_tclassid)
437 return 1;
438#endif
439 }
440 nhp = RTNH_NEXT(nhp);
441 } endfor_nexthops(fi);
442#endif
443 return 0;
444}
445
446
/*
   Picture
   -------

   The semantics of a nexthop are very messy for historical reasons.
   We have to take into account that:
   a) the gateway can actually be a local interface address,
      so that a gatewayed route is direct;
   b) the gateway must be an on-link address, possibly
      described not by an ifaddr but by a direct route;
   c) if both gateway and interface are specified, they must not
      contradict each other;
   d) if tunnel routes are used, the gateway may not be on-link.

   An attempt to reconcile all of these (alas, self-contradictory)
   conditions results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of the code practically does not increase, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is a local address,
   "link" is a direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is a true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if the "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   The code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as follows.

   {universe prefix}  -> (gw, oif) [scope link]
                          |
                          |-> {link prefix} -> (gw, oif) [scope local]
                                                |
                                                |-> {local prefix} (terminal node)
 */
490
491static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
492{
493 int err;
494
495 if (nh->nh_gw) {
496 struct fib_result res;
497
498#ifdef CONFIG_IP_ROUTE_PERVASIVE
499 if (nh->nh_flags&RTNH_F_PERVASIVE)
500 return 0;
501#endif
502 if (nh->nh_flags&RTNH_F_ONLINK) {
503 struct net_device *dev;
504
505 if (r->rtm_scope >= RT_SCOPE_LINK)
506 return -EINVAL;
507 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
508 return -EINVAL;
509 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
510 return -ENODEV;
511 if (!(dev->flags&IFF_UP))
512 return -ENETDOWN;
513 nh->nh_dev = dev;
514 dev_hold(dev);
515 nh->nh_scope = RT_SCOPE_LINK;
516 return 0;
517 }
518 {
519 struct flowi fl = { .nl_u = { .ip4_u =
520 { .daddr = nh->nh_gw,
521 .scope = r->rtm_scope + 1 } },
522 .oif = nh->nh_oif };
523
524 /* It is not necessary, but requires a bit of thinking */
525 if (fl.fl4_scope < RT_SCOPE_LINK)
526 fl.fl4_scope = RT_SCOPE_LINK;
527 if ((err = fib_lookup(&fl, &res)) != 0)
528 return err;
529 }
530 err = -EINVAL;
531 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
532 goto out;
533 nh->nh_scope = res.scope;
534 nh->nh_oif = FIB_RES_OIF(res);
535 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
536 goto out;
537 dev_hold(nh->nh_dev);
538 err = -ENETDOWN;
539 if (!(nh->nh_dev->flags & IFF_UP))
540 goto out;
541 err = 0;
542out:
543 fib_res_put(&res);
544 return err;
545 } else {
546 struct in_device *in_dev;
547
548 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
549 return -EINVAL;
550
551 in_dev = inetdev_by_index(nh->nh_oif);
552 if (in_dev == NULL)
553 return -ENODEV;
554 if (!(in_dev->dev->flags&IFF_UP)) {
555 in_dev_put(in_dev);
556 return -ENETDOWN;
557 }
558 nh->nh_dev = in_dev->dev;
559 dev_hold(nh->nh_dev);
560 nh->nh_scope = RT_SCOPE_HOST;
561 in_dev_put(in_dev);
562 }
563 return 0;
564}
565
566static inline unsigned int fib_laddr_hashfn(u32 val)
567{
568 unsigned int mask = (fib_hash_size - 1);
569
570 return (val ^ (val >> 7) ^ (val >> 14)) & mask;
571}
572
573static struct hlist_head *fib_hash_alloc(int bytes)
574{
575 if (bytes <= PAGE_SIZE)
576 return kmalloc(bytes, GFP_KERNEL);
577 else
578 return (struct hlist_head *)
579 __get_free_pages(GFP_KERNEL, get_order(bytes));
580}
581
582static void fib_hash_free(struct hlist_head *hash, int bytes)
583{
584 if (!hash)
585 return;
586
587 if (bytes <= PAGE_SIZE)
588 kfree(hash);
589 else
590 free_pages((unsigned long) hash, get_order(bytes));
591}
592
593static void fib_hash_move(struct hlist_head *new_info_hash,
594 struct hlist_head *new_laddrhash,
595 unsigned int new_size)
596{
David S. Millerb7656e72005-08-05 04:12:48 -0700597 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700598 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700599 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700601 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700602 old_info_hash = fib_info_hash;
603 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700604 fib_hash_size = new_size;
605
606 for (i = 0; i < old_size; i++) {
607 struct hlist_head *head = &fib_info_hash[i];
608 struct hlist_node *node, *n;
609 struct fib_info *fi;
610
611 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
612 struct hlist_head *dest;
613 unsigned int new_hash;
614
615 hlist_del(&fi->fib_hash);
616
617 new_hash = fib_info_hashfn(fi);
618 dest = &new_info_hash[new_hash];
619 hlist_add_head(&fi->fib_hash, dest);
620 }
621 }
622 fib_info_hash = new_info_hash;
623
624 for (i = 0; i < old_size; i++) {
625 struct hlist_head *lhead = &fib_info_laddrhash[i];
626 struct hlist_node *node, *n;
627 struct fib_info *fi;
628
629 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
630 struct hlist_head *ldest;
631 unsigned int new_hash;
632
633 hlist_del(&fi->fib_lhash);
634
635 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
636 ldest = &new_laddrhash[new_hash];
637 hlist_add_head(&fi->fib_lhash, ldest);
638 }
639 }
640 fib_info_laddrhash = new_laddrhash;
641
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700642 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700643
644 bytes = old_size * sizeof(struct hlist_head *);
645 fib_hash_free(old_info_hash, bytes);
646 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700647}
648
649struct fib_info *
650fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
651 const struct nlmsghdr *nlh, int *errp)
652{
653 int err;
654 struct fib_info *fi = NULL;
655 struct fib_info *ofi;
656#ifdef CONFIG_IP_ROUTE_MULTIPATH
657 int nhs = 1;
658#else
659 const int nhs = 1;
660#endif
661#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
662 u32 mp_alg = IP_MP_ALG_NONE;
663#endif
664
665 /* Fast check to catch the most weird cases */
666 if (fib_props[r->rtm_type].scope > r->rtm_scope)
667 goto err_inval;
668
669#ifdef CONFIG_IP_ROUTE_MULTIPATH
670 if (rta->rta_mp) {
671 nhs = fib_count_nexthops(rta->rta_mp);
672 if (nhs == 0)
673 goto err_inval;
674 }
675#endif
676#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
677 if (rta->rta_mp_alg) {
678 mp_alg = *rta->rta_mp_alg;
679
680 if (mp_alg < IP_MP_ALG_NONE ||
681 mp_alg > IP_MP_ALG_MAX)
682 goto err_inval;
683 }
684#endif
685
686 err = -ENOBUFS;
687 if (fib_info_cnt >= fib_hash_size) {
688 unsigned int new_size = fib_hash_size << 1;
689 struct hlist_head *new_info_hash;
690 struct hlist_head *new_laddrhash;
691 unsigned int bytes;
692
693 if (!new_size)
694 new_size = 1;
695 bytes = new_size * sizeof(struct hlist_head *);
696 new_info_hash = fib_hash_alloc(bytes);
697 new_laddrhash = fib_hash_alloc(bytes);
698 if (!new_info_hash || !new_laddrhash) {
699 fib_hash_free(new_info_hash, bytes);
700 fib_hash_free(new_laddrhash, bytes);
701 } else {
702 memset(new_info_hash, 0, bytes);
703 memset(new_laddrhash, 0, bytes);
704
705 fib_hash_move(new_info_hash, new_laddrhash, new_size);
706 }
707
708 if (!fib_hash_size)
709 goto failure;
710 }
711
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700712 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700713 if (fi == NULL)
714 goto failure;
715 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716
717 fi->fib_protocol = r->rtm_protocol;
718
719 fi->fib_nhs = nhs;
720 change_nexthops(fi) {
721 nh->nh_parent = fi;
722 } endfor_nexthops(fi)
723
724 fi->fib_flags = r->rtm_flags;
725 if (rta->rta_priority)
726 fi->fib_priority = *rta->rta_priority;
727 if (rta->rta_mx) {
728 int attrlen = RTA_PAYLOAD(rta->rta_mx);
729 struct rtattr *attr = RTA_DATA(rta->rta_mx);
730
731 while (RTA_OK(attr, attrlen)) {
732 unsigned flavor = attr->rta_type;
733 if (flavor) {
734 if (flavor > RTAX_MAX)
735 goto err_inval;
736 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
737 }
738 attr = RTA_NEXT(attr, attrlen);
739 }
740 }
741 if (rta->rta_prefsrc)
742 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
743
744 if (rta->rta_mp) {
745#ifdef CONFIG_IP_ROUTE_MULTIPATH
746 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
747 goto failure;
748 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
749 goto err_inval;
750 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
751 goto err_inval;
752#ifdef CONFIG_NET_CLS_ROUTE
753 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
754 goto err_inval;
755#endif
756#else
757 goto err_inval;
758#endif
759 } else {
760 struct fib_nh *nh = fi->fib_nh;
761 if (rta->rta_oif)
762 nh->nh_oif = *rta->rta_oif;
763 if (rta->rta_gw)
764 memcpy(&nh->nh_gw, rta->rta_gw, 4);
765#ifdef CONFIG_NET_CLS_ROUTE
766 if (rta->rta_flow)
767 memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
768#endif
769 nh->nh_flags = r->rtm_flags;
770#ifdef CONFIG_IP_ROUTE_MULTIPATH
771 nh->nh_weight = 1;
772#endif
773 }
774
775#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
776 fi->fib_mp_alg = mp_alg;
777#endif
778
779 if (fib_props[r->rtm_type].error) {
780 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
781 goto err_inval;
782 goto link_it;
783 }
784
785 if (r->rtm_scope > RT_SCOPE_HOST)
786 goto err_inval;
787
788 if (r->rtm_scope == RT_SCOPE_HOST) {
789 struct fib_nh *nh = fi->fib_nh;
790
791 /* Local address is added. */
792 if (nhs != 1 || nh->nh_gw)
793 goto err_inval;
794 nh->nh_scope = RT_SCOPE_NOWHERE;
795 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
796 err = -ENODEV;
797 if (nh->nh_dev == NULL)
798 goto failure;
799 } else {
800 change_nexthops(fi) {
801 if ((err = fib_check_nh(r, fi, nh)) != 0)
802 goto failure;
803 } endfor_nexthops(fi)
804 }
805
806 if (fi->fib_prefsrc) {
807 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
808 memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
809 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
810 goto err_inval;
811 }
812
813link_it:
814 if ((ofi = fib_find_info(fi)) != NULL) {
815 fi->fib_dead = 1;
816 free_fib_info(fi);
817 ofi->fib_treeref++;
818 return ofi;
819 }
820
821 fi->fib_treeref++;
822 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700823 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 hlist_add_head(&fi->fib_hash,
825 &fib_info_hash[fib_info_hashfn(fi)]);
826 if (fi->fib_prefsrc) {
827 struct hlist_head *head;
828
829 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
830 hlist_add_head(&fi->fib_lhash, head);
831 }
832 change_nexthops(fi) {
833 struct hlist_head *head;
834 unsigned int hash;
835
836 if (!nh->nh_dev)
837 continue;
838 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
839 head = &fib_info_devhash[hash];
840 hlist_add_head(&nh->nh_hash, head);
841 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700842 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700843 return fi;
844
845err_inval:
846 err = -EINVAL;
847
848failure:
849 *errp = err;
850 if (fi) {
851 fi->fib_dead = 1;
852 free_fib_info(fi);
853 }
854 return NULL;
855}
856
Robert Olssone5b43762005-08-25 13:01:03 -0700857/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858int fib_semantic_match(struct list_head *head, const struct flowi *flp,
859 struct fib_result *res, __u32 zone, __u32 mask,
860 int prefixlen)
861{
862 struct fib_alias *fa;
863 int nh_sel = 0;
864
Robert Olssone5b43762005-08-25 13:01:03 -0700865 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 int err;
867
868 if (fa->fa_tos &&
869 fa->fa_tos != flp->fl4_tos)
870 continue;
871
872 if (fa->fa_scope < flp->fl4_scope)
873 continue;
874
875 fa->fa_state |= FA_S_ACCESSED;
876
877 err = fib_props[fa->fa_type].error;
878 if (err == 0) {
879 struct fib_info *fi = fa->fa_info;
880
881 if (fi->fib_flags & RTNH_F_DEAD)
882 continue;
883
884 switch (fa->fa_type) {
885 case RTN_UNICAST:
886 case RTN_LOCAL:
887 case RTN_BROADCAST:
888 case RTN_ANYCAST:
889 case RTN_MULTICAST:
890 for_nexthops(fi) {
891 if (nh->nh_flags&RTNH_F_DEAD)
892 continue;
893 if (!flp->oif || flp->oif == nh->nh_oif)
894 break;
895 }
896#ifdef CONFIG_IP_ROUTE_MULTIPATH
897 if (nhsel < fi->fib_nhs) {
898 nh_sel = nhsel;
899 goto out_fill_res;
900 }
901#else
902 if (nhsel < 1) {
903 goto out_fill_res;
904 }
905#endif
906 endfor_nexthops(fi);
907 continue;
908
909 default:
910 printk(KERN_DEBUG "impossible 102\n");
911 return -EINVAL;
912 };
913 }
914 return err;
915 }
916 return 1;
917
918out_fill_res:
919 res->prefixlen = prefixlen;
920 res->nh_sel = nh_sel;
921 res->type = fa->fa_type;
922 res->scope = fa->fa_scope;
923 res->fi = fa->fa_info;
924#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
925 res->netmask = mask;
926 res->network = zone &
927 (0xFFFFFFFF >> (32 - prefixlen));
928#endif
929 atomic_inc(&res->fi->fib_clntref);
930 return 0;
931}
932
933/* Find appropriate source address to this destination */
934
935u32 __fib_res_prefsrc(struct fib_result *res)
936{
937 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
938}
939
940int
941fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Patrick McHardy2dfe55b2006-08-10 23:08:33 -0700942 u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -0700943 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944{
945 struct rtmsg *rtm;
946 struct nlmsghdr *nlh;
947 unsigned char *b = skb->tail;
948
Jamal Hadi Salimb6544c02005-06-18 22:54:12 -0700949 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 rtm = NLMSG_DATA(nlh);
951 rtm->rtm_family = AF_INET;
952 rtm->rtm_dst_len = dst_len;
953 rtm->rtm_src_len = 0;
954 rtm->rtm_tos = tos;
955 rtm->rtm_table = tb_id;
Patrick McHardy9e762a42006-08-10 23:09:48 -0700956 RTA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957 rtm->rtm_type = type;
958 rtm->rtm_flags = fi->fib_flags;
959 rtm->rtm_scope = scope;
960 if (rtm->rtm_dst_len)
961 RTA_PUT(skb, RTA_DST, 4, dst);
962 rtm->rtm_protocol = fi->fib_protocol;
963 if (fi->fib_priority)
964 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700965 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
966 goto rtattr_failure;
967 if (fi->fib_prefsrc)
968 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
969 if (fi->fib_nhs == 1) {
970 if (fi->fib_nh->nh_gw)
971 RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
972 if (fi->fib_nh->nh_oif)
973 RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700974#ifdef CONFIG_NET_CLS_ROUTE
975 if (fi->fib_nh[0].nh_tclassid)
976 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
977#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 }
979#ifdef CONFIG_IP_ROUTE_MULTIPATH
980 if (fi->fib_nhs > 1) {
981 struct rtnexthop *nhp;
982 struct rtattr *mp_head;
983 if (skb_tailroom(skb) <= RTA_SPACE(0))
984 goto rtattr_failure;
985 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
986
987 for_nexthops(fi) {
988 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
989 goto rtattr_failure;
990 nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
991 nhp->rtnh_flags = nh->nh_flags & 0xFF;
992 nhp->rtnh_hops = nh->nh_weight-1;
993 nhp->rtnh_ifindex = nh->nh_oif;
994 if (nh->nh_gw)
995 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700996#ifdef CONFIG_NET_CLS_ROUTE
997 if (nh->nh_tclassid)
998 RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid);
999#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000 nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
1001 } endfor_nexthops(fi);
1002 mp_head->rta_type = RTA_MULTIPATH;
1003 mp_head->rta_len = skb->tail - (u8*)mp_head;
1004 }
1005#endif
1006 nlh->nlmsg_len = skb->tail - b;
1007 return skb->len;
1008
1009nlmsg_failure:
1010rtattr_failure:
1011 skb_trim(skb, b - skb->data);
1012 return -1;
1013}
1014
1015#ifndef CONFIG_IP_NOSIOCRT
1016
1017int
1018fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1019 struct kern_rta *rta, struct rtentry *r)
1020{
1021 int plen;
1022 u32 *ptr;
1023
1024 memset(rtm, 0, sizeof(*rtm));
1025 memset(rta, 0, sizeof(*rta));
1026
1027 if (r->rt_dst.sa_family != AF_INET)
1028 return -EAFNOSUPPORT;
1029
1030 /* Check mask for validity:
1031 a) it must be contiguous.
1032 b) destination must have all host bits clear.
1033 c) if application forgot to set correct family (AF_INET),
1034 reject request unless it is absolutely clear i.e.
1035 both family and mask are zero.
1036 */
1037 plen = 32;
1038 ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1039 if (!(r->rt_flags&RTF_HOST)) {
1040 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1041 if (r->rt_genmask.sa_family != AF_INET) {
1042 if (mask || r->rt_genmask.sa_family)
1043 return -EAFNOSUPPORT;
1044 }
1045 if (bad_mask(mask, *ptr))
1046 return -EINVAL;
1047 plen = inet_mask_len(mask);
1048 }
1049
1050 nl->nlmsg_flags = NLM_F_REQUEST;
Alexey Kuznetsov28633512006-02-09 16:40:58 -08001051 nl->nlmsg_pid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 nl->nlmsg_seq = 0;
1053 nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1054 if (cmd == SIOCDELRT) {
1055 nl->nlmsg_type = RTM_DELROUTE;
1056 nl->nlmsg_flags = 0;
1057 } else {
1058 nl->nlmsg_type = RTM_NEWROUTE;
1059 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1060 rtm->rtm_protocol = RTPROT_BOOT;
1061 }
1062
1063 rtm->rtm_dst_len = plen;
1064 rta->rta_dst = ptr;
1065
1066 if (r->rt_metric) {
1067 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1068 rta->rta_priority = (u32*)&r->rt_pad3;
1069 }
1070 if (r->rt_flags&RTF_REJECT) {
1071 rtm->rtm_scope = RT_SCOPE_HOST;
1072 rtm->rtm_type = RTN_UNREACHABLE;
1073 return 0;
1074 }
1075 rtm->rtm_scope = RT_SCOPE_NOWHERE;
1076 rtm->rtm_type = RTN_UNICAST;
1077
1078 if (r->rt_dev) {
1079 char *colon;
1080 struct net_device *dev;
1081 char devname[IFNAMSIZ];
1082
1083 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1084 return -EFAULT;
1085 devname[IFNAMSIZ-1] = 0;
1086 colon = strchr(devname, ':');
1087 if (colon)
1088 *colon = 0;
1089 dev = __dev_get_by_name(devname);
1090 if (!dev)
1091 return -ENODEV;
1092 rta->rta_oif = &dev->ifindex;
1093 if (colon) {
1094 struct in_ifaddr *ifa;
Herbert Xue5ed6392005-10-03 14:35:55 -07001095 struct in_device *in_dev = __in_dev_get_rtnl(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096 if (!in_dev)
1097 return -ENODEV;
1098 *colon = ':';
1099 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1100 if (strcmp(ifa->ifa_label, devname) == 0)
1101 break;
1102 if (ifa == NULL)
1103 return -ENODEV;
1104 rta->rta_prefsrc = &ifa->ifa_local;
1105 }
1106 }
1107
1108 ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1109 if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1110 rta->rta_gw = ptr;
1111 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1112 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1113 }
1114
1115 if (cmd == SIOCDELRT)
1116 return 0;
1117
1118 if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1119 return -EINVAL;
1120
1121 if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1122 rtm->rtm_scope = RT_SCOPE_LINK;
1123
1124 if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1125 struct rtattr *rec;
1126 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1127 if (mx == NULL)
1128 return -ENOMEM;
1129 rta->rta_mx = mx;
1130 mx->rta_type = RTA_METRICS;
1131 mx->rta_len = RTA_LENGTH(0);
1132 if (r->rt_flags&RTF_MTU) {
1133 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1134 rec->rta_type = RTAX_ADVMSS;
1135 rec->rta_len = RTA_LENGTH(4);
1136 mx->rta_len += RTA_LENGTH(4);
1137 *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1138 }
1139 if (r->rt_flags&RTF_WINDOW) {
1140 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1141 rec->rta_type = RTAX_WINDOW;
1142 rec->rta_len = RTA_LENGTH(4);
1143 mx->rta_len += RTA_LENGTH(4);
1144 *(u32*)RTA_DATA(rec) = r->rt_window;
1145 }
1146 if (r->rt_flags&RTF_IRTT) {
1147 rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1148 rec->rta_type = RTAX_RTT;
1149 rec->rta_len = RTA_LENGTH(4);
1150 mx->rta_len += RTA_LENGTH(4);
1151 *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1152 }
1153 }
1154 return 0;
1155}
1156
1157#endif
1158
1159/*
1160 Update FIB if:
1161 - local address disappeared -> we must delete all the entries
1162 referring to it.
1163 - device went down -> we must shutdown all nexthops going via it.
1164 */
1165
1166int fib_sync_down(u32 local, struct net_device *dev, int force)
1167{
1168 int ret = 0;
1169 int scope = RT_SCOPE_NOWHERE;
1170
1171 if (force)
1172 scope = -1;
1173
1174 if (local && fib_info_laddrhash) {
1175 unsigned int hash = fib_laddr_hashfn(local);
1176 struct hlist_head *head = &fib_info_laddrhash[hash];
1177 struct hlist_node *node;
1178 struct fib_info *fi;
1179
1180 hlist_for_each_entry(fi, node, head, fib_lhash) {
1181 if (fi->fib_prefsrc == local) {
1182 fi->fib_flags |= RTNH_F_DEAD;
1183 ret++;
1184 }
1185 }
1186 }
1187
1188 if (dev) {
1189 struct fib_info *prev_fi = NULL;
1190 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1191 struct hlist_head *head = &fib_info_devhash[hash];
1192 struct hlist_node *node;
1193 struct fib_nh *nh;
1194
1195 hlist_for_each_entry(nh, node, head, nh_hash) {
1196 struct fib_info *fi = nh->nh_parent;
1197 int dead;
1198
1199 BUG_ON(!fi->fib_nhs);
1200 if (nh->nh_dev != dev || fi == prev_fi)
1201 continue;
1202 prev_fi = fi;
1203 dead = 0;
1204 change_nexthops(fi) {
1205 if (nh->nh_flags&RTNH_F_DEAD)
1206 dead++;
1207 else if (nh->nh_dev == dev &&
1208 nh->nh_scope != scope) {
1209 nh->nh_flags |= RTNH_F_DEAD;
1210#ifdef CONFIG_IP_ROUTE_MULTIPATH
1211 spin_lock_bh(&fib_multipath_lock);
1212 fi->fib_power -= nh->nh_power;
1213 nh->nh_power = 0;
1214 spin_unlock_bh(&fib_multipath_lock);
1215#endif
1216 dead++;
1217 }
1218#ifdef CONFIG_IP_ROUTE_MULTIPATH
1219 if (force > 1 && nh->nh_dev == dev) {
1220 dead = fi->fib_nhs;
1221 break;
1222 }
1223#endif
1224 } endfor_nexthops(fi)
1225 if (dead == fi->fib_nhs) {
1226 fi->fib_flags |= RTNH_F_DEAD;
1227 ret++;
1228 }
1229 }
1230 }
1231
1232 return ret;
1233}
1234
1235#ifdef CONFIG_IP_ROUTE_MULTIPATH
1236
1237/*
1238 Dead device goes up. We wake up dead nexthops.
1239 It takes sense only on multipath routes.
1240 */
1241
1242int fib_sync_up(struct net_device *dev)
1243{
1244 struct fib_info *prev_fi;
1245 unsigned int hash;
1246 struct hlist_head *head;
1247 struct hlist_node *node;
1248 struct fib_nh *nh;
1249 int ret;
1250
1251 if (!(dev->flags&IFF_UP))
1252 return 0;
1253
1254 prev_fi = NULL;
1255 hash = fib_devindex_hashfn(dev->ifindex);
1256 head = &fib_info_devhash[hash];
1257 ret = 0;
1258
1259 hlist_for_each_entry(nh, node, head, nh_hash) {
1260 struct fib_info *fi = nh->nh_parent;
1261 int alive;
1262
1263 BUG_ON(!fi->fib_nhs);
1264 if (nh->nh_dev != dev || fi == prev_fi)
1265 continue;
1266
1267 prev_fi = fi;
1268 alive = 0;
1269 change_nexthops(fi) {
1270 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1271 alive++;
1272 continue;
1273 }
1274 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1275 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001276 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 continue;
1278 alive++;
1279 spin_lock_bh(&fib_multipath_lock);
1280 nh->nh_power = 0;
1281 nh->nh_flags &= ~RTNH_F_DEAD;
1282 spin_unlock_bh(&fib_multipath_lock);
1283 } endfor_nexthops(fi)
1284
1285 if (alive > 0) {
1286 fi->fib_flags &= ~RTNH_F_DEAD;
1287 ret++;
1288 }
1289 }
1290
1291 return ret;
1292}
1293
1294/*
1295 The algorithm is suboptimal, but it provides really
1296 fair weighted route distribution.
1297 */
1298
1299void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1300{
1301 struct fib_info *fi = res->fi;
1302 int w;
1303
1304 spin_lock_bh(&fib_multipath_lock);
1305 if (fi->fib_power <= 0) {
1306 int power = 0;
1307 change_nexthops(fi) {
1308 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1309 power += nh->nh_weight;
1310 nh->nh_power = nh->nh_weight;
1311 }
1312 } endfor_nexthops(fi);
1313 fi->fib_power = power;
1314 if (power <= 0) {
1315 spin_unlock_bh(&fib_multipath_lock);
1316 /* Race condition: route has just become dead. */
1317 res->nh_sel = 0;
1318 return;
1319 }
1320 }
1321
1322
1323 /* w should be random number [0..fi->fib_power-1],
1324 it is pretty bad approximation.
1325 */
1326
1327 w = jiffies % fi->fib_power;
1328
1329 change_nexthops(fi) {
1330 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1331 if ((w -= nh->nh_power) <= 0) {
1332 nh->nh_power--;
1333 fi->fib_power--;
1334 res->nh_sel = nhsel;
1335 spin_unlock_bh(&fib_multipath_lock);
1336 return;
1337 }
1338 }
1339 } endfor_nexthops(fi);
1340
1341 /* Race condition: route has just become dead. */
1342 res->nh_sel = 0;
1343 spin_unlock_bh(&fib_multipath_lock);
1344}
1345#endif