blob: e63b8a98fb4df624b91f0bb44bf06e864570d7f5 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
45#include <net/ip_mp_alg.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070046#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070047#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070048
49#include "fib_lookup.h"
50
51#define FSprintk(a...)
52
Stephen Hemminger832b4c52006-08-29 16:48:09 -070053static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070054static struct hlist_head *fib_info_hash;
55static struct hlist_head *fib_info_laddrhash;
56static unsigned int fib_hash_size;
57static unsigned int fib_info_cnt;
58
59#define DEVINDEX_HASHBITS 8
60#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops()/change_nexthops() open a brace, declare the loop
 * variables "nhsel" (index) and "nh" (current nexthop; const for
 * for_nexthops, writable for change_nexthops) and emit the for-header.
 * The caller supplies the loop body and must close the opened brace
 * with endfor_nexthops().
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/* Walk all fib_nhs nexthops of fi. */
#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)

#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support exactly one nexthop is visited; the loop
 * is a dummy kept so both configurations share the same call sites.
 * Hope, that gcc will optimize it to get rid of dummy loop.
 */

#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
for (nhsel=0; nhsel < 1; nhsel++)

#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
for (nhsel=0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
86
87
Arjan van de Ven9b5b5cf2005-11-29 16:21:38 -080088static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070089{
90 int error;
91 u8 scope;
92} fib_props[RTA_MAX + 1] = {
93 {
94 .error = 0,
95 .scope = RT_SCOPE_NOWHERE,
96 }, /* RTN_UNSPEC */
97 {
98 .error = 0,
99 .scope = RT_SCOPE_UNIVERSE,
100 }, /* RTN_UNICAST */
101 {
102 .error = 0,
103 .scope = RT_SCOPE_HOST,
104 }, /* RTN_LOCAL */
105 {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_BROADCAST */
109 {
110 .error = 0,
111 .scope = RT_SCOPE_LINK,
112 }, /* RTN_ANYCAST */
113 {
114 .error = 0,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_MULTICAST */
117 {
118 .error = -EINVAL,
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_BLACKHOLE */
121 {
122 .error = -EHOSTUNREACH,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_UNREACHABLE */
125 {
126 .error = -EACCES,
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_PROHIBIT */
129 {
130 .error = -EAGAIN,
131 .scope = RT_SCOPE_UNIVERSE,
132 }, /* RTN_THROW */
133 {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_NAT */
137 {
138 .error = -EINVAL,
139 .scope = RT_SCOPE_NOWHERE,
140 }, /* RTN_XRESOLVE */
141};
142
143
144/* Release a nexthop info record */
145
146void free_fib_info(struct fib_info *fi)
147{
148 if (fi->fib_dead == 0) {
149 printk("Freeing alive fib_info %p\n", fi);
150 return;
151 }
152 change_nexthops(fi) {
153 if (nh->nh_dev)
154 dev_put(nh->nh_dev);
155 nh->nh_dev = NULL;
156 } endfor_nexthops(fi);
157 fib_info_cnt--;
158 kfree(fi);
159}
160
161void fib_release_info(struct fib_info *fi)
162{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700163 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164 if (fi && --fi->fib_treeref == 0) {
165 hlist_del(&fi->fib_hash);
166 if (fi->fib_prefsrc)
167 hlist_del(&fi->fib_lhash);
168 change_nexthops(fi) {
169 if (!nh->nh_dev)
170 continue;
171 hlist_del(&nh->nh_hash);
172 } endfor_nexthops(fi)
173 fi->fib_dead = 1;
174 fib_info_put(fi);
175 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700176 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177}
178
179static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180{
181 const struct fib_nh *onh = ofi->fib_nh;
182
183 for_nexthops(fi) {
184 if (nh->nh_oif != onh->nh_oif ||
185 nh->nh_gw != onh->nh_gw ||
186 nh->nh_scope != onh->nh_scope ||
187#ifdef CONFIG_IP_ROUTE_MULTIPATH
188 nh->nh_weight != onh->nh_weight ||
189#endif
190#ifdef CONFIG_NET_CLS_ROUTE
191 nh->nh_tclassid != onh->nh_tclassid ||
192#endif
193 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
194 return -1;
195 onh++;
196 } endfor_nexthops(fi);
197 return 0;
198}
199
200static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201{
202 unsigned int mask = (fib_hash_size - 1);
203 unsigned int val = fi->fib_nhs;
204
205 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700206 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 val ^= fi->fib_priority;
208
209 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210}
211
212static struct fib_info *fib_find_info(const struct fib_info *nfi)
213{
214 struct hlist_head *head;
215 struct hlist_node *node;
216 struct fib_info *fi;
217 unsigned int hash;
218
219 hash = fib_info_hashfn(nfi);
220 head = &fib_info_hash[hash];
221
222 hlist_for_each_entry(fi, node, head, fib_hash) {
223 if (fi->fib_nhs != nfi->fib_nhs)
224 continue;
225 if (nfi->fib_protocol == fi->fib_protocol &&
226 nfi->fib_prefsrc == fi->fib_prefsrc &&
227 nfi->fib_priority == fi->fib_priority &&
228 memcmp(nfi->fib_metrics, fi->fib_metrics,
229 sizeof(fi->fib_metrics)) == 0 &&
230 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
232 return fi;
233 }
234
235 return NULL;
236}
237
238static inline unsigned int fib_devindex_hashfn(unsigned int val)
239{
240 unsigned int mask = DEVINDEX_HASHSIZE - 1;
241
242 return (val ^
243 (val >> DEVINDEX_HASHBITS) ^
244 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
245}
246
247/* Check, that the gateway is already configured.
248 Used only by redirect accept routine.
249 */
250
Al Virod878e72e2006-09-26 22:18:13 -0700251int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252{
253 struct hlist_head *head;
254 struct hlist_node *node;
255 struct fib_nh *nh;
256 unsigned int hash;
257
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700258 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259
260 hash = fib_devindex_hashfn(dev->ifindex);
261 head = &fib_info_devhash[hash];
262 hlist_for_each_entry(nh, node, head, nh_hash) {
263 if (nh->nh_dev == dev &&
264 nh->nh_gw == gw &&
265 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700266 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 return 0;
268 }
269 }
270
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700271 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272
273 return -1;
274}
275
Thomas Graf339bf982006-11-10 14:10:15 -0800276static inline size_t fib_nlmsg_size(struct fib_info *fi)
277{
278 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
279 + nla_total_size(4) /* RTA_TABLE */
280 + nla_total_size(4) /* RTA_DST */
281 + nla_total_size(4) /* RTA_PRIORITY */
282 + nla_total_size(4); /* RTA_PREFSRC */
283
284 /* space for nested metrics */
285 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
286
287 if (fi->fib_nhs) {
288 /* Also handles the special case fib_nhs == 1 */
289
290 /* each nexthop is packed in an attribute */
291 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
292
293 /* may contain flow and gateway attribute */
294 nhsize += 2 * nla_total_size(4);
295
296 /* all nexthops are packed in a nested attribute */
297 payload += nla_total_size(fi->fib_nhs * nhsize);
298 }
299
300 return payload;
301}
302
Al Viro81f7bf62006-09-27 18:40:00 -0700303void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Thomas Graf4e902c52006-08-17 18:14:52 -0700304 int dst_len, u32 tb_id, struct nl_info *info)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305{
306 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700308 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Thomas Graf339bf982006-11-10 14:10:15 -0800310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700311 if (skb == NULL)
312 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
Thomas Graf4e902c52006-08-17 18:14:52 -0700314 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700315 fa->fa_type, fa->fa_scope, key, dst_len,
Thomas Graf4e902c52006-08-17 18:14:52 -0700316 fa->fa_tos, fa->fa_info, 0);
Thomas Graf339bf982006-11-10 14:10:15 -0800317 /* failure implies BUG in fib_nlmsg_size() */
318 BUG_ON(err < 0);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700319
Thomas Graf4e902c52006-08-17 18:14:52 -0700320 err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
321 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700322errout:
323 if (err < 0)
324 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325}
326
327/* Return the first fib alias matching TOS with
328 * priority less than or equal to PRIO.
329 */
330struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
331{
332 if (fah) {
333 struct fib_alias *fa;
334 list_for_each_entry(fa, fah, fa_list) {
335 if (fa->fa_tos > tos)
336 continue;
337 if (fa->fa_info->fib_priority >= prio ||
338 fa->fa_tos < tos)
339 return fa;
340 }
341 }
342 return NULL;
343}
344
345int fib_detect_death(struct fib_info *fi, int order,
346 struct fib_info **last_resort, int *last_idx, int *dflt)
347{
348 struct neighbour *n;
349 int state = NUD_NONE;
350
351 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
352 if (n) {
353 state = n->nud_state;
354 neigh_release(n);
355 }
356 if (state==NUD_REACHABLE)
357 return 0;
358 if ((state&NUD_VALID) && order != *dflt)
359 return 0;
360 if ((state&NUD_VALID) ||
361 (*last_idx<0 && order > *dflt)) {
362 *last_resort = fi;
363 *last_idx = order;
364 }
365 return 1;
366}
367
368#ifdef CONFIG_IP_ROUTE_MULTIPATH
369
/*
 * Count the rtnexthop entries in a buffer of "remaining" bytes
 * starting at rtnh.
 *
 * Returns the number of entries, or 0 if bytes are left over after the
 * last well-formed entry (the configuration is then considered invalid).
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int nhs;

	for (nhs = 0; rtnh_ok(rtnh, remaining); nhs++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return nhs;
}
382
Thomas Graf4e902c52006-08-17 18:14:52 -0700383static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
384 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700387 int attrlen;
388
389 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700391
392 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
393 nh->nh_oif = rtnh->rtnh_ifindex;
394 nh->nh_weight = rtnh->rtnh_hops + 1;
395
396 attrlen = rtnh_attrlen(rtnh);
397 if (attrlen > 0) {
398 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
399
400 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700401 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700402#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700403 nla = nla_find(attrs, attrlen, RTA_FLOW);
404 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405#endif
406 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700407
408 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700410
Linus Torvalds1da177e2005-04-16 15:20:36 -0700411 return 0;
412}
413
414#endif
415
Thomas Graf4e902c52006-08-17 18:14:52 -0700416int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417{
418#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700419 struct rtnexthop *rtnh;
420 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700421#endif
422
Thomas Graf4e902c52006-08-17 18:14:52 -0700423 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424 return 1;
425
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 if (cfg->fc_oif || cfg->fc_gw) {
427 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
428 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429 return 0;
430 return 1;
431 }
432
433#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700434 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700435 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700436
437 rtnh = cfg->fc_mp;
438 remaining = cfg->fc_mp_len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700439
440 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700441 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442
Thomas Graf4e902c52006-08-17 18:14:52 -0700443 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700445
446 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700448
449 attrlen = rtnh_attrlen(rtnh);
450 if (attrlen < 0) {
451 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
452
453 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700454 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 return 1;
456#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700457 nla = nla_find(attrs, attrlen, RTA_FLOW);
458 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700459 return 1;
460#endif
461 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700462
463 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 } endfor_nexthops(fi);
465#endif
466 return 0;
467}
468
469
470/*
471 Picture
472 -------
473
474 Semantics of nexthop is very messy by historical reasons.
475 We have to take into account, that:
476 a) gateway can be actually local interface address,
477 so that gatewayed route is direct.
478 b) gateway must be on-link address, possibly
479 described not by an ifaddr, but also by a direct route.
480 c) If both gateway and interface are specified, they should not
481 contradict.
482 d) If we use tunnel routes, gateway could be not on-link.
483
484 Attempt to reconcile all of these (alas, self-contradictory) conditions
485 results in pretty ugly and hairy code with obscure logic.
486
487 I chose to generalized it instead, so that the size
488 of code does not increase practically, but it becomes
489 much more general.
490 Every prefix is assigned a "scope" value: "host" is local address,
491 "link" is direct route,
492 [ ... "site" ... "interior" ... ]
493 and "universe" is true gateway route with global meaning.
494
495 Every prefix refers to a set of "nexthop"s (gw, oif),
496 where gw must have narrower scope. This recursion stops
497 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
498 which means that gw is forced to be on link.
499
500 Code is still hairy, but now it is apparently logically
501 consistent and very flexible. F.e. as by-product it allows
502 to co-exists in peace independent exterior and interior
503 routing processes.
504
505 Normally it looks as following.
506
507 {universe prefix} -> (gw, oif) [scope link]
508 |
509 |-> {link prefix} -> (gw, oif) [scope local]
510 |
511 |-> {local prefix} (terminal node)
512 */
513
Thomas Graf4e902c52006-08-17 18:14:52 -0700514static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
515 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516{
517 int err;
518
519 if (nh->nh_gw) {
520 struct fib_result res;
521
522#ifdef CONFIG_IP_ROUTE_PERVASIVE
523 if (nh->nh_flags&RTNH_F_PERVASIVE)
524 return 0;
525#endif
526 if (nh->nh_flags&RTNH_F_ONLINK) {
527 struct net_device *dev;
528
Thomas Graf4e902c52006-08-17 18:14:52 -0700529 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700530 return -EINVAL;
531 if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
532 return -EINVAL;
533 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
534 return -ENODEV;
535 if (!(dev->flags&IFF_UP))
536 return -ENETDOWN;
537 nh->nh_dev = dev;
538 dev_hold(dev);
539 nh->nh_scope = RT_SCOPE_LINK;
540 return 0;
541 }
542 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700543 struct flowi fl = {
544 .nl_u = {
545 .ip4_u = {
546 .daddr = nh->nh_gw,
547 .scope = cfg->fc_scope + 1,
548 },
549 },
550 .oif = nh->nh_oif,
551 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700552
553 /* It is not necessary, but requires a bit of thinking */
554 if (fl.fl4_scope < RT_SCOPE_LINK)
555 fl.fl4_scope = RT_SCOPE_LINK;
556 if ((err = fib_lookup(&fl, &res)) != 0)
557 return err;
558 }
559 err = -EINVAL;
560 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
561 goto out;
562 nh->nh_scope = res.scope;
563 nh->nh_oif = FIB_RES_OIF(res);
564 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
565 goto out;
566 dev_hold(nh->nh_dev);
567 err = -ENETDOWN;
568 if (!(nh->nh_dev->flags & IFF_UP))
569 goto out;
570 err = 0;
571out:
572 fib_res_put(&res);
573 return err;
574 } else {
575 struct in_device *in_dev;
576
577 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
578 return -EINVAL;
579
580 in_dev = inetdev_by_index(nh->nh_oif);
581 if (in_dev == NULL)
582 return -ENODEV;
583 if (!(in_dev->dev->flags&IFF_UP)) {
584 in_dev_put(in_dev);
585 return -ENETDOWN;
586 }
587 nh->nh_dev = in_dev->dev;
588 dev_hold(nh->nh_dev);
589 nh->nh_scope = RT_SCOPE_HOST;
590 in_dev_put(in_dev);
591 }
592 return 0;
593}
594
Al Viro81f7bf62006-09-27 18:40:00 -0700595static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700596{
597 unsigned int mask = (fib_hash_size - 1);
598
Al Viro81f7bf62006-09-27 18:40:00 -0700599 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700600}
601
602static struct hlist_head *fib_hash_alloc(int bytes)
603{
604 if (bytes <= PAGE_SIZE)
605 return kmalloc(bytes, GFP_KERNEL);
606 else
607 return (struct hlist_head *)
608 __get_free_pages(GFP_KERNEL, get_order(bytes));
609}
610
611static void fib_hash_free(struct hlist_head *hash, int bytes)
612{
613 if (!hash)
614 return;
615
616 if (bytes <= PAGE_SIZE)
617 kfree(hash);
618 else
619 free_pages((unsigned long) hash, get_order(bytes));
620}
621
622static void fib_hash_move(struct hlist_head *new_info_hash,
623 struct hlist_head *new_laddrhash,
624 unsigned int new_size)
625{
David S. Millerb7656e72005-08-05 04:12:48 -0700626 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700628 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700630 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700631 old_info_hash = fib_info_hash;
632 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 fib_hash_size = new_size;
634
635 for (i = 0; i < old_size; i++) {
636 struct hlist_head *head = &fib_info_hash[i];
637 struct hlist_node *node, *n;
638 struct fib_info *fi;
639
640 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
641 struct hlist_head *dest;
642 unsigned int new_hash;
643
644 hlist_del(&fi->fib_hash);
645
646 new_hash = fib_info_hashfn(fi);
647 dest = &new_info_hash[new_hash];
648 hlist_add_head(&fi->fib_hash, dest);
649 }
650 }
651 fib_info_hash = new_info_hash;
652
653 for (i = 0; i < old_size; i++) {
654 struct hlist_head *lhead = &fib_info_laddrhash[i];
655 struct hlist_node *node, *n;
656 struct fib_info *fi;
657
658 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
659 struct hlist_head *ldest;
660 unsigned int new_hash;
661
662 hlist_del(&fi->fib_lhash);
663
664 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
665 ldest = &new_laddrhash[new_hash];
666 hlist_add_head(&fi->fib_lhash, ldest);
667 }
668 }
669 fib_info_laddrhash = new_laddrhash;
670
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700671 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700672
673 bytes = old_size * sizeof(struct hlist_head *);
674 fib_hash_free(old_info_hash, bytes);
675 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700676}
677
Thomas Graf4e902c52006-08-17 18:14:52 -0700678struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679{
680 int err;
681 struct fib_info *fi = NULL;
682 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700683 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684
685 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700686 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 goto err_inval;
688
689#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700690 if (cfg->fc_mp) {
691 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 if (nhs == 0)
693 goto err_inval;
694 }
695#endif
696#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700697 if (cfg->fc_mp_alg) {
698 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
699 cfg->fc_mp_alg > IP_MP_ALG_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700700 goto err_inval;
701 }
702#endif
703
704 err = -ENOBUFS;
705 if (fib_info_cnt >= fib_hash_size) {
706 unsigned int new_size = fib_hash_size << 1;
707 struct hlist_head *new_info_hash;
708 struct hlist_head *new_laddrhash;
709 unsigned int bytes;
710
711 if (!new_size)
712 new_size = 1;
713 bytes = new_size * sizeof(struct hlist_head *);
714 new_info_hash = fib_hash_alloc(bytes);
715 new_laddrhash = fib_hash_alloc(bytes);
716 if (!new_info_hash || !new_laddrhash) {
717 fib_hash_free(new_info_hash, bytes);
718 fib_hash_free(new_laddrhash, bytes);
719 } else {
720 memset(new_info_hash, 0, bytes);
721 memset(new_laddrhash, 0, bytes);
722
723 fib_hash_move(new_info_hash, new_laddrhash, new_size);
724 }
725
726 if (!fib_hash_size)
727 goto failure;
728 }
729
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700730 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731 if (fi == NULL)
732 goto failure;
733 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734
Thomas Graf4e902c52006-08-17 18:14:52 -0700735 fi->fib_protocol = cfg->fc_protocol;
736 fi->fib_flags = cfg->fc_flags;
737 fi->fib_priority = cfg->fc_priority;
738 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739
740 fi->fib_nhs = nhs;
741 change_nexthops(fi) {
742 nh->nh_parent = fi;
743 } endfor_nexthops(fi)
744
Thomas Graf4e902c52006-08-17 18:14:52 -0700745 if (cfg->fc_mx) {
746 struct nlattr *nla;
747 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748
Thomas Graf4e902c52006-08-17 18:14:52 -0700749 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
750 int type = nla->nla_type;
751
752 if (type) {
753 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700755 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 }
758 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759
Thomas Graf4e902c52006-08-17 18:14:52 -0700760 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700762 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
763 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700765 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700767 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 goto err_inval;
769#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700770 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 goto err_inval;
772#endif
773#else
774 goto err_inval;
775#endif
776 } else {
777 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700778
779 nh->nh_oif = cfg->fc_oif;
780 nh->nh_gw = cfg->fc_gw;
781 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700783 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785#ifdef CONFIG_IP_ROUTE_MULTIPATH
786 nh->nh_weight = 1;
787#endif
788 }
789
790#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
Thomas Graf4e902c52006-08-17 18:14:52 -0700791 fi->fib_mp_alg = cfg->fc_mp_alg;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792#endif
793
Thomas Graf4e902c52006-08-17 18:14:52 -0700794 if (fib_props[cfg->fc_type].error) {
795 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700796 goto err_inval;
797 goto link_it;
798 }
799
Thomas Graf4e902c52006-08-17 18:14:52 -0700800 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801 goto err_inval;
802
Thomas Graf4e902c52006-08-17 18:14:52 -0700803 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 struct fib_nh *nh = fi->fib_nh;
805
806 /* Local address is added. */
807 if (nhs != 1 || nh->nh_gw)
808 goto err_inval;
809 nh->nh_scope = RT_SCOPE_NOWHERE;
810 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
811 err = -ENODEV;
812 if (nh->nh_dev == NULL)
813 goto failure;
814 } else {
815 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700816 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700817 goto failure;
818 } endfor_nexthops(fi)
819 }
820
821 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700822 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
823 fi->fib_prefsrc != cfg->fc_dst)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700824 if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
825 goto err_inval;
826 }
827
828link_it:
829 if ((ofi = fib_find_info(fi)) != NULL) {
830 fi->fib_dead = 1;
831 free_fib_info(fi);
832 ofi->fib_treeref++;
833 return ofi;
834 }
835
836 fi->fib_treeref++;
837 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700838 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700839 hlist_add_head(&fi->fib_hash,
840 &fib_info_hash[fib_info_hashfn(fi)]);
841 if (fi->fib_prefsrc) {
842 struct hlist_head *head;
843
844 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
845 hlist_add_head(&fi->fib_lhash, head);
846 }
847 change_nexthops(fi) {
848 struct hlist_head *head;
849 unsigned int hash;
850
851 if (!nh->nh_dev)
852 continue;
853 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
854 head = &fib_info_devhash[hash];
855 hlist_add_head(&nh->nh_hash, head);
856 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700857 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 return fi;
859
860err_inval:
861 err = -EINVAL;
862
863failure:
Linus Torvalds1da177e2005-04-16 15:20:36 -0700864 if (fi) {
865 fi->fib_dead = 1;
866 free_fib_info(fi);
867 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700868
869 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700870}
871
Robert Olssone5b43762005-08-25 13:01:03 -0700872/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Al Viro1ef1b8c2006-09-26 22:20:56 -0700874 struct fib_result *res, __be32 zone, __be32 mask,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700875 int prefixlen)
876{
877 struct fib_alias *fa;
878 int nh_sel = 0;
879
Robert Olssone5b43762005-08-25 13:01:03 -0700880 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881 int err;
882
883 if (fa->fa_tos &&
884 fa->fa_tos != flp->fl4_tos)
885 continue;
886
887 if (fa->fa_scope < flp->fl4_scope)
888 continue;
889
890 fa->fa_state |= FA_S_ACCESSED;
891
892 err = fib_props[fa->fa_type].error;
893 if (err == 0) {
894 struct fib_info *fi = fa->fa_info;
895
896 if (fi->fib_flags & RTNH_F_DEAD)
897 continue;
898
899 switch (fa->fa_type) {
900 case RTN_UNICAST:
901 case RTN_LOCAL:
902 case RTN_BROADCAST:
903 case RTN_ANYCAST:
904 case RTN_MULTICAST:
905 for_nexthops(fi) {
906 if (nh->nh_flags&RTNH_F_DEAD)
907 continue;
908 if (!flp->oif || flp->oif == nh->nh_oif)
909 break;
910 }
911#ifdef CONFIG_IP_ROUTE_MULTIPATH
912 if (nhsel < fi->fib_nhs) {
913 nh_sel = nhsel;
914 goto out_fill_res;
915 }
916#else
917 if (nhsel < 1) {
918 goto out_fill_res;
919 }
920#endif
921 endfor_nexthops(fi);
922 continue;
923
924 default:
925 printk(KERN_DEBUG "impossible 102\n");
926 return -EINVAL;
927 };
928 }
929 return err;
930 }
931 return 1;
932
933out_fill_res:
934 res->prefixlen = prefixlen;
935 res->nh_sel = nh_sel;
936 res->type = fa->fa_type;
937 res->scope = fa->fa_scope;
938 res->fi = fa->fa_info;
939#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
940 res->netmask = mask;
Al Viro1e8aa6f2006-09-26 22:21:22 -0700941 res->network = zone & inet_make_mask(prefixlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942#endif
943 atomic_inc(&res->fi->fib_clntref);
944 return 0;
945}
946
947/* Find appropriate source address to this destination */
948
Al Virob83738a2006-09-26 22:14:15 -0700949__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950{
951 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
952}
953
Thomas Grafbe403ea2006-08-17 18:15:17 -0700954int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700955 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700956 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700957{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700958 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700960
Thomas Grafbe403ea2006-08-17 18:15:17 -0700961 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
962 if (nlh == NULL)
963 return -ENOBUFS;
964
965 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700966 rtm->rtm_family = AF_INET;
967 rtm->rtm_dst_len = dst_len;
968 rtm->rtm_src_len = 0;
969 rtm->rtm_tos = tos;
970 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700971 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 rtm->rtm_type = type;
973 rtm->rtm_flags = fi->fib_flags;
974 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700976
977 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700978 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700979
Linus Torvalds1da177e2005-04-16 15:20:36 -0700980 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700981 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
982
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700984 goto nla_put_failure;
985
Linus Torvalds1da177e2005-04-16 15:20:36 -0700986 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700987 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700988
Linus Torvalds1da177e2005-04-16 15:20:36 -0700989 if (fi->fib_nhs == 1) {
990 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700991 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700992
Linus Torvalds1da177e2005-04-16 15:20:36 -0700993 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700994 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700995#ifdef CONFIG_NET_CLS_ROUTE
996 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700997 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700998#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 }
1000#ifdef CONFIG_IP_ROUTE_MULTIPATH
1001 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001002 struct rtnexthop *rtnh;
1003 struct nlattr *mp;
1004
1005 mp = nla_nest_start(skb, RTA_MULTIPATH);
1006 if (mp == NULL)
1007 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001008
1009 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001010 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1011 if (rtnh == NULL)
1012 goto nla_put_failure;
1013
1014 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1015 rtnh->rtnh_hops = nh->nh_weight - 1;
1016 rtnh->rtnh_ifindex = nh->nh_oif;
1017
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001019 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001020#ifdef CONFIG_NET_CLS_ROUTE
1021 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001022 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001023#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001024 /* length of rtnetlink header + attributes */
1025 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001026 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001027
1028 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001029 }
1030#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001031 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001032
Thomas Grafbe403ea2006-08-17 18:15:17 -07001033nla_put_failure:
1034 return nlmsg_cancel(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001035}
1036
Linus Torvalds1da177e2005-04-16 15:20:36 -07001037/*
1038 Update FIB if:
1039 - local address disappeared -> we must delete all the entries
1040 referring to it.
1041 - device went down -> we must shutdown all nexthops going via it.
1042 */
1043
Al Viro81f7bf62006-09-27 18:40:00 -07001044int fib_sync_down(__be32 local, struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001045{
1046 int ret = 0;
1047 int scope = RT_SCOPE_NOWHERE;
1048
1049 if (force)
1050 scope = -1;
1051
1052 if (local && fib_info_laddrhash) {
1053 unsigned int hash = fib_laddr_hashfn(local);
1054 struct hlist_head *head = &fib_info_laddrhash[hash];
1055 struct hlist_node *node;
1056 struct fib_info *fi;
1057
1058 hlist_for_each_entry(fi, node, head, fib_lhash) {
1059 if (fi->fib_prefsrc == local) {
1060 fi->fib_flags |= RTNH_F_DEAD;
1061 ret++;
1062 }
1063 }
1064 }
1065
1066 if (dev) {
1067 struct fib_info *prev_fi = NULL;
1068 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1069 struct hlist_head *head = &fib_info_devhash[hash];
1070 struct hlist_node *node;
1071 struct fib_nh *nh;
1072
1073 hlist_for_each_entry(nh, node, head, nh_hash) {
1074 struct fib_info *fi = nh->nh_parent;
1075 int dead;
1076
1077 BUG_ON(!fi->fib_nhs);
1078 if (nh->nh_dev != dev || fi == prev_fi)
1079 continue;
1080 prev_fi = fi;
1081 dead = 0;
1082 change_nexthops(fi) {
1083 if (nh->nh_flags&RTNH_F_DEAD)
1084 dead++;
1085 else if (nh->nh_dev == dev &&
1086 nh->nh_scope != scope) {
1087 nh->nh_flags |= RTNH_F_DEAD;
1088#ifdef CONFIG_IP_ROUTE_MULTIPATH
1089 spin_lock_bh(&fib_multipath_lock);
1090 fi->fib_power -= nh->nh_power;
1091 nh->nh_power = 0;
1092 spin_unlock_bh(&fib_multipath_lock);
1093#endif
1094 dead++;
1095 }
1096#ifdef CONFIG_IP_ROUTE_MULTIPATH
1097 if (force > 1 && nh->nh_dev == dev) {
1098 dead = fi->fib_nhs;
1099 break;
1100 }
1101#endif
1102 } endfor_nexthops(fi)
1103 if (dead == fi->fib_nhs) {
1104 fi->fib_flags |= RTNH_F_DEAD;
1105 ret++;
1106 }
1107 }
1108 }
1109
1110 return ret;
1111}
1112
1113#ifdef CONFIG_IP_ROUTE_MULTIPATH
1114
1115/*
1116 Dead device goes up. We wake up dead nexthops.
1117 It takes sense only on multipath routes.
1118 */
1119
1120int fib_sync_up(struct net_device *dev)
1121{
1122 struct fib_info *prev_fi;
1123 unsigned int hash;
1124 struct hlist_head *head;
1125 struct hlist_node *node;
1126 struct fib_nh *nh;
1127 int ret;
1128
1129 if (!(dev->flags&IFF_UP))
1130 return 0;
1131
1132 prev_fi = NULL;
1133 hash = fib_devindex_hashfn(dev->ifindex);
1134 head = &fib_info_devhash[hash];
1135 ret = 0;
1136
1137 hlist_for_each_entry(nh, node, head, nh_hash) {
1138 struct fib_info *fi = nh->nh_parent;
1139 int alive;
1140
1141 BUG_ON(!fi->fib_nhs);
1142 if (nh->nh_dev != dev || fi == prev_fi)
1143 continue;
1144
1145 prev_fi = fi;
1146 alive = 0;
1147 change_nexthops(fi) {
1148 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1149 alive++;
1150 continue;
1151 }
1152 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1153 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001154 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 continue;
1156 alive++;
1157 spin_lock_bh(&fib_multipath_lock);
1158 nh->nh_power = 0;
1159 nh->nh_flags &= ~RTNH_F_DEAD;
1160 spin_unlock_bh(&fib_multipath_lock);
1161 } endfor_nexthops(fi)
1162
1163 if (alive > 0) {
1164 fi->fib_flags &= ~RTNH_F_DEAD;
1165 ret++;
1166 }
1167 }
1168
1169 return ret;
1170}
1171
1172/*
1173 The algorithm is suboptimal, but it provides really
1174 fair weighted route distribution.
1175 */
1176
1177void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1178{
1179 struct fib_info *fi = res->fi;
1180 int w;
1181
1182 spin_lock_bh(&fib_multipath_lock);
1183 if (fi->fib_power <= 0) {
1184 int power = 0;
1185 change_nexthops(fi) {
1186 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1187 power += nh->nh_weight;
1188 nh->nh_power = nh->nh_weight;
1189 }
1190 } endfor_nexthops(fi);
1191 fi->fib_power = power;
1192 if (power <= 0) {
1193 spin_unlock_bh(&fib_multipath_lock);
1194 /* Race condition: route has just become dead. */
1195 res->nh_sel = 0;
1196 return;
1197 }
1198 }
1199
1200
1201 /* w should be random number [0..fi->fib_power-1],
1202 it is pretty bad approximation.
1203 */
1204
1205 w = jiffies % fi->fib_power;
1206
1207 change_nexthops(fi) {
1208 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1209 if ((w -= nh->nh_power) <= 0) {
1210 nh->nh_power--;
1211 fi->fib_power--;
1212 res->nh_sel = nhsel;
1213 spin_unlock_bh(&fib_multipath_lock);
1214 return;
1215 }
1216 }
1217 } endfor_nexthops(fi);
1218
1219 /* Race condition: route has just become dead. */
1220 res->nh_sel = 0;
1221 spin_unlock_bh(&fib_multipath_lock);
1222}
1223#endif