blob: c1263e23876be91c573e94fa25242cb206dafb00 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
8 * Version: $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9 *
10 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <asm/uaccess.h>
19#include <asm/system.h>
20#include <linux/bitops.h>
21#include <linux/types.h>
22#include <linux/kernel.h>
23#include <linux/jiffies.h>
24#include <linux/mm.h>
25#include <linux/string.h>
26#include <linux/socket.h>
27#include <linux/sockios.h>
28#include <linux/errno.h>
29#include <linux/in.h>
30#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020031#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include <linux/netdevice.h>
33#include <linux/if_arp.h>
34#include <linux/proc_fs.h>
35#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <linux/init.h>
37
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020038#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039#include <net/ip.h>
40#include <net/protocol.h>
41#include <net/route.h>
42#include <net/tcp.h>
43#include <net/sock.h>
44#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070045#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070046#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070047
48#include "fib_lookup.h"
49
50#define FSprintk(a...)
51
/*
 * Global fib_info bookkeeping.
 *
 * fib_info_lock is taken around every modification of the hash tables
 * below (fib_release_info, fib_hash_move, fib_create_info) and around
 * the device-hash walk in ip_fib_check_default.
 */
Stephen Hemminger832b4c52006-08-29 16:48:09 -070052static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info records, bucket chosen by fib_info_hashfn() */
Linus Torvalds1da177e2005-04-16 15:20:36 -070053static struct hlist_head *fib_info_hash;
/* fib_info records that have a fib_prefsrc, bucket chosen by fib_laddr_hashfn() */
54static struct hlist_head *fib_info_laddrhash;
/* bucket count of the two tables above; the hash functions mask with (size - 1) */
55static unsigned int fib_hash_size;
/* incremented when fib_create_info allocates a record, decremented in free_fib_info */
56static unsigned int fib_info_cnt;
57
/* nexthops with a device, bucket chosen by fib_devindex_hashfn(ifindex) */
58#define DEVINDEX_HASHBITS 8
59#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
60static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
61
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) and change_nexthops(fi) open a brace scope that declares
 * `nhsel` (the nexthop index) and `nh` (the current nexthop: const in
 * for_nexthops, writable in change_nexthops) and then start a for loop.
 * The caller writes the loop body and must close the extra scope with
 * endfor_nexthops(fi).  Because the scope outlives the loop body, `nhsel`
 * and `nh` are still visible between the end of the body and
 * endfor_nexthops().
 */
62#ifdef CONFIG_IP_ROUTE_MULTIPATH
63
64static DEFINE_SPINLOCK(fib_multipath_lock);
65
/* Multipath: walk all fi->fib_nhs entries of fi->fib_nh[] */
66#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \
67for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++)
68
69#define change_nexthops(fi) { int nhsel; struct fib_nh * nh; \
70for (nhsel=0, nh = (struct fib_nh*)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nh++, nhsel++)
71
72#else /* CONFIG_IP_ROUTE_MULTIPATH */
73
74/* Hope, that gcc will optimize it to get rid of dummy loop */
75
/* No multipath: the loop body runs exactly once, on fi->fib_nh[0] */
76#define for_nexthops(fi) { int nhsel=0; const struct fib_nh * nh = (fi)->fib_nh; \
77for (nhsel=0; nhsel < 1; nhsel++)
78
79#define change_nexthops(fi) { int nhsel=0; struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh); \
80for (nhsel=0; nhsel < 1; nhsel++)
81
82#endif /* CONFIG_IP_ROUTE_MULTIPATH */
83
/* Closes the brace scope opened by for_nexthops()/change_nexthops() */
84#define endfor_nexthops(fi) }
85
86
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090087static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070088{
89 int error;
90 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070091} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090092 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070093 .error = 0,
94 .scope = RT_SCOPE_NOWHERE,
95 }, /* RTN_UNSPEC */
96 {
97 .error = 0,
98 .scope = RT_SCOPE_UNIVERSE,
99 }, /* RTN_UNICAST */
100 {
101 .error = 0,
102 .scope = RT_SCOPE_HOST,
103 }, /* RTN_LOCAL */
104 {
105 .error = 0,
106 .scope = RT_SCOPE_LINK,
107 }, /* RTN_BROADCAST */
108 {
109 .error = 0,
110 .scope = RT_SCOPE_LINK,
111 }, /* RTN_ANYCAST */
112 {
113 .error = 0,
114 .scope = RT_SCOPE_UNIVERSE,
115 }, /* RTN_MULTICAST */
116 {
117 .error = -EINVAL,
118 .scope = RT_SCOPE_UNIVERSE,
119 }, /* RTN_BLACKHOLE */
120 {
121 .error = -EHOSTUNREACH,
122 .scope = RT_SCOPE_UNIVERSE,
123 }, /* RTN_UNREACHABLE */
124 {
125 .error = -EACCES,
126 .scope = RT_SCOPE_UNIVERSE,
127 }, /* RTN_PROHIBIT */
128 {
129 .error = -EAGAIN,
130 .scope = RT_SCOPE_UNIVERSE,
131 }, /* RTN_THROW */
132 {
133 .error = -EINVAL,
134 .scope = RT_SCOPE_NOWHERE,
135 }, /* RTN_NAT */
136 {
137 .error = -EINVAL,
138 .scope = RT_SCOPE_NOWHERE,
139 }, /* RTN_XRESOLVE */
140};
141
142
143/* Release a nexthop info record */
144
145void free_fib_info(struct fib_info *fi)
146{
147 if (fi->fib_dead == 0) {
148 printk("Freeing alive fib_info %p\n", fi);
149 return;
150 }
151 change_nexthops(fi) {
152 if (nh->nh_dev)
153 dev_put(nh->nh_dev);
154 nh->nh_dev = NULL;
155 } endfor_nexthops(fi);
156 fib_info_cnt--;
157 kfree(fi);
158}
159
160void fib_release_info(struct fib_info *fi)
161{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700162 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700163 if (fi && --fi->fib_treeref == 0) {
164 hlist_del(&fi->fib_hash);
165 if (fi->fib_prefsrc)
166 hlist_del(&fi->fib_lhash);
167 change_nexthops(fi) {
168 if (!nh->nh_dev)
169 continue;
170 hlist_del(&nh->nh_hash);
171 } endfor_nexthops(fi)
172 fi->fib_dead = 1;
173 fib_info_put(fi);
174 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700175 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700176}
177
178static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
179{
180 const struct fib_nh *onh = ofi->fib_nh;
181
182 for_nexthops(fi) {
183 if (nh->nh_oif != onh->nh_oif ||
184 nh->nh_gw != onh->nh_gw ||
185 nh->nh_scope != onh->nh_scope ||
186#ifdef CONFIG_IP_ROUTE_MULTIPATH
187 nh->nh_weight != onh->nh_weight ||
188#endif
189#ifdef CONFIG_NET_CLS_ROUTE
190 nh->nh_tclassid != onh->nh_tclassid ||
191#endif
192 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
193 return -1;
194 onh++;
195 } endfor_nexthops(fi);
196 return 0;
197}
198
199static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
200{
201 unsigned int mask = (fib_hash_size - 1);
202 unsigned int val = fi->fib_nhs;
203
204 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700205 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 val ^= fi->fib_priority;
207
208 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
209}
210
211static struct fib_info *fib_find_info(const struct fib_info *nfi)
212{
213 struct hlist_head *head;
214 struct hlist_node *node;
215 struct fib_info *fi;
216 unsigned int hash;
217
218 hash = fib_info_hashfn(nfi);
219 head = &fib_info_hash[hash];
220
221 hlist_for_each_entry(fi, node, head, fib_hash) {
222 if (fi->fib_nhs != nfi->fib_nhs)
223 continue;
224 if (nfi->fib_protocol == fi->fib_protocol &&
225 nfi->fib_prefsrc == fi->fib_prefsrc &&
226 nfi->fib_priority == fi->fib_priority &&
227 memcmp(nfi->fib_metrics, fi->fib_metrics,
228 sizeof(fi->fib_metrics)) == 0 &&
229 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
230 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
231 return fi;
232 }
233
234 return NULL;
235}
236
237static inline unsigned int fib_devindex_hashfn(unsigned int val)
238{
239 unsigned int mask = DEVINDEX_HASHSIZE - 1;
240
241 return (val ^
242 (val >> DEVINDEX_HASHBITS) ^
243 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
244}
245
246/* Check, that the gateway is already configured.
247 Used only by redirect accept routine.
248 */
249
Al Virod878e72e2006-09-26 22:18:13 -0700250int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251{
252 struct hlist_head *head;
253 struct hlist_node *node;
254 struct fib_nh *nh;
255 unsigned int hash;
256
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700257 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700258
259 hash = fib_devindex_hashfn(dev->ifindex);
260 head = &fib_info_devhash[hash];
261 hlist_for_each_entry(nh, node, head, nh_hash) {
262 if (nh->nh_dev == dev &&
263 nh->nh_gw == gw &&
264 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700265 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700266 return 0;
267 }
268 }
269
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700270 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
272 return -1;
273}
274
Thomas Graf339bf982006-11-10 14:10:15 -0800275static inline size_t fib_nlmsg_size(struct fib_info *fi)
276{
277 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
278 + nla_total_size(4) /* RTA_TABLE */
279 + nla_total_size(4) /* RTA_DST */
280 + nla_total_size(4) /* RTA_PRIORITY */
281 + nla_total_size(4); /* RTA_PREFSRC */
282
283 /* space for nested metrics */
284 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
285
286 if (fi->fib_nhs) {
287 /* Also handles the special case fib_nhs == 1 */
288
289 /* each nexthop is packed in an attribute */
290 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
291
292 /* may contain flow and gateway attribute */
293 nhsize += 2 * nla_total_size(4);
294
295 /* all nexthops are packed in a nested attribute */
296 payload += nla_total_size(fi->fib_nhs * nhsize);
297 }
298
299 return payload;
300}
301
Al Viro81f7bf62006-09-27 18:40:00 -0700302void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700303 int dst_len, u32 tb_id, struct nl_info *info,
304 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700305{
306 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700307 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700308 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
Thomas Graf339bf982006-11-10 14:10:15 -0800310 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700311 if (skb == NULL)
312 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
Thomas Graf4e902c52006-08-17 18:14:52 -0700314 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700315 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700316 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800317 if (err < 0) {
318 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
319 WARN_ON(err == -EMSGSIZE);
320 kfree_skb(skb);
321 goto errout;
322 }
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800323 err = rtnl_notify(skb, &init_net, info->pid, RTNLGRP_IPV4_ROUTE,
Thomas Graf4e902c52006-08-17 18:14:52 -0700324 info->nlh, GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700325errout:
326 if (err < 0)
Denis V. Lunev97c53ca2007-11-19 22:26:51 -0800327 rtnl_set_sk_err(&init_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328}
329
330/* Return the first fib alias matching TOS with
331 * priority less than or equal to PRIO.
332 */
333struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
334{
335 if (fah) {
336 struct fib_alias *fa;
337 list_for_each_entry(fa, fah, fa_list) {
338 if (fa->fa_tos > tos)
339 continue;
340 if (fa->fa_info->fib_priority >= prio ||
341 fa->fa_tos < tos)
342 return fa;
343 }
344 }
345 return NULL;
346}
347
348int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800349 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350{
351 struct neighbour *n;
352 int state = NUD_NONE;
353
354 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
355 if (n) {
356 state = n->nud_state;
357 neigh_release(n);
358 }
359 if (state==NUD_REACHABLE)
360 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800361 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700362 return 0;
363 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800364 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 *last_resort = fi;
366 *last_idx = order;
367 }
368 return 1;
369}
370
371#ifdef CONFIG_IP_ROUTE_MULTIPATH
372
/*
 * Count the rtnexthop entries in a multipath attribute payload of
 * @remaining bytes.  Returns the count, or 0 when bytes are left over
 * after the last well-formed entry (invalid configuration).
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
385
Thomas Graf4e902c52006-08-17 18:14:52 -0700386static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
387 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700388{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700389 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700390 int attrlen;
391
392 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700394
395 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
396 nh->nh_oif = rtnh->rtnh_ifindex;
397 nh->nh_weight = rtnh->rtnh_hops + 1;
398
399 attrlen = rtnh_attrlen(rtnh);
400 if (attrlen > 0) {
401 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
402
403 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700404 nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700405#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700406 nla = nla_find(attrs, attrlen, RTA_FLOW);
407 nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700408#endif
409 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700410
411 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700413
Linus Torvalds1da177e2005-04-16 15:20:36 -0700414 return 0;
415}
416
417#endif
418
Thomas Graf4e902c52006-08-17 18:14:52 -0700419int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700420{
421#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700422 struct rtnexthop *rtnh;
423 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424#endif
425
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427 return 1;
428
Thomas Graf4e902c52006-08-17 18:14:52 -0700429 if (cfg->fc_oif || cfg->fc_gw) {
430 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
431 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 return 0;
433 return 1;
434 }
435
436#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700437 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700439
440 rtnh = cfg->fc_mp;
441 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900442
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700444 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445
Thomas Graf4e902c52006-08-17 18:14:52 -0700446 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700448
449 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700451
452 attrlen = rtnh_attrlen(rtnh);
453 if (attrlen < 0) {
454 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
455
456 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700457 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700458 return 1;
459#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700460 nla = nla_find(attrs, attrlen, RTA_FLOW);
461 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return 1;
463#endif
464 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700465
466 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 } endfor_nexthops(fi);
468#endif
469 return 0;
470}
471
472
473/*
474 Picture
475 -------
476
477 Semantics of nexthop is very messy for historical reasons.
478 We have to take into account, that:
479 a) gateway can be actually local interface address,
480 so that gatewayed route is direct.
481 b) gateway must be on-link address, possibly
482 described not by an ifaddr, but also by a direct route.
483 c) If both gateway and interface are specified, they should not
484 contradict.
485 d) If we use tunnel routes, gateway could be not on-link.
486
487 Attempt to reconcile all of these (alas, self-contradictory) conditions
488 results in pretty ugly and hairy code with obscure logic.
489
490 I chose to generalize it instead, so that the size
491 of code does not increase practically, but it becomes
492 much more general.
493 Every prefix is assigned a "scope" value: "host" is local address,
494 "link" is direct route,
495 [ ... "site" ... "interior" ... ]
496 and "universe" is true gateway route with global meaning.
497
498 Every prefix refers to a set of "nexthop"s (gw, oif),
499 where gw must have narrower scope. This recursion stops
500 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
501 which means that gw is forced to be on link.
502
503 Code is still hairy, but now it is apparently logically
504 consistent and very flexible. F.e. as by-product it allows
505 to co-exists in peace independent exterior and interior
506 routing processes.
507
508 Normally it looks as following.
509
510 {universe prefix} -> (gw, oif) [scope link]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900511 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700512 |-> {link prefix} -> (gw, oif) [scope local]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900513 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514 |-> {local prefix} (terminal node)
515 */
516
/*
 * Validate one nexthop @nh of a route being created from @cfg and resolve
 * the device it goes through.  @fi is not used in the body.
 *
 * Returns 0 on success or a negative errno (-EINVAL, -ENODEV, -ENETDOWN,
 * or whatever fib_lookup() returned).  Three cases:
 *
 *  - gateway with RTNH_F_ONLINK: the route scope must be below
 *    RT_SCOPE_LINK, the gateway must be a unicast address and the device
 *    named by nh_oif must exist and be up; nh_scope becomes RT_SCOPE_LINK.
 *  - gateway without ONLINK: the gateway is looked up in the FIB with a
 *    scope of at least RT_SCOPE_LINK; the result must be a unicast or
 *    local route and supplies nh_scope, nh_oif and nh_dev.
 *  - no gateway: the device named by nh_oif must exist and be up;
 *    nh_scope becomes RT_SCOPE_HOST.
 *
 * Whenever nh_dev is set here a reference is taken with dev_hold().  In
 * the lookup case the reference is still held when -ENETDOWN is returned.
 */
Thomas Graf4e902c52006-08-17 18:14:52 -0700517static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
518 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700519{
520 int err;
521
522 if (nh->nh_gw) {
523 struct fib_result res;
524
525#ifdef CONFIG_IP_ROUTE_PERVASIVE
	/* pervasive nexthops are accepted as-is; nh is left untouched */
526 if (nh->nh_flags&RTNH_F_PERVASIVE)
527 return 0;
528#endif
529 if (nh->nh_flags&RTNH_F_ONLINK) {
530 struct net_device *dev;
531
Thomas Graf4e902c52006-08-17 18:14:52 -0700532 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533 return -EINVAL;
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800534 if (inet_addr_type(&init_net, nh->nh_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700535 return -EINVAL;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700536 if ((dev = __dev_get_by_index(&init_net, nh->nh_oif)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700537 return -ENODEV;
538 if (!(dev->flags&IFF_UP))
539 return -ENETDOWN;
540 nh->nh_dev = dev;
541 dev_hold(dev);
542 nh->nh_scope = RT_SCOPE_LINK;
543 return 0;
544 }
545 {
	/* look the gateway up with a scope one step above the route's own */
Thomas Graf4e902c52006-08-17 18:14:52 -0700546 struct flowi fl = {
547 .nl_u = {
548 .ip4_u = {
549 .daddr = nh->nh_gw,
550 .scope = cfg->fc_scope + 1,
551 },
552 },
553 .oif = nh->nh_oif,
554 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700555
556 /* It is not necessary, but requires a bit of thinking */
557 if (fl.fl4_scope < RT_SCOPE_LINK)
558 fl.fl4_scope = RT_SCOPE_LINK;
559 if ((err = fib_lookup(&fl, &res)) != 0)
560 return err;
561 }
	/* from here on the lookup result must be released via the out label */
562 err = -EINVAL;
563 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
564 goto out;
565 nh->nh_scope = res.scope;
566 nh->nh_oif = FIB_RES_OIF(res);
567 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
568 goto out;
569 dev_hold(nh->nh_dev);
570 err = -ENETDOWN;
571 if (!(nh->nh_dev->flags & IFF_UP))
572 goto out;
573 err = 0;
574out:
575 fib_res_put(&res);
576 return err;
577 } else {
578 struct in_device *in_dev;
579
	/* PERVASIVE and ONLINK only make sense together with a gateway */
580 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
581 return -EINVAL;
582
583 in_dev = inetdev_by_index(nh->nh_oif);
584 if (in_dev == NULL)
585 return -ENODEV;
586 if (!(in_dev->dev->flags&IFF_UP)) {
587 in_dev_put(in_dev);
588 return -ENETDOWN;
589 }
590 nh->nh_dev = in_dev->dev;
591 dev_hold(nh->nh_dev);
592 nh->nh_scope = RT_SCOPE_HOST;
593 in_dev_put(in_dev);
594 }
595 return 0;
596}
597
Al Viro81f7bf62006-09-27 18:40:00 -0700598static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700599{
600 unsigned int mask = (fib_hash_size - 1);
601
Al Viro81f7bf62006-09-27 18:40:00 -0700602 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700603}
604
605static struct hlist_head *fib_hash_alloc(int bytes)
606{
607 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800608 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609 else
610 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800611 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612}
613
614static void fib_hash_free(struct hlist_head *hash, int bytes)
615{
616 if (!hash)
617 return;
618
619 if (bytes <= PAGE_SIZE)
620 kfree(hash);
621 else
622 free_pages((unsigned long) hash, get_order(bytes));
623}
624
625static void fib_hash_move(struct hlist_head *new_info_hash,
626 struct hlist_head *new_laddrhash,
627 unsigned int new_size)
628{
David S. Millerb7656e72005-08-05 04:12:48 -0700629 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700630 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700631 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700633 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700634 old_info_hash = fib_info_hash;
635 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 fib_hash_size = new_size;
637
638 for (i = 0; i < old_size; i++) {
639 struct hlist_head *head = &fib_info_hash[i];
640 struct hlist_node *node, *n;
641 struct fib_info *fi;
642
643 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
644 struct hlist_head *dest;
645 unsigned int new_hash;
646
647 hlist_del(&fi->fib_hash);
648
649 new_hash = fib_info_hashfn(fi);
650 dest = &new_info_hash[new_hash];
651 hlist_add_head(&fi->fib_hash, dest);
652 }
653 }
654 fib_info_hash = new_info_hash;
655
656 for (i = 0; i < old_size; i++) {
657 struct hlist_head *lhead = &fib_info_laddrhash[i];
658 struct hlist_node *node, *n;
659 struct fib_info *fi;
660
661 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
662 struct hlist_head *ldest;
663 unsigned int new_hash;
664
665 hlist_del(&fi->fib_lhash);
666
667 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
668 ldest = &new_laddrhash[new_hash];
669 hlist_add_head(&fi->fib_lhash, ldest);
670 }
671 }
672 fib_info_laddrhash = new_laddrhash;
673
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700674 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700675
676 bytes = old_size * sizeof(struct hlist_head *);
677 fib_hash_free(old_info_hash, bytes);
678 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679}
680
Thomas Graf4e902c52006-08-17 18:14:52 -0700681struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682{
683 int err;
684 struct fib_info *fi = NULL;
685 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700686 int nhs = 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687
688 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700689 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 goto err_inval;
691
692#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700693 if (cfg->fc_mp) {
694 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700695 if (nhs == 0)
696 goto err_inval;
697 }
698#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699
700 err = -ENOBUFS;
701 if (fib_info_cnt >= fib_hash_size) {
702 unsigned int new_size = fib_hash_size << 1;
703 struct hlist_head *new_info_hash;
704 struct hlist_head *new_laddrhash;
705 unsigned int bytes;
706
707 if (!new_size)
708 new_size = 1;
709 bytes = new_size * sizeof(struct hlist_head *);
710 new_info_hash = fib_hash_alloc(bytes);
711 new_laddrhash = fib_hash_alloc(bytes);
712 if (!new_info_hash || !new_laddrhash) {
713 fib_hash_free(new_info_hash, bytes);
714 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800715 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700716 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717
718 if (!fib_hash_size)
719 goto failure;
720 }
721
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700722 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 if (fi == NULL)
724 goto failure;
725 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700726
Thomas Graf4e902c52006-08-17 18:14:52 -0700727 fi->fib_protocol = cfg->fc_protocol;
728 fi->fib_flags = cfg->fc_flags;
729 fi->fib_priority = cfg->fc_priority;
730 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700731
732 fi->fib_nhs = nhs;
733 change_nexthops(fi) {
734 nh->nh_parent = fi;
735 } endfor_nexthops(fi)
736
Thomas Graf4e902c52006-08-17 18:14:52 -0700737 if (cfg->fc_mx) {
738 struct nlattr *nla;
739 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740
Thomas Graf4e902c52006-08-17 18:14:52 -0700741 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200742 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700743
744 if (type) {
745 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700747 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700749 }
750 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751
Thomas Graf4e902c52006-08-17 18:14:52 -0700752 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700754 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
755 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700757 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700759 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700760 goto err_inval;
761#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700762 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 goto err_inval;
764#endif
765#else
766 goto err_inval;
767#endif
768 } else {
769 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700770
771 nh->nh_oif = cfg->fc_oif;
772 nh->nh_gw = cfg->fc_gw;
773 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700774#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700775 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700776#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700777#ifdef CONFIG_IP_ROUTE_MULTIPATH
778 nh->nh_weight = 1;
779#endif
780 }
781
Thomas Graf4e902c52006-08-17 18:14:52 -0700782 if (fib_props[cfg->fc_type].error) {
783 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784 goto err_inval;
785 goto link_it;
786 }
787
Thomas Graf4e902c52006-08-17 18:14:52 -0700788 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 goto err_inval;
790
Thomas Graf4e902c52006-08-17 18:14:52 -0700791 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 struct fib_nh *nh = fi->fib_nh;
793
794 /* Local address is added. */
795 if (nhs != 1 || nh->nh_gw)
796 goto err_inval;
797 nh->nh_scope = RT_SCOPE_NOWHERE;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700798 nh->nh_dev = dev_get_by_index(&init_net, fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700799 err = -ENODEV;
800 if (nh->nh_dev == NULL)
801 goto failure;
802 } else {
803 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700804 if ((err = fib_check_nh(cfg, fi, nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700805 goto failure;
806 } endfor_nexthops(fi)
807 }
808
809 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700810 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
811 fi->fib_prefsrc != cfg->fc_dst)
Eric W. Biederman6b175b22008-01-10 03:25:28 -0800812 if (inet_addr_type(&init_net, fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 goto err_inval;
814 }
815
816link_it:
817 if ((ofi = fib_find_info(fi)) != NULL) {
818 fi->fib_dead = 1;
819 free_fib_info(fi);
820 ofi->fib_treeref++;
821 return ofi;
822 }
823
824 fi->fib_treeref++;
825 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700826 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827 hlist_add_head(&fi->fib_hash,
828 &fib_info_hash[fib_info_hashfn(fi)]);
829 if (fi->fib_prefsrc) {
830 struct hlist_head *head;
831
832 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
833 hlist_add_head(&fi->fib_lhash, head);
834 }
835 change_nexthops(fi) {
836 struct hlist_head *head;
837 unsigned int hash;
838
839 if (!nh->nh_dev)
840 continue;
841 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
842 head = &fib_info_devhash[hash];
843 hlist_add_head(&nh->nh_hash, head);
844 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700845 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700846 return fi;
847
848err_inval:
849 err = -EINVAL;
850
851failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900852 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700853 fi->fib_dead = 1;
854 free_fib_info(fi);
855 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700856
857 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858}
859
Robert Olssone5b43762005-08-25 13:01:03 -0700860/* Note! fib_semantic_match intentionally uses RCU list functions. */
/*
 * Pick the route alias and nexthop on @head that fit the flow @flp.
 *
 * Aliases are skipped when they have a non-zero TOS different from the
 * flow's, or a scope below the flow's.  The first remaining alias decides:
 *  - if its route type has an error code in fib_props[], that code is
 *    returned;
 *  - otherwise its nexthops are searched for one that is not dead and
 *    matches flp->oif (any nexthop when oif is 0).  On a hit @res is
 *    filled in, a client reference is taken on the fib_info and 0 is
 *    returned; without a hit (or when the fib_info itself is dead) the
 *    search moves on to the next alias.
 * Returns 1 when no alias produced a result and -EINVAL for a route type
 * that has no error code but is not one of the forwarding types handled
 * in the switch.  @zone and @mask are not used in the body.
 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Al Viro1ef1b8c2006-09-26 22:20:56 -0700862 struct fib_result *res, __be32 zone, __be32 mask,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863 int prefixlen)
864{
865 struct fib_alias *fa;
866 int nh_sel = 0;
867
Robert Olssone5b43762005-08-25 13:01:03 -0700868 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869 int err;
870
871 if (fa->fa_tos &&
872 fa->fa_tos != flp->fl4_tos)
873 continue;
874
875 if (fa->fa_scope < flp->fl4_scope)
876 continue;
877
878 fa->fa_state |= FA_S_ACCESSED;
879
880 err = fib_props[fa->fa_type].error;
881 if (err == 0) {
882 struct fib_info *fi = fa->fa_info;
883
884 if (fi->fib_flags & RTNH_F_DEAD)
885 continue;
886
887 switch (fa->fa_type) {
888 case RTN_UNICAST:
889 case RTN_LOCAL:
890 case RTN_BROADCAST:
891 case RTN_ANYCAST:
892 case RTN_MULTICAST:
893 for_nexthops(fi) {
	/* this continue/break pair acts on the nexthop loop only */
894 if (nh->nh_flags&RTNH_F_DEAD)
895 continue;
896 if (!flp->oif || flp->oif == nh->nh_oif)
897 break;
898 }
	/*
	 * The brace above closed the loop body only; the scope opened by
	 * for_nexthops() is still open, so nhsel is visible here.  It is
	 * below the loop bound exactly when the loop left through break.
	 */
899#ifdef CONFIG_IP_ROUTE_MULTIPATH
900 if (nhsel < fi->fib_nhs) {
901 nh_sel = nhsel;
902 goto out_fill_res;
903 }
904#else
905 if (nhsel < 1) {
906 goto out_fill_res;
907 }
908#endif
909 endfor_nexthops(fi);
	/* no usable nexthop: try the next alias */
910 continue;
911
912 default:
913 printk(KERN_DEBUG "impossible 102\n");
914 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700915 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700916 }
917 return err;
918 }
919 return 1;
920
921out_fill_res:
922 res->prefixlen = prefixlen;
923 res->nh_sel = nh_sel;
924 res->type = fa->fa_type;
925 res->scope = fa->fa_scope;
926 res->fi = fa->fa_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700927 atomic_inc(&res->fi->fib_clntref);
928 return 0;
929}
930
931/* Find appropriate source address to this destination */
932
Al Virob83738a2006-09-26 22:14:15 -0700933__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934{
935 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
936}
937
Thomas Grafbe403ea2006-08-17 18:15:17 -0700938int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700939 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700940 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700942 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700944
Thomas Grafbe403ea2006-08-17 18:15:17 -0700945 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
946 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800947 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700948
949 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700950 rtm->rtm_family = AF_INET;
951 rtm->rtm_dst_len = dst_len;
952 rtm->rtm_src_len = 0;
953 rtm->rtm_tos = tos;
954 rtm->rtm_table = tb_id;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700955 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700956 rtm->rtm_type = type;
957 rtm->rtm_flags = fi->fib_flags;
958 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700960
961 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700962 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700963
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700965 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
966
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700968 goto nla_put_failure;
969
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700971 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700972
Linus Torvalds1da177e2005-04-16 15:20:36 -0700973 if (fi->fib_nhs == 1) {
974 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700975 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700976
Linus Torvalds1da177e2005-04-16 15:20:36 -0700977 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700978 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700979#ifdef CONFIG_NET_CLS_ROUTE
980 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700981 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700982#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700983 }
984#ifdef CONFIG_IP_ROUTE_MULTIPATH
985 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700986 struct rtnexthop *rtnh;
987 struct nlattr *mp;
988
989 mp = nla_nest_start(skb, RTA_MULTIPATH);
990 if (mp == NULL)
991 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700992
993 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700994 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
995 if (rtnh == NULL)
996 goto nla_put_failure;
997
998 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
999 rtnh->rtnh_hops = nh->nh_weight - 1;
1000 rtnh->rtnh_ifindex = nh->nh_oif;
1001
Linus Torvalds1da177e2005-04-16 15:20:36 -07001002 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001003 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001004#ifdef CONFIG_NET_CLS_ROUTE
1005 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001006 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001007#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001008 /* length of rtnetlink header + attributes */
1009 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001011
1012 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 }
1014#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001015 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001016
Thomas Grafbe403ea2006-08-17 18:15:17 -07001017nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001018 nlmsg_cancel(skb, nlh);
1019 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020}
1021
Linus Torvalds1da177e2005-04-16 15:20:36 -07001022/*
1023 Update FIB if:
1024 - local address disappeared -> we must delete all the entries
1025 referring to it.
1026 - device went down -> we must shutdown all nexthops going via it.
1027 */
1028
Al Viro81f7bf62006-09-27 18:40:00 -07001029int fib_sync_down(__be32 local, struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030{
1031 int ret = 0;
1032 int scope = RT_SCOPE_NOWHERE;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001033
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034 if (force)
1035 scope = -1;
1036
1037 if (local && fib_info_laddrhash) {
1038 unsigned int hash = fib_laddr_hashfn(local);
1039 struct hlist_head *head = &fib_info_laddrhash[hash];
1040 struct hlist_node *node;
1041 struct fib_info *fi;
1042
1043 hlist_for_each_entry(fi, node, head, fib_lhash) {
1044 if (fi->fib_prefsrc == local) {
1045 fi->fib_flags |= RTNH_F_DEAD;
1046 ret++;
1047 }
1048 }
1049 }
1050
1051 if (dev) {
1052 struct fib_info *prev_fi = NULL;
1053 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1054 struct hlist_head *head = &fib_info_devhash[hash];
1055 struct hlist_node *node;
1056 struct fib_nh *nh;
1057
1058 hlist_for_each_entry(nh, node, head, nh_hash) {
1059 struct fib_info *fi = nh->nh_parent;
1060 int dead;
1061
1062 BUG_ON(!fi->fib_nhs);
1063 if (nh->nh_dev != dev || fi == prev_fi)
1064 continue;
1065 prev_fi = fi;
1066 dead = 0;
1067 change_nexthops(fi) {
1068 if (nh->nh_flags&RTNH_F_DEAD)
1069 dead++;
1070 else if (nh->nh_dev == dev &&
1071 nh->nh_scope != scope) {
1072 nh->nh_flags |= RTNH_F_DEAD;
1073#ifdef CONFIG_IP_ROUTE_MULTIPATH
1074 spin_lock_bh(&fib_multipath_lock);
1075 fi->fib_power -= nh->nh_power;
1076 nh->nh_power = 0;
1077 spin_unlock_bh(&fib_multipath_lock);
1078#endif
1079 dead++;
1080 }
1081#ifdef CONFIG_IP_ROUTE_MULTIPATH
1082 if (force > 1 && nh->nh_dev == dev) {
1083 dead = fi->fib_nhs;
1084 break;
1085 }
1086#endif
1087 } endfor_nexthops(fi)
1088 if (dead == fi->fib_nhs) {
1089 fi->fib_flags |= RTNH_F_DEAD;
1090 ret++;
1091 }
1092 }
1093 }
1094
1095 return ret;
1096}
1097
1098#ifdef CONFIG_IP_ROUTE_MULTIPATH
1099
1100/*
1101 Dead device goes up. We wake up dead nexthops.
1102 It takes sense only on multipath routes.
1103 */
1104
1105int fib_sync_up(struct net_device *dev)
1106{
1107 struct fib_info *prev_fi;
1108 unsigned int hash;
1109 struct hlist_head *head;
1110 struct hlist_node *node;
1111 struct fib_nh *nh;
1112 int ret;
1113
1114 if (!(dev->flags&IFF_UP))
1115 return 0;
1116
1117 prev_fi = NULL;
1118 hash = fib_devindex_hashfn(dev->ifindex);
1119 head = &fib_info_devhash[hash];
1120 ret = 0;
1121
1122 hlist_for_each_entry(nh, node, head, nh_hash) {
1123 struct fib_info *fi = nh->nh_parent;
1124 int alive;
1125
1126 BUG_ON(!fi->fib_nhs);
1127 if (nh->nh_dev != dev || fi == prev_fi)
1128 continue;
1129
1130 prev_fi = fi;
1131 alive = 0;
1132 change_nexthops(fi) {
1133 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1134 alive++;
1135 continue;
1136 }
1137 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1138 continue;
Herbert Xue5ed6392005-10-03 14:35:55 -07001139 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140 continue;
1141 alive++;
1142 spin_lock_bh(&fib_multipath_lock);
1143 nh->nh_power = 0;
1144 nh->nh_flags &= ~RTNH_F_DEAD;
1145 spin_unlock_bh(&fib_multipath_lock);
1146 } endfor_nexthops(fi)
1147
1148 if (alive > 0) {
1149 fi->fib_flags &= ~RTNH_F_DEAD;
1150 ret++;
1151 }
1152 }
1153
1154 return ret;
1155}
1156
1157/*
1158 The algorithm is suboptimal, but it provides really
1159 fair weighted route distribution.
1160 */
1161
1162void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1163{
1164 struct fib_info *fi = res->fi;
1165 int w;
1166
1167 spin_lock_bh(&fib_multipath_lock);
1168 if (fi->fib_power <= 0) {
1169 int power = 0;
1170 change_nexthops(fi) {
1171 if (!(nh->nh_flags&RTNH_F_DEAD)) {
1172 power += nh->nh_weight;
1173 nh->nh_power = nh->nh_weight;
1174 }
1175 } endfor_nexthops(fi);
1176 fi->fib_power = power;
1177 if (power <= 0) {
1178 spin_unlock_bh(&fib_multipath_lock);
1179 /* Race condition: route has just become dead. */
1180 res->nh_sel = 0;
1181 return;
1182 }
1183 }
1184
1185
1186 /* w should be random number [0..fi->fib_power-1],
1187 it is pretty bad approximation.
1188 */
1189
1190 w = jiffies % fi->fib_power;
1191
1192 change_nexthops(fi) {
1193 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1194 if ((w -= nh->nh_power) <= 0) {
1195 nh->nh_power--;
1196 fi->fib_power--;
1197 res->nh_sel = nhsel;
1198 spin_unlock_bh(&fib_multipath_lock);
1199 return;
1200 }
1201 }
1202 } endfor_nexthops(fi);
1203
1204 /* Race condition: route has just become dead. */
1205 res->nh_sel = 0;
1206 spin_unlock_bh(&fib_multipath_lock);
1207}
1208#endif