blob: 96b21011a3e49dd422360abeda56baea36562c94 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * IPv4 Forwarding Information Base: semantics.
7 *
Linus Torvalds1da177e2005-04-16 15:20:36 -07008 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
14 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <asm/uaccess.h>
17#include <asm/system.h>
18#include <linux/bitops.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/jiffies.h>
22#include <linux/mm.h>
23#include <linux/string.h>
24#include <linux/socket.h>
25#include <linux/sockios.h>
26#include <linux/errno.h>
27#include <linux/in.h>
28#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020029#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/proc_fs.h>
33#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <linux/init.h>
35
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020036#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include <net/ip.h>
38#include <net/protocol.h>
39#include <net/route.h>
40#include <net/tcp.h>
41#include <net/sock.h>
42#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070043#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070044#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070045
46#include "fib_lookup.h"
47
/* Taken around every update and lookup of the fib_info hash tables below. */
static DEFINE_SPINLOCK(fib_info_lock);
/* fib_info objects hashed by fib_info_hashfn(). */
static struct hlist_head *fib_info_hash;
/* fib_info objects with a preferred source, hashed by fib_laddr_hashfn(). */
static struct hlist_head *fib_info_laddrhash;
/* Bucket count shared by fib_info_hash and fib_info_laddrhash. */
static unsigned int fib_hash_size;
/* Incremented when a fib_info is allocated, decremented when it is freed. */
static unsigned int fib_info_cnt;

/* Nexthops hashed by the ifindex of their device (fixed-size table). */
#define DEVINDEX_HASHBITS 8
#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
57
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope and a loop over
 * the nexthops of @fi; the scope must be closed with endfor_nexthops(fi).
 * Inside the loop body:
 *   - for_nexthops    provides "nhsel" and a read-only "nh";
 *   - change_nexthops provides "nhsel" and a writable "nexthop_nh".
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh * nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/* Hope, that gcc will optimize it to get rid of dummy loop */

#define for_nexthops(fi) {						\
	int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
81
82
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090083static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070084{
85 int error;
86 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070087} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090088 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070089 .error = 0,
90 .scope = RT_SCOPE_NOWHERE,
91 }, /* RTN_UNSPEC */
92 {
93 .error = 0,
94 .scope = RT_SCOPE_UNIVERSE,
95 }, /* RTN_UNICAST */
96 {
97 .error = 0,
98 .scope = RT_SCOPE_HOST,
99 }, /* RTN_LOCAL */
100 {
101 .error = 0,
102 .scope = RT_SCOPE_LINK,
103 }, /* RTN_BROADCAST */
104 {
105 .error = 0,
106 .scope = RT_SCOPE_LINK,
107 }, /* RTN_ANYCAST */
108 {
109 .error = 0,
110 .scope = RT_SCOPE_UNIVERSE,
111 }, /* RTN_MULTICAST */
112 {
113 .error = -EINVAL,
114 .scope = RT_SCOPE_UNIVERSE,
115 }, /* RTN_BLACKHOLE */
116 {
117 .error = -EHOSTUNREACH,
118 .scope = RT_SCOPE_UNIVERSE,
119 }, /* RTN_UNREACHABLE */
120 {
121 .error = -EACCES,
122 .scope = RT_SCOPE_UNIVERSE,
123 }, /* RTN_PROHIBIT */
124 {
125 .error = -EAGAIN,
126 .scope = RT_SCOPE_UNIVERSE,
127 }, /* RTN_THROW */
128 {
129 .error = -EINVAL,
130 .scope = RT_SCOPE_NOWHERE,
131 }, /* RTN_NAT */
132 {
133 .error = -EINVAL,
134 .scope = RT_SCOPE_NOWHERE,
135 }, /* RTN_XRESOLVE */
136};
137
138
139/* Release a nexthop info record */
140
141void free_fib_info(struct fib_info *fi)
142{
143 if (fi->fib_dead == 0) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800144 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700145 return;
146 }
147 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800148 if (nexthop_nh->nh_dev)
149 dev_put(nexthop_nh->nh_dev);
150 nexthop_nh->nh_dev = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700151 } endfor_nexthops(fi);
152 fib_info_cnt--;
Denis V. Lunev57d7a602008-04-16 02:00:50 -0700153 release_net(fi->fib_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 kfree(fi);
155}
156
157void fib_release_info(struct fib_info *fi)
158{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700159 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160 if (fi && --fi->fib_treeref == 0) {
161 hlist_del(&fi->fib_hash);
162 if (fi->fib_prefsrc)
163 hlist_del(&fi->fib_lhash);
164 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800165 if (!nexthop_nh->nh_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700166 continue;
David S. Miller71fceff2010-01-15 01:16:40 -0800167 hlist_del(&nexthop_nh->nh_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700168 } endfor_nexthops(fi)
169 fi->fib_dead = 1;
170 fib_info_put(fi);
171 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700172 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700173}
174
175static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
176{
177 const struct fib_nh *onh = ofi->fib_nh;
178
179 for_nexthops(fi) {
180 if (nh->nh_oif != onh->nh_oif ||
181 nh->nh_gw != onh->nh_gw ||
182 nh->nh_scope != onh->nh_scope ||
183#ifdef CONFIG_IP_ROUTE_MULTIPATH
184 nh->nh_weight != onh->nh_weight ||
185#endif
186#ifdef CONFIG_NET_CLS_ROUTE
187 nh->nh_tclassid != onh->nh_tclassid ||
188#endif
189 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
190 return -1;
191 onh++;
192 } endfor_nexthops(fi);
193 return 0;
194}
195
David S. Miller88ebc722008-01-12 21:49:01 -0800196static inline unsigned int fib_devindex_hashfn(unsigned int val)
197{
198 unsigned int mask = DEVINDEX_HASHSIZE - 1;
199
200 return (val ^
201 (val >> DEVINDEX_HASHBITS) ^
202 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
203}
204
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
206{
207 unsigned int mask = (fib_hash_size - 1);
208 unsigned int val = fi->fib_nhs;
209
210 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700211 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700212 val ^= fi->fib_priority;
David S. Miller88ebc722008-01-12 21:49:01 -0800213 for_nexthops(fi) {
214 val ^= fib_devindex_hashfn(nh->nh_oif);
215 } endfor_nexthops(fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700216
217 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
218}
219
220static struct fib_info *fib_find_info(const struct fib_info *nfi)
221{
222 struct hlist_head *head;
223 struct hlist_node *node;
224 struct fib_info *fi;
225 unsigned int hash;
226
227 hash = fib_info_hashfn(nfi);
228 head = &fib_info_hash[hash];
229
230 hlist_for_each_entry(fi, node, head, fib_hash) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800231 if (!net_eq(fi->fib_net, nfi->fib_net))
Denis V. Lunev4814bdb2008-01-31 18:50:07 -0800232 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700233 if (fi->fib_nhs != nfi->fib_nhs)
234 continue;
235 if (nfi->fib_protocol == fi->fib_protocol &&
236 nfi->fib_prefsrc == fi->fib_prefsrc &&
237 nfi->fib_priority == fi->fib_priority &&
238 memcmp(nfi->fib_metrics, fi->fib_metrics,
239 sizeof(fi->fib_metrics)) == 0 &&
240 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
241 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
242 return fi;
243 }
244
245 return NULL;
246}
247
Linus Torvalds1da177e2005-04-16 15:20:36 -0700248/* Check, that the gateway is already configured.
249 Used only by redirect accept routine.
250 */
251
Al Virod878e72e2006-09-26 22:18:13 -0700252int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700253{
254 struct hlist_head *head;
255 struct hlist_node *node;
256 struct fib_nh *nh;
257 unsigned int hash;
258
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700259 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260
261 hash = fib_devindex_hashfn(dev->ifindex);
262 head = &fib_info_devhash[hash];
263 hlist_for_each_entry(nh, node, head, nh_hash) {
264 if (nh->nh_dev == dev &&
265 nh->nh_gw == gw &&
266 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700267 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 return 0;
269 }
270 }
271
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700272 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273
274 return -1;
275}
276
Thomas Graf339bf982006-11-10 14:10:15 -0800277static inline size_t fib_nlmsg_size(struct fib_info *fi)
278{
279 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
280 + nla_total_size(4) /* RTA_TABLE */
281 + nla_total_size(4) /* RTA_DST */
282 + nla_total_size(4) /* RTA_PRIORITY */
283 + nla_total_size(4); /* RTA_PREFSRC */
284
285 /* space for nested metrics */
286 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
287
288 if (fi->fib_nhs) {
289 /* Also handles the special case fib_nhs == 1 */
290
291 /* each nexthop is packed in an attribute */
292 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
293
294 /* may contain flow and gateway attribute */
295 nhsize += 2 * nla_total_size(4);
296
297 /* all nexthops are packed in a nested attribute */
298 payload += nla_total_size(fi->fib_nhs * nhsize);
299 }
300
301 return payload;
302}
303
Al Viro81f7bf62006-09-27 18:40:00 -0700304void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700305 int dst_len, u32 tb_id, struct nl_info *info,
306 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700307{
308 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700309 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700310 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700311
Thomas Graf339bf982006-11-10 14:10:15 -0800312 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700313 if (skb == NULL)
314 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315
Thomas Graf4e902c52006-08-17 18:14:52 -0700316 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700317 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700318 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800319 if (err < 0) {
320 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
321 WARN_ON(err == -EMSGSIZE);
322 kfree_skb(skb);
323 goto errout;
324 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -0800325 rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
326 info->nlh, GFP_KERNEL);
327 return;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700328errout:
329 if (err < 0)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700331}
332
333/* Return the first fib alias matching TOS with
334 * priority less than or equal to PRIO.
335 */
336struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
337{
338 if (fah) {
339 struct fib_alias *fa;
340 list_for_each_entry(fa, fah, fa_list) {
341 if (fa->fa_tos > tos)
342 continue;
343 if (fa->fa_info->fib_priority >= prio ||
344 fa->fa_tos < tos)
345 return fa;
346 }
347 }
348 return NULL;
349}
350
351int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800352 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353{
354 struct neighbour *n;
355 int state = NUD_NONE;
356
357 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
358 if (n) {
359 state = n->nud_state;
360 neigh_release(n);
361 }
Jianjun Kongd9319102008-11-03 00:23:42 -0800362 if (state == NUD_REACHABLE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700363 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800364 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700365 return 0;
366 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800367 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700368 *last_resort = fi;
369 *last_idx = order;
370 }
371 return 1;
372}
373
374#ifdef CONFIG_IP_ROUTE_MULTIPATH
375
/*
 * Count the rtnexthop entries in a buffer of @remaining bytes starting at
 * @rtnh.  Returns 0 if bytes are left over after the last valid entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
388
Thomas Graf4e902c52006-08-17 18:14:52 -0700389static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
390 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700393 int attrlen;
394
395 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700396 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700397
David S. Miller71fceff2010-01-15 01:16:40 -0800398 nexthop_nh->nh_flags =
399 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
400 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
401 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700402
403 attrlen = rtnh_attrlen(rtnh);
404 if (attrlen > 0) {
405 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
406
407 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
David S. Miller71fceff2010-01-15 01:16:40 -0800408 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700409#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700410 nla = nla_find(attrs, attrlen, RTA_FLOW);
David S. Miller71fceff2010-01-15 01:16:40 -0800411 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412#endif
413 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700414
415 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700417
Linus Torvalds1da177e2005-04-16 15:20:36 -0700418 return 0;
419}
420
421#endif
422
Thomas Graf4e902c52006-08-17 18:14:52 -0700423int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700424{
425#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700426 struct rtnexthop *rtnh;
427 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700428#endif
429
Thomas Graf4e902c52006-08-17 18:14:52 -0700430 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431 return 1;
432
Thomas Graf4e902c52006-08-17 18:14:52 -0700433 if (cfg->fc_oif || cfg->fc_gw) {
434 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
435 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 return 0;
437 return 1;
438 }
439
440#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700441 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700442 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700443
444 rtnh = cfg->fc_mp;
445 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900446
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700448 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700449
Thomas Graf4e902c52006-08-17 18:14:52 -0700450 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700452
453 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700455
456 attrlen = rtnh_attrlen(rtnh);
457 if (attrlen < 0) {
458 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
459
460 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700461 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700462 return 1;
463#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700464 nla = nla_find(attrs, attrlen, RTA_FLOW);
465 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700466 return 1;
467#endif
468 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700469
470 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 } endfor_nexthops(fi);
472#endif
473 return 0;
474}
475
476
477/*
478 Picture
479 -------
480
   Semantics of nexthop is very messy for historical reasons.
482 We have to take into account, that:
483 a) gateway can be actually local interface address,
484 so that gatewayed route is direct.
485 b) gateway must be on-link address, possibly
486 described not by an ifaddr, but also by a direct route.
487 c) If both gateway and interface are specified, they should not
488 contradict.
489 d) If we use tunnel routes, gateway could be not on-link.
490
491 Attempt to reconcile all of these (alas, self-contradictory) conditions
492 results in pretty ugly and hairy code with obscure logic.
493
   I chose to generalize it instead, so that the size
495 of code does not increase practically, but it becomes
496 much more general.
497 Every prefix is assigned a "scope" value: "host" is local address,
498 "link" is direct route,
499 [ ... "site" ... "interior" ... ]
500 and "universe" is true gateway route with global meaning.
501
502 Every prefix refers to a set of "nexthop"s (gw, oif),
503 where gw must have narrower scope. This recursion stops
504 when gw has LOCAL scope or if "nexthop" is declared ONLINK,
505 which means that gw is forced to be on link.
506
507 Code is still hairy, but now it is apparently logically
   consistent and very flexible. E.g., as a by-product it allows
   independent exterior and interior routing processes to co-exist
   in peace.
511
512 Normally it looks as following.
513
514 {universe prefix} -> (gw, oif) [scope link]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900515 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516 |-> {link prefix} -> (gw, oif) [scope local]
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900517 |
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518 |-> {local prefix} (terminal node)
519 */
520
Thomas Graf4e902c52006-08-17 18:14:52 -0700521static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
522 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523{
524 int err;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800525 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700526
Denis V. Lunev86167a32008-01-21 17:34:00 -0800527 net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700528 if (nh->nh_gw) {
529 struct fib_result res;
530
531#ifdef CONFIG_IP_ROUTE_PERVASIVE
532 if (nh->nh_flags&RTNH_F_PERVASIVE)
533 return 0;
534#endif
535 if (nh->nh_flags&RTNH_F_ONLINK) {
536 struct net_device *dev;
537
Thomas Graf4e902c52006-08-17 18:14:52 -0700538 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800540 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700541 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800542 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 return -ENODEV;
544 if (!(dev->flags&IFF_UP))
545 return -ENETDOWN;
546 nh->nh_dev = dev;
547 dev_hold(dev);
548 nh->nh_scope = RT_SCOPE_LINK;
549 return 0;
550 }
551 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700552 struct flowi fl = {
553 .nl_u = {
554 .ip4_u = {
555 .daddr = nh->nh_gw,
556 .scope = cfg->fc_scope + 1,
557 },
558 },
559 .oif = nh->nh_oif,
560 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700561
562 /* It is not necessary, but requires a bit of thinking */
563 if (fl.fl4_scope < RT_SCOPE_LINK)
564 fl.fl4_scope = RT_SCOPE_LINK;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800565 if ((err = fib_lookup(net, &fl, &res)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 return err;
567 }
568 err = -EINVAL;
569 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
570 goto out;
571 nh->nh_scope = res.scope;
572 nh->nh_oif = FIB_RES_OIF(res);
573 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
574 goto out;
575 dev_hold(nh->nh_dev);
576 err = -ENETDOWN;
577 if (!(nh->nh_dev->flags & IFF_UP))
578 goto out;
579 err = 0;
580out:
581 fib_res_put(&res);
582 return err;
583 } else {
584 struct in_device *in_dev;
585
586 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
587 return -EINVAL;
588
Denis V. Lunev86167a32008-01-21 17:34:00 -0800589 in_dev = inetdev_by_index(net, nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 if (in_dev == NULL)
591 return -ENODEV;
592 if (!(in_dev->dev->flags&IFF_UP)) {
593 in_dev_put(in_dev);
594 return -ENETDOWN;
595 }
596 nh->nh_dev = in_dev->dev;
597 dev_hold(nh->nh_dev);
598 nh->nh_scope = RT_SCOPE_HOST;
599 in_dev_put(in_dev);
600 }
601 return 0;
602}
603
Al Viro81f7bf62006-09-27 18:40:00 -0700604static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700605{
606 unsigned int mask = (fib_hash_size - 1);
607
Al Viro81f7bf62006-09-27 18:40:00 -0700608 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700609}
610
611static struct hlist_head *fib_hash_alloc(int bytes)
612{
613 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800614 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615 else
616 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800617 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700618}
619
620static void fib_hash_free(struct hlist_head *hash, int bytes)
621{
622 if (!hash)
623 return;
624
625 if (bytes <= PAGE_SIZE)
626 kfree(hash);
627 else
628 free_pages((unsigned long) hash, get_order(bytes));
629}
630
631static void fib_hash_move(struct hlist_head *new_info_hash,
632 struct hlist_head *new_laddrhash,
633 unsigned int new_size)
634{
David S. Millerb7656e72005-08-05 04:12:48 -0700635 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700637 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700638
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700639 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700640 old_info_hash = fib_info_hash;
641 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700642 fib_hash_size = new_size;
643
644 for (i = 0; i < old_size; i++) {
645 struct hlist_head *head = &fib_info_hash[i];
646 struct hlist_node *node, *n;
647 struct fib_info *fi;
648
649 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
650 struct hlist_head *dest;
651 unsigned int new_hash;
652
653 hlist_del(&fi->fib_hash);
654
655 new_hash = fib_info_hashfn(fi);
656 dest = &new_info_hash[new_hash];
657 hlist_add_head(&fi->fib_hash, dest);
658 }
659 }
660 fib_info_hash = new_info_hash;
661
662 for (i = 0; i < old_size; i++) {
663 struct hlist_head *lhead = &fib_info_laddrhash[i];
664 struct hlist_node *node, *n;
665 struct fib_info *fi;
666
667 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
668 struct hlist_head *ldest;
669 unsigned int new_hash;
670
671 hlist_del(&fi->fib_lhash);
672
673 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
674 ldest = &new_laddrhash[new_hash];
675 hlist_add_head(&fi->fib_lhash, ldest);
676 }
677 }
678 fib_info_laddrhash = new_laddrhash;
679
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700680 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700681
682 bytes = old_size * sizeof(struct hlist_head *);
683 fib_hash_free(old_info_hash, bytes);
684 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685}
686
Thomas Graf4e902c52006-08-17 18:14:52 -0700687struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688{
689 int err;
690 struct fib_info *fi = NULL;
691 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700692 int nhs = 1;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800693 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694
695 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700696 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700697 goto err_inval;
698
699#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700700 if (cfg->fc_mp) {
701 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 if (nhs == 0)
703 goto err_inval;
704 }
705#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
707 err = -ENOBUFS;
708 if (fib_info_cnt >= fib_hash_size) {
709 unsigned int new_size = fib_hash_size << 1;
710 struct hlist_head *new_info_hash;
711 struct hlist_head *new_laddrhash;
712 unsigned int bytes;
713
714 if (!new_size)
715 new_size = 1;
716 bytes = new_size * sizeof(struct hlist_head *);
717 new_info_hash = fib_hash_alloc(bytes);
718 new_laddrhash = fib_hash_alloc(bytes);
719 if (!new_info_hash || !new_laddrhash) {
720 fib_hash_free(new_info_hash, bytes);
721 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800722 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700723 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700724
725 if (!fib_hash_size)
726 goto failure;
727 }
728
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700729 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 if (fi == NULL)
731 goto failure;
732 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700733
Denis V. Lunev57d7a602008-04-16 02:00:50 -0700734 fi->fib_net = hold_net(net);
Thomas Graf4e902c52006-08-17 18:14:52 -0700735 fi->fib_protocol = cfg->fc_protocol;
736 fi->fib_flags = cfg->fc_flags;
737 fi->fib_priority = cfg->fc_priority;
738 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739
740 fi->fib_nhs = nhs;
741 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800742 nexthop_nh->nh_parent = fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700743 } endfor_nexthops(fi)
744
Thomas Graf4e902c52006-08-17 18:14:52 -0700745 if (cfg->fc_mx) {
746 struct nlattr *nla;
747 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700748
Thomas Graf4e902c52006-08-17 18:14:52 -0700749 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200750 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700751
752 if (type) {
753 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700755 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757 }
758 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700759
Thomas Graf4e902c52006-08-17 18:14:52 -0700760 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700762 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
763 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700764 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700765 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700766 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700767 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 goto err_inval;
769#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700770 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700771 goto err_inval;
772#endif
773#else
774 goto err_inval;
775#endif
776 } else {
777 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700778
779 nh->nh_oif = cfg->fc_oif;
780 nh->nh_gw = cfg->fc_gw;
781 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700783 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700784#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785#ifdef CONFIG_IP_ROUTE_MULTIPATH
786 nh->nh_weight = 1;
787#endif
788 }
789
Thomas Graf4e902c52006-08-17 18:14:52 -0700790 if (fib_props[cfg->fc_type].error) {
791 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700792 goto err_inval;
793 goto link_it;
794 }
795
Thomas Graf4e902c52006-08-17 18:14:52 -0700796 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 goto err_inval;
798
Thomas Graf4e902c52006-08-17 18:14:52 -0700799 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700800 struct fib_nh *nh = fi->fib_nh;
801
802 /* Local address is added. */
803 if (nhs != 1 || nh->nh_gw)
804 goto err_inval;
805 nh->nh_scope = RT_SCOPE_NOWHERE;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800806 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700807 err = -ENODEV;
808 if (nh->nh_dev == NULL)
809 goto failure;
810 } else {
811 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800812 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 goto failure;
814 } endfor_nexthops(fi)
815 }
816
817 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700818 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
819 fi->fib_prefsrc != cfg->fc_dst)
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800820 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700821 goto err_inval;
822 }
823
824link_it:
825 if ((ofi = fib_find_info(fi)) != NULL) {
826 fi->fib_dead = 1;
827 free_fib_info(fi);
828 ofi->fib_treeref++;
829 return ofi;
830 }
831
832 fi->fib_treeref++;
833 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700834 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700835 hlist_add_head(&fi->fib_hash,
836 &fib_info_hash[fib_info_hashfn(fi)]);
837 if (fi->fib_prefsrc) {
838 struct hlist_head *head;
839
840 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
841 hlist_add_head(&fi->fib_lhash, head);
842 }
843 change_nexthops(fi) {
844 struct hlist_head *head;
845 unsigned int hash;
846
David S. Miller71fceff2010-01-15 01:16:40 -0800847 if (!nexthop_nh->nh_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700848 continue;
David S. Miller71fceff2010-01-15 01:16:40 -0800849 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700850 head = &fib_info_devhash[hash];
David S. Miller71fceff2010-01-15 01:16:40 -0800851 hlist_add_head(&nexthop_nh->nh_hash, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700852 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700853 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700854 return fi;
855
856err_inval:
857 err = -EINVAL;
858
859failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900860 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700861 fi->fib_dead = 1;
862 free_fib_info(fi);
863 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700864
865 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866}
867
Robert Olssone5b43762005-08-25 13:01:03 -0700868/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700869int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Rami Rosene204a342009-05-18 01:19:12 +0000870 struct fib_result *res, int prefixlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700871{
872 struct fib_alias *fa;
873 int nh_sel = 0;
874
Robert Olssone5b43762005-08-25 13:01:03 -0700875 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700876 int err;
877
878 if (fa->fa_tos &&
879 fa->fa_tos != flp->fl4_tos)
880 continue;
881
882 if (fa->fa_scope < flp->fl4_scope)
883 continue;
884
885 fa->fa_state |= FA_S_ACCESSED;
886
887 err = fib_props[fa->fa_type].error;
888 if (err == 0) {
889 struct fib_info *fi = fa->fa_info;
890
891 if (fi->fib_flags & RTNH_F_DEAD)
892 continue;
893
894 switch (fa->fa_type) {
895 case RTN_UNICAST:
896 case RTN_LOCAL:
897 case RTN_BROADCAST:
898 case RTN_ANYCAST:
899 case RTN_MULTICAST:
900 for_nexthops(fi) {
901 if (nh->nh_flags&RTNH_F_DEAD)
902 continue;
903 if (!flp->oif || flp->oif == nh->nh_oif)
904 break;
905 }
906#ifdef CONFIG_IP_ROUTE_MULTIPATH
907 if (nhsel < fi->fib_nhs) {
908 nh_sel = nhsel;
909 goto out_fill_res;
910 }
911#else
912 if (nhsel < 1) {
913 goto out_fill_res;
914 }
915#endif
916 endfor_nexthops(fi);
917 continue;
918
919 default:
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800920 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
921 fa->fa_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700923 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700924 }
925 return err;
926 }
927 return 1;
928
929out_fill_res:
930 res->prefixlen = prefixlen;
931 res->nh_sel = nh_sel;
932 res->type = fa->fa_type;
933 res->scope = fa->fa_scope;
934 res->fi = fa->fa_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700935 atomic_inc(&res->fi->fib_clntref);
936 return 0;
937}
938
939/* Find appropriate source address to this destination */
940
Al Virob83738a2006-09-26 22:14:15 -0700941__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942{
943 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
944}
945
Thomas Grafbe403ea2006-08-17 18:15:17 -0700946int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700947 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700948 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700950 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700951 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952
Thomas Grafbe403ea2006-08-17 18:15:17 -0700953 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
954 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800955 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700956
957 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700958 rtm->rtm_family = AF_INET;
959 rtm->rtm_dst_len = dst_len;
960 rtm->rtm_src_len = 0;
961 rtm->rtm_tos = tos;
Krzysztof Piotr Oledzki709772e2008-06-10 15:44:49 -0700962 if (tb_id < 256)
963 rtm->rtm_table = tb_id;
964 else
965 rtm->rtm_table = RT_TABLE_COMPAT;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700966 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 rtm->rtm_type = type;
968 rtm->rtm_flags = fi->fib_flags;
969 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700970 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700971
972 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700973 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700974
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700976 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
977
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700979 goto nla_put_failure;
980
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700982 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700983
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 if (fi->fib_nhs == 1) {
985 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700986 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700987
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700989 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700990#ifdef CONFIG_NET_CLS_ROUTE
991 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700992 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700993#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 }
995#ifdef CONFIG_IP_ROUTE_MULTIPATH
996 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700997 struct rtnexthop *rtnh;
998 struct nlattr *mp;
999
1000 mp = nla_nest_start(skb, RTA_MULTIPATH);
1001 if (mp == NULL)
1002 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001003
1004 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001005 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1006 if (rtnh == NULL)
1007 goto nla_put_failure;
1008
1009 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1010 rtnh->rtnh_hops = nh->nh_weight - 1;
1011 rtnh->rtnh_ifindex = nh->nh_oif;
1012
Linus Torvalds1da177e2005-04-16 15:20:36 -07001013 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001014 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001015#ifdef CONFIG_NET_CLS_ROUTE
1016 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001017 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001018#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001019 /* length of rtnetlink header + attributes */
1020 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001022
1023 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 }
1025#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001026 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001027
Thomas Grafbe403ea2006-08-17 18:15:17 -07001028nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001029 nlmsg_cancel(skb, nlh);
1030 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001031}
1032
Linus Torvalds1da177e2005-04-16 15:20:36 -07001033/*
1034 Update FIB if:
1035 - local address disappeared -> we must delete all the entries
1036 referring to it.
1037 - device went down -> we must shutdown all nexthops going via it.
1038 */
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001039int fib_sync_down_addr(struct net *net, __be32 local)
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001040{
1041 int ret = 0;
1042 unsigned int hash = fib_laddr_hashfn(local);
1043 struct hlist_head *head = &fib_info_laddrhash[hash];
1044 struct hlist_node *node;
1045 struct fib_info *fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001046
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001047 if (fib_info_laddrhash == NULL || local == 0)
1048 return 0;
1049
1050 hlist_for_each_entry(fi, node, head, fib_lhash) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08001051 if (!net_eq(fi->fib_net, net))
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001052 continue;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001053 if (fi->fib_prefsrc == local) {
1054 fi->fib_flags |= RTNH_F_DEAD;
1055 ret++;
1056 }
1057 }
1058 return ret;
1059}
1060
1061int fib_sync_down_dev(struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001062{
1063 int ret = 0;
1064 int scope = RT_SCOPE_NOWHERE;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001065 struct fib_info *prev_fi = NULL;
1066 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1067 struct hlist_head *head = &fib_info_devhash[hash];
1068 struct hlist_node *node;
1069 struct fib_nh *nh;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001070
Linus Torvalds1da177e2005-04-16 15:20:36 -07001071 if (force)
1072 scope = -1;
1073
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001074 hlist_for_each_entry(nh, node, head, nh_hash) {
1075 struct fib_info *fi = nh->nh_parent;
1076 int dead;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001078 BUG_ON(!fi->fib_nhs);
1079 if (nh->nh_dev != dev || fi == prev_fi)
1080 continue;
1081 prev_fi = fi;
1082 dead = 0;
1083 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001084 if (nexthop_nh->nh_flags&RTNH_F_DEAD)
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001085 dead++;
David S. Miller71fceff2010-01-15 01:16:40 -08001086 else if (nexthop_nh->nh_dev == dev &&
1087 nexthop_nh->nh_scope != scope) {
1088 nexthop_nh->nh_flags |= RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001090 spin_lock_bh(&fib_multipath_lock);
David S. Miller71fceff2010-01-15 01:16:40 -08001091 fi->fib_power -= nexthop_nh->nh_power;
1092 nexthop_nh->nh_power = 0;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001093 spin_unlock_bh(&fib_multipath_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094#endif
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001095 dead++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096 }
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001097#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller71fceff2010-01-15 01:16:40 -08001098 if (force > 1 && nexthop_nh->nh_dev == dev) {
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001099 dead = fi->fib_nhs;
1100 break;
1101 }
1102#endif
1103 } endfor_nexthops(fi)
1104 if (dead == fi->fib_nhs) {
1105 fi->fib_flags |= RTNH_F_DEAD;
1106 ret++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001107 }
1108 }
1109
1110 return ret;
1111}
1112
1113#ifdef CONFIG_IP_ROUTE_MULTIPATH
1114
1115/*
1116 Dead device goes up. We wake up dead nexthops.
1117 It takes sense only on multipath routes.
1118 */
1119
1120int fib_sync_up(struct net_device *dev)
1121{
1122 struct fib_info *prev_fi;
1123 unsigned int hash;
1124 struct hlist_head *head;
1125 struct hlist_node *node;
1126 struct fib_nh *nh;
1127 int ret;
1128
1129 if (!(dev->flags&IFF_UP))
1130 return 0;
1131
1132 prev_fi = NULL;
1133 hash = fib_devindex_hashfn(dev->ifindex);
1134 head = &fib_info_devhash[hash];
1135 ret = 0;
1136
1137 hlist_for_each_entry(nh, node, head, nh_hash) {
1138 struct fib_info *fi = nh->nh_parent;
1139 int alive;
1140
1141 BUG_ON(!fi->fib_nhs);
1142 if (nh->nh_dev != dev || fi == prev_fi)
1143 continue;
1144
1145 prev_fi = fi;
1146 alive = 0;
1147 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001148 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001149 alive++;
1150 continue;
1151 }
David S. Miller71fceff2010-01-15 01:16:40 -08001152 if (nexthop_nh->nh_dev == NULL ||
1153 !(nexthop_nh->nh_dev->flags&IFF_UP))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 continue;
David S. Miller71fceff2010-01-15 01:16:40 -08001155 if (nexthop_nh->nh_dev != dev ||
1156 !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157 continue;
1158 alive++;
1159 spin_lock_bh(&fib_multipath_lock);
David S. Miller71fceff2010-01-15 01:16:40 -08001160 nexthop_nh->nh_power = 0;
1161 nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001162 spin_unlock_bh(&fib_multipath_lock);
1163 } endfor_nexthops(fi)
1164
1165 if (alive > 0) {
1166 fi->fib_flags &= ~RTNH_F_DEAD;
1167 ret++;
1168 }
1169 }
1170
1171 return ret;
1172}
1173
1174/*
1175 The algorithm is suboptimal, but it provides really
1176 fair weighted route distribution.
1177 */
1178
1179void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1180{
1181 struct fib_info *fi = res->fi;
1182 int w;
1183
1184 spin_lock_bh(&fib_multipath_lock);
1185 if (fi->fib_power <= 0) {
1186 int power = 0;
1187 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001188 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
1189 power += nexthop_nh->nh_weight;
1190 nexthop_nh->nh_power = nexthop_nh->nh_weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 }
1192 } endfor_nexthops(fi);
1193 fi->fib_power = power;
1194 if (power <= 0) {
1195 spin_unlock_bh(&fib_multipath_lock);
1196 /* Race condition: route has just become dead. */
1197 res->nh_sel = 0;
1198 return;
1199 }
1200 }
1201
1202
1203 /* w should be random number [0..fi->fib_power-1],
1204 it is pretty bad approximation.
1205 */
1206
1207 w = jiffies % fi->fib_power;
1208
1209 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001210 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
1211 nexthop_nh->nh_power) {
1212 if ((w -= nexthop_nh->nh_power) <= 0) {
1213 nexthop_nh->nh_power--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001214 fi->fib_power--;
1215 res->nh_sel = nhsel;
1216 spin_unlock_bh(&fib_multipath_lock);
1217 return;
1218 }
1219 }
1220 } endfor_nexthops(fi);
1221
1222 /* Race condition: route has just become dead. */
1223 res->nh_sel = 0;
1224 spin_unlock_bh(&fib_multipath_lock);
1225}
1226#endif