blob: 20f09c5b31e8bba5e3580255fe5f8d541c14d374 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
15
Linus Torvalds1da177e2005-04-16 15:20:36 -070016#include <asm/uaccess.h>
17#include <asm/system.h>
18#include <linux/bitops.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
21#include <linux/jiffies.h>
22#include <linux/mm.h>
23#include <linux/string.h>
24#include <linux/socket.h>
25#include <linux/sockios.h>
26#include <linux/errno.h>
27#include <linux/in.h>
28#include <linux/inet.h>
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020029#include <linux/inetdevice.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include <linux/netdevice.h>
31#include <linux/if_arp.h>
32#include <linux/proc_fs.h>
33#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070034#include <linux/init.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090035#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036
Arnaldo Carvalho de Melo14c85022005-12-27 02:43:12 -020037#include <net/arp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include <net/ip.h>
39#include <net/protocol.h>
40#include <net/route.h>
41#include <net/tcp.h>
42#include <net/sock.h>
43#include <net/ip_fib.h>
Thomas Graff21c7bc2006-08-15 00:34:17 -070044#include <net/netlink.h>
Thomas Graf4e902c52006-08-17 18:14:52 -070045#include <net/nexthop.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070046
47#include "fib_lookup.h"
48
Stephen Hemminger832b4c52006-08-29 16:48:09 -070049static DEFINE_SPINLOCK(fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -070050static struct hlist_head *fib_info_hash;
51static struct hlist_head *fib_info_laddrhash;
52static unsigned int fib_hash_size;
53static unsigned int fib_info_cnt;
54
55#define DEVINDEX_HASHBITS 8
56#define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
57static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
58
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

/*
 * Nexthop iteration helpers.
 *
 * Each macro opens a brace scope that declares the loop index "nhsel" and a
 * cursor over (fi)->fib_nh: "nh" (const) for for_nexthops(), "nexthop_nh"
 * (writable) for change_nexthops().  The scope is closed again by
 * endfor_nexthops(), so the two must always be used as a pair.
 */
#define for_nexthops(fi) {						\
	int nhsel; const struct fib_nh *nh;				\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel; struct fib_nh *nexthop_nh;				\
	for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	     nhsel < (fi)->fib_nhs;					\
	     nexthop_nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the body runs exactly once, on the first nexthop.
 * Hope, that gcc will optimize it to get rid of dummy loop.
 */

#define for_nexthops(fi) {						\
	int nhsel = 0; const struct fib_nh *nh = (fi)->fib_nh;		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh);	\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

#define endfor_nexthops(fi) }
82
83
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090084static const struct
Linus Torvalds1da177e2005-04-16 15:20:36 -070085{
86 int error;
87 u8 scope;
Thomas Grafa0ee18b2007-03-24 20:32:54 -070088} fib_props[RTN_MAX + 1] = {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090089 {
Linus Torvalds1da177e2005-04-16 15:20:36 -070090 .error = 0,
91 .scope = RT_SCOPE_NOWHERE,
92 }, /* RTN_UNSPEC */
93 {
94 .error = 0,
95 .scope = RT_SCOPE_UNIVERSE,
96 }, /* RTN_UNICAST */
97 {
98 .error = 0,
99 .scope = RT_SCOPE_HOST,
100 }, /* RTN_LOCAL */
101 {
102 .error = 0,
103 .scope = RT_SCOPE_LINK,
104 }, /* RTN_BROADCAST */
105 {
106 .error = 0,
107 .scope = RT_SCOPE_LINK,
108 }, /* RTN_ANYCAST */
109 {
110 .error = 0,
111 .scope = RT_SCOPE_UNIVERSE,
112 }, /* RTN_MULTICAST */
113 {
114 .error = -EINVAL,
115 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_BLACKHOLE */
117 {
118 .error = -EHOSTUNREACH,
119 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_UNREACHABLE */
121 {
122 .error = -EACCES,
123 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_PROHIBIT */
125 {
126 .error = -EAGAIN,
127 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_THROW */
129 {
130 .error = -EINVAL,
131 .scope = RT_SCOPE_NOWHERE,
132 }, /* RTN_NAT */
133 {
134 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_XRESOLVE */
137};
138
139
140/* Release a nexthop info record */
141
142void free_fib_info(struct fib_info *fi)
143{
144 if (fi->fib_dead == 0) {
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800145 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 return;
147 }
148 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800149 if (nexthop_nh->nh_dev)
150 dev_put(nexthop_nh->nh_dev);
151 nexthop_nh->nh_dev = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700152 } endfor_nexthops(fi);
153 fib_info_cnt--;
Denis V. Lunev57d7a602008-04-16 02:00:50 -0700154 release_net(fi->fib_net);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700155 kfree(fi);
156}
157
158void fib_release_info(struct fib_info *fi)
159{
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700160 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700161 if (fi && --fi->fib_treeref == 0) {
162 hlist_del(&fi->fib_hash);
163 if (fi->fib_prefsrc)
164 hlist_del(&fi->fib_lhash);
165 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800166 if (!nexthop_nh->nh_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700167 continue;
David S. Miller71fceff2010-01-15 01:16:40 -0800168 hlist_del(&nexthop_nh->nh_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 } endfor_nexthops(fi)
170 fi->fib_dead = 1;
171 fib_info_put(fi);
172 }
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700173 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700174}
175
176static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177{
178 const struct fib_nh *onh = ofi->fib_nh;
179
180 for_nexthops(fi) {
181 if (nh->nh_oif != onh->nh_oif ||
182 nh->nh_gw != onh->nh_gw ||
183 nh->nh_scope != onh->nh_scope ||
184#ifdef CONFIG_IP_ROUTE_MULTIPATH
185 nh->nh_weight != onh->nh_weight ||
186#endif
187#ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid ||
189#endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
191 return -1;
192 onh++;
193 } endfor_nexthops(fi);
194 return 0;
195}
196
David S. Miller88ebc722008-01-12 21:49:01 -0800197static inline unsigned int fib_devindex_hashfn(unsigned int val)
198{
199 unsigned int mask = DEVINDEX_HASHSIZE - 1;
200
201 return (val ^
202 (val >> DEVINDEX_HASHBITS) ^
203 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
204}
205
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
207{
208 unsigned int mask = (fib_hash_size - 1);
209 unsigned int val = fi->fib_nhs;
210
211 val ^= fi->fib_protocol;
Al Viro81f7bf62006-09-27 18:40:00 -0700212 val ^= (__force u32)fi->fib_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213 val ^= fi->fib_priority;
David S. Miller88ebc722008-01-12 21:49:01 -0800214 for_nexthops(fi) {
215 val ^= fib_devindex_hashfn(nh->nh_oif);
216 } endfor_nexthops(fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700217
218 return (val ^ (val >> 7) ^ (val >> 12)) & mask;
219}
220
221static struct fib_info *fib_find_info(const struct fib_info *nfi)
222{
223 struct hlist_head *head;
224 struct hlist_node *node;
225 struct fib_info *fi;
226 unsigned int hash;
227
228 hash = fib_info_hashfn(nfi);
229 head = &fib_info_hash[hash];
230
231 hlist_for_each_entry(fi, node, head, fib_hash) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800232 if (!net_eq(fi->fib_net, nfi->fib_net))
Denis V. Lunev4814bdb2008-01-31 18:50:07 -0800233 continue;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700234 if (fi->fib_nhs != nfi->fib_nhs)
235 continue;
236 if (nfi->fib_protocol == fi->fib_protocol &&
237 nfi->fib_prefsrc == fi->fib_prefsrc &&
238 nfi->fib_priority == fi->fib_priority &&
239 memcmp(nfi->fib_metrics, fi->fib_metrics,
240 sizeof(fi->fib_metrics)) == 0 &&
241 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
242 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
243 return fi;
244 }
245
246 return NULL;
247}
248
Linus Torvalds1da177e2005-04-16 15:20:36 -0700249/* Check, that the gateway is already configured.
250 Used only by redirect accept routine.
251 */
252
Al Virod878e72e2006-09-26 22:18:13 -0700253int ip_fib_check_default(__be32 gw, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700254{
255 struct hlist_head *head;
256 struct hlist_node *node;
257 struct fib_nh *nh;
258 unsigned int hash;
259
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700260 spin_lock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261
262 hash = fib_devindex_hashfn(dev->ifindex);
263 head = &fib_info_devhash[hash];
264 hlist_for_each_entry(nh, node, head, nh_hash) {
265 if (nh->nh_dev == dev &&
266 nh->nh_gw == gw &&
267 !(nh->nh_flags&RTNH_F_DEAD)) {
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700268 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269 return 0;
270 }
271 }
272
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700273 spin_unlock(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700274
275 return -1;
276}
277
Thomas Graf339bf982006-11-10 14:10:15 -0800278static inline size_t fib_nlmsg_size(struct fib_info *fi)
279{
280 size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg))
281 + nla_total_size(4) /* RTA_TABLE */
282 + nla_total_size(4) /* RTA_DST */
283 + nla_total_size(4) /* RTA_PRIORITY */
284 + nla_total_size(4); /* RTA_PREFSRC */
285
286 /* space for nested metrics */
287 payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
288
289 if (fi->fib_nhs) {
290 /* Also handles the special case fib_nhs == 1 */
291
292 /* each nexthop is packed in an attribute */
293 size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
294
295 /* may contain flow and gateway attribute */
296 nhsize += 2 * nla_total_size(4);
297
298 /* all nexthops are packed in a nested attribute */
299 payload += nla_total_size(fi->fib_nhs * nhsize);
300 }
301
302 return payload;
303}
304
Al Viro81f7bf62006-09-27 18:40:00 -0700305void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
Milan Kocianb8f55832007-05-23 14:55:06 -0700306 int dst_len, u32 tb_id, struct nl_info *info,
307 unsigned int nlm_flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308{
309 struct sk_buff *skb;
Thomas Graf4e902c52006-08-17 18:14:52 -0700310 u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700311 int err = -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700312
Thomas Graf339bf982006-11-10 14:10:15 -0800313 skb = nlmsg_new(fib_nlmsg_size(fa->fa_info), GFP_KERNEL);
Thomas Graff21c7bc2006-08-15 00:34:17 -0700314 if (skb == NULL)
315 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700316
Thomas Graf4e902c52006-08-17 18:14:52 -0700317 err = fib_dump_info(skb, info->pid, seq, event, tb_id,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700318 fa->fa_type, fa->fa_scope, key, dst_len,
Milan Kocianb8f55832007-05-23 14:55:06 -0700319 fa->fa_tos, fa->fa_info, nlm_flags);
Patrick McHardy26932562007-01-31 23:16:40 -0800320 if (err < 0) {
321 /* -EMSGSIZE implies BUG in fib_nlmsg_size() */
322 WARN_ON(err == -EMSGSIZE);
323 kfree_skb(skb);
324 goto errout;
325 }
Pablo Neira Ayuso1ce85fe2009-02-24 23:18:28 -0800326 rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
327 info->nlh, GFP_KERNEL);
328 return;
Thomas Graff21c7bc2006-08-15 00:34:17 -0700329errout:
330 if (err < 0)
Denis V. Lunev4d1169c2008-01-10 03:26:13 -0800331 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700332}
333
334/* Return the first fib alias matching TOS with
335 * priority less than or equal to PRIO.
336 */
337struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
338{
339 if (fah) {
340 struct fib_alias *fa;
341 list_for_each_entry(fa, fah, fa_list) {
342 if (fa->fa_tos > tos)
343 continue;
344 if (fa->fa_info->fib_priority >= prio ||
345 fa->fa_tos < tos)
346 return fa;
347 }
348 }
349 return NULL;
350}
351
352int fib_detect_death(struct fib_info *fi, int order,
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800353 struct fib_info **last_resort, int *last_idx, int dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700354{
355 struct neighbour *n;
356 int state = NUD_NONE;
357
358 n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
359 if (n) {
360 state = n->nud_state;
361 neigh_release(n);
362 }
Jianjun Kongd93191002008-11-03 00:23:42 -0800363 if (state == NUD_REACHABLE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700364 return 0;
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800365 if ((state&NUD_VALID) && order != dflt)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 return 0;
367 if ((state&NUD_VALID) ||
Denis V. Lunevc17860a2007-12-08 00:22:13 -0800368 (*last_idx<0 && order > dflt)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 *last_resort = fi;
370 *last_idx = order;
371 }
372 return 1;
373}
374
375#ifdef CONFIG_IP_ROUTE_MULTIPATH
376
/*
 * Count the rtnexthop entries in a multipath attribute of @remaining bytes.
 * Returns 0 if bytes are left over after the last well-formed entry.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
	int count;

	for (count = 0; rtnh_ok(rtnh, remaining); count++)
		rtnh = rtnh_next(rtnh, &remaining);

	/* leftover implies invalid nexthop configuration, discard it */
	if (remaining > 0)
		return 0;

	return count;
}
389
Thomas Graf4e902c52006-08-17 18:14:52 -0700390static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
391 int remaining, struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700392{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700393 change_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700394 int attrlen;
395
396 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700398
David S. Miller71fceff2010-01-15 01:16:40 -0800399 nexthop_nh->nh_flags =
400 (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
401 nexthop_nh->nh_oif = rtnh->rtnh_ifindex;
402 nexthop_nh->nh_weight = rtnh->rtnh_hops + 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700403
404 attrlen = rtnh_attrlen(rtnh);
405 if (attrlen > 0) {
406 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
407
408 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
David S. Miller71fceff2010-01-15 01:16:40 -0800409 nexthop_nh->nh_gw = nla ? nla_get_be32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700410#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700411 nla = nla_find(attrs, attrlen, RTA_FLOW);
David S. Miller71fceff2010-01-15 01:16:40 -0800412 nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413#endif
414 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700415
416 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700417 } endfor_nexthops(fi);
Thomas Graf4e902c52006-08-17 18:14:52 -0700418
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419 return 0;
420}
421
422#endif
423
Thomas Graf4e902c52006-08-17 18:14:52 -0700424int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700425{
426#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700427 struct rtnexthop *rtnh;
428 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429#endif
430
Thomas Graf4e902c52006-08-17 18:14:52 -0700431 if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 return 1;
433
Thomas Graf4e902c52006-08-17 18:14:52 -0700434 if (cfg->fc_oif || cfg->fc_gw) {
435 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
436 (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700437 return 0;
438 return 1;
439 }
440
441#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700442 if (cfg->fc_mp == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 return 0;
Thomas Graf4e902c52006-08-17 18:14:52 -0700444
445 rtnh = cfg->fc_mp;
446 remaining = cfg->fc_mp_len;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900447
Linus Torvalds1da177e2005-04-16 15:20:36 -0700448 for_nexthops(fi) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700449 int attrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450
Thomas Graf4e902c52006-08-17 18:14:52 -0700451 if (!rtnh_ok(rtnh, remaining))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452 return -EINVAL;
Thomas Graf4e902c52006-08-17 18:14:52 -0700453
454 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 return 1;
Thomas Graf4e902c52006-08-17 18:14:52 -0700456
457 attrlen = rtnh_attrlen(rtnh);
458 if (attrlen < 0) {
459 struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
460
461 nla = nla_find(attrs, attrlen, RTA_GATEWAY);
Al Viro17fb2c62006-09-26 22:15:25 -0700462 if (nla && nla_get_be32(nla) != nh->nh_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700463 return 1;
464#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700465 nla = nla_find(attrs, attrlen, RTA_FLOW);
466 if (nla && nla_get_u32(nla) != nh->nh_tclassid)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 return 1;
468#endif
469 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700470
471 rtnh = rtnh_next(rtnh, &remaining);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700472 } endfor_nexthops(fi);
473#endif
474 return 0;
475}
476
477
/*
   Picture
   -------

   Semantics of nexthop is very messy by historical reasons.
   We have to take into account, that:
   a) gateway can be actually local interface address,
      so that gatewayed route is direct.
   b) gateway must be on-link address, possibly
      described not by an ifaddr, but also by a direct route.
   c) If both gateway and interface are specified, they should not
      contradict.
   d) If we use tunnel routes, gateway could be not on-link.

   Attempt to reconcile all of these (alas, self-contradictory) conditions
   results in pretty ugly and hairy code with obscure logic.

   I chose to generalize it instead, so that the size
   of code does not increase practically, but it becomes
   much more general.
   Every prefix is assigned a "scope" value: "host" is local address,
   "link" is direct route,
   [ ... "site" ... "interior" ... ]
   and "universe" is true gateway route with global meaning.

   Every prefix refers to a set of "nexthop"s (gw, oif),
   where gw must have narrower scope. This recursion stops
   when gw has LOCAL scope or if "nexthop" is declared ONLINK,
   which means that gw is forced to be on link.

   Code is still hairy, but now it is apparently logically
   consistent and very flexible. F.e. as a by-product it allows
   independent exterior and interior routing processes
   to co-exist in peace.

   Normally it looks as following.

   {universe prefix}  -> (gw, oif) [scope link]
			  |
			  |-> {link prefix} -> (gw, oif) [scope local]
						|
						|-> {local prefix} (terminal node)
 */
521
Thomas Graf4e902c52006-08-17 18:14:52 -0700522static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
523 struct fib_nh *nh)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700524{
525 int err;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800526 struct net *net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700527
Denis V. Lunev86167a32008-01-21 17:34:00 -0800528 net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700529 if (nh->nh_gw) {
530 struct fib_result res;
531
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 if (nh->nh_flags&RTNH_F_ONLINK) {
533 struct net_device *dev;
534
Thomas Graf4e902c52006-08-17 18:14:52 -0700535 if (cfg->fc_scope >= RT_SCOPE_LINK)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700536 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800537 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700538 return -EINVAL;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800539 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700540 return -ENODEV;
541 if (!(dev->flags&IFF_UP))
542 return -ENETDOWN;
543 nh->nh_dev = dev;
544 dev_hold(dev);
545 nh->nh_scope = RT_SCOPE_LINK;
546 return 0;
547 }
548 {
Thomas Graf4e902c52006-08-17 18:14:52 -0700549 struct flowi fl = {
550 .nl_u = {
551 .ip4_u = {
552 .daddr = nh->nh_gw,
553 .scope = cfg->fc_scope + 1,
554 },
555 },
556 .oif = nh->nh_oif,
557 };
Linus Torvalds1da177e2005-04-16 15:20:36 -0700558
559 /* It is not necessary, but requires a bit of thinking */
560 if (fl.fl4_scope < RT_SCOPE_LINK)
561 fl.fl4_scope = RT_SCOPE_LINK;
Denis V. Lunev86167a32008-01-21 17:34:00 -0800562 if ((err = fib_lookup(net, &fl, &res)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700563 return err;
564 }
565 err = -EINVAL;
566 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567 goto out;
568 nh->nh_scope = res.scope;
569 nh->nh_oif = FIB_RES_OIF(res);
570 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
571 goto out;
572 dev_hold(nh->nh_dev);
573 err = -ENETDOWN;
574 if (!(nh->nh_dev->flags & IFF_UP))
575 goto out;
576 err = 0;
577out:
578 fib_res_put(&res);
579 return err;
580 } else {
581 struct in_device *in_dev;
582
583 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
584 return -EINVAL;
585
Denis V. Lunev86167a32008-01-21 17:34:00 -0800586 in_dev = inetdev_by_index(net, nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700587 if (in_dev == NULL)
588 return -ENODEV;
589 if (!(in_dev->dev->flags&IFF_UP)) {
590 in_dev_put(in_dev);
591 return -ENETDOWN;
592 }
593 nh->nh_dev = in_dev->dev;
594 dev_hold(nh->nh_dev);
595 nh->nh_scope = RT_SCOPE_HOST;
596 in_dev_put(in_dev);
597 }
598 return 0;
599}
600
Al Viro81f7bf62006-09-27 18:40:00 -0700601static inline unsigned int fib_laddr_hashfn(__be32 val)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602{
603 unsigned int mask = (fib_hash_size - 1);
604
Al Viro81f7bf62006-09-27 18:40:00 -0700605 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700606}
607
608static struct hlist_head *fib_hash_alloc(int bytes)
609{
610 if (bytes <= PAGE_SIZE)
Joonwoo Park88f83492007-11-26 23:29:32 +0800611 return kzalloc(bytes, GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700612 else
613 return (struct hlist_head *)
Joonwoo Park88f83492007-11-26 23:29:32 +0800614 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615}
616
617static void fib_hash_free(struct hlist_head *hash, int bytes)
618{
619 if (!hash)
620 return;
621
622 if (bytes <= PAGE_SIZE)
623 kfree(hash);
624 else
625 free_pages((unsigned long) hash, get_order(bytes));
626}
627
628static void fib_hash_move(struct hlist_head *new_info_hash,
629 struct hlist_head *new_laddrhash,
630 unsigned int new_size)
631{
David S. Millerb7656e72005-08-05 04:12:48 -0700632 struct hlist_head *old_info_hash, *old_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700633 unsigned int old_size = fib_hash_size;
David S. Millerb7656e72005-08-05 04:12:48 -0700634 unsigned int i, bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700635
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700636 spin_lock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700637 old_info_hash = fib_info_hash;
638 old_laddrhash = fib_info_laddrhash;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 fib_hash_size = new_size;
640
641 for (i = 0; i < old_size; i++) {
642 struct hlist_head *head = &fib_info_hash[i];
643 struct hlist_node *node, *n;
644 struct fib_info *fi;
645
646 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
647 struct hlist_head *dest;
648 unsigned int new_hash;
649
650 hlist_del(&fi->fib_hash);
651
652 new_hash = fib_info_hashfn(fi);
653 dest = &new_info_hash[new_hash];
654 hlist_add_head(&fi->fib_hash, dest);
655 }
656 }
657 fib_info_hash = new_info_hash;
658
659 for (i = 0; i < old_size; i++) {
660 struct hlist_head *lhead = &fib_info_laddrhash[i];
661 struct hlist_node *node, *n;
662 struct fib_info *fi;
663
664 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
665 struct hlist_head *ldest;
666 unsigned int new_hash;
667
668 hlist_del(&fi->fib_lhash);
669
670 new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
671 ldest = &new_laddrhash[new_hash];
672 hlist_add_head(&fi->fib_lhash, ldest);
673 }
674 }
675 fib_info_laddrhash = new_laddrhash;
676
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700677 spin_unlock_bh(&fib_info_lock);
David S. Millerb7656e72005-08-05 04:12:48 -0700678
679 bytes = old_size * sizeof(struct hlist_head *);
680 fib_hash_free(old_info_hash, bytes);
681 fib_hash_free(old_laddrhash, bytes);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682}
683
Thomas Graf4e902c52006-08-17 18:14:52 -0700684struct fib_info *fib_create_info(struct fib_config *cfg)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700685{
686 int err;
687 struct fib_info *fi = NULL;
688 struct fib_info *ofi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700689 int nhs = 1;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800690 struct net *net = cfg->fc_nlinfo.nl_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700691
692 /* Fast check to catch the most weird cases */
Thomas Graf4e902c52006-08-17 18:14:52 -0700693 if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700694 goto err_inval;
695
696#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700697 if (cfg->fc_mp) {
698 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 if (nhs == 0)
700 goto err_inval;
701 }
702#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700703
704 err = -ENOBUFS;
705 if (fib_info_cnt >= fib_hash_size) {
706 unsigned int new_size = fib_hash_size << 1;
707 struct hlist_head *new_info_hash;
708 struct hlist_head *new_laddrhash;
709 unsigned int bytes;
710
711 if (!new_size)
712 new_size = 1;
713 bytes = new_size * sizeof(struct hlist_head *);
714 new_info_hash = fib_hash_alloc(bytes);
715 new_laddrhash = fib_hash_alloc(bytes);
716 if (!new_info_hash || !new_laddrhash) {
717 fib_hash_free(new_info_hash, bytes);
718 fib_hash_free(new_laddrhash, bytes);
Joonwoo Park88f83492007-11-26 23:29:32 +0800719 } else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700720 fib_hash_move(new_info_hash, new_laddrhash, new_size);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700721
722 if (!fib_hash_size)
723 goto failure;
724 }
725
Panagiotis Issaris0da974f2006-07-21 14:51:30 -0700726 fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 if (fi == NULL)
728 goto failure;
729 fib_info_cnt++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730
Denis V. Lunev57d7a602008-04-16 02:00:50 -0700731 fi->fib_net = hold_net(net);
Thomas Graf4e902c52006-08-17 18:14:52 -0700732 fi->fib_protocol = cfg->fc_protocol;
733 fi->fib_flags = cfg->fc_flags;
734 fi->fib_priority = cfg->fc_priority;
735 fi->fib_prefsrc = cfg->fc_prefsrc;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736
737 fi->fib_nhs = nhs;
738 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800739 nexthop_nh->nh_parent = fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700740 } endfor_nexthops(fi)
741
Thomas Graf4e902c52006-08-17 18:14:52 -0700742 if (cfg->fc_mx) {
743 struct nlattr *nla;
744 int remaining;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700745
Thomas Graf4e902c52006-08-17 18:14:52 -0700746 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
Thomas Graf8f4c1f92007-09-12 14:44:36 +0200747 int type = nla_type(nla);
Thomas Graf4e902c52006-08-17 18:14:52 -0700748
749 if (type) {
750 if (type > RTAX_MAX)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700752 fi->fib_metrics[type - 1] = nla_get_u32(nla);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700753 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700754 }
755 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700756
Thomas Graf4e902c52006-08-17 18:14:52 -0700757 if (cfg->fc_mp) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700758#ifdef CONFIG_IP_ROUTE_MULTIPATH
Thomas Graf4e902c52006-08-17 18:14:52 -0700759 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
760 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700761 goto failure;
Thomas Graf4e902c52006-08-17 18:14:52 -0700762 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 goto err_inval;
Thomas Graf4e902c52006-08-17 18:14:52 -0700764 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700765 goto err_inval;
766#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700767 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 goto err_inval;
769#endif
770#else
771 goto err_inval;
772#endif
773 } else {
774 struct fib_nh *nh = fi->fib_nh;
Thomas Graf4e902c52006-08-17 18:14:52 -0700775
776 nh->nh_oif = cfg->fc_oif;
777 nh->nh_gw = cfg->fc_gw;
778 nh->nh_flags = cfg->fc_flags;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700779#ifdef CONFIG_NET_CLS_ROUTE
Thomas Graf4e902c52006-08-17 18:14:52 -0700780 nh->nh_tclassid = cfg->fc_flow;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700782#ifdef CONFIG_IP_ROUTE_MULTIPATH
783 nh->nh_weight = 1;
784#endif
785 }
786
Thomas Graf4e902c52006-08-17 18:14:52 -0700787 if (fib_props[cfg->fc_type].error) {
788 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700789 goto err_inval;
790 goto link_it;
791 }
792
Thomas Graf4e902c52006-08-17 18:14:52 -0700793 if (cfg->fc_scope > RT_SCOPE_HOST)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700794 goto err_inval;
795
Thomas Graf4e902c52006-08-17 18:14:52 -0700796 if (cfg->fc_scope == RT_SCOPE_HOST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700797 struct fib_nh *nh = fi->fib_nh;
798
799 /* Local address is added. */
800 if (nhs != 1 || nh->nh_gw)
801 goto err_inval;
802 nh->nh_scope = RT_SCOPE_NOWHERE;
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800803 nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 err = -ENODEV;
805 if (nh->nh_dev == NULL)
806 goto failure;
807 } else {
808 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -0800809 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700810 goto failure;
811 } endfor_nexthops(fi)
812 }
813
814 if (fi->fib_prefsrc) {
Thomas Graf4e902c52006-08-17 18:14:52 -0700815 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
816 fi->fib_prefsrc != cfg->fc_dst)
Denis V. Lunev7462bd742008-01-31 18:49:32 -0800817 if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700818 goto err_inval;
819 }
820
821link_it:
822 if ((ofi = fib_find_info(fi)) != NULL) {
823 fi->fib_dead = 1;
824 free_fib_info(fi);
825 ofi->fib_treeref++;
826 return ofi;
827 }
828
829 fi->fib_treeref++;
830 atomic_inc(&fi->fib_clntref);
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700831 spin_lock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700832 hlist_add_head(&fi->fib_hash,
833 &fib_info_hash[fib_info_hashfn(fi)]);
834 if (fi->fib_prefsrc) {
835 struct hlist_head *head;
836
837 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
838 hlist_add_head(&fi->fib_lhash, head);
839 }
840 change_nexthops(fi) {
841 struct hlist_head *head;
842 unsigned int hash;
843
David S. Miller71fceff2010-01-15 01:16:40 -0800844 if (!nexthop_nh->nh_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700845 continue;
David S. Miller71fceff2010-01-15 01:16:40 -0800846 hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 head = &fib_info_devhash[hash];
David S. Miller71fceff2010-01-15 01:16:40 -0800848 hlist_add_head(&nexthop_nh->nh_hash, head);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700849 } endfor_nexthops(fi)
Stephen Hemminger832b4c52006-08-29 16:48:09 -0700850 spin_unlock_bh(&fib_info_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700851 return fi;
852
853err_inval:
854 err = -EINVAL;
855
856failure:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900857 if (fi) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700858 fi->fib_dead = 1;
859 free_fib_info(fi);
860 }
Thomas Graf4e902c52006-08-17 18:14:52 -0700861
862 return ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700863}
864
Robert Olssone5b43762005-08-25 13:01:03 -0700865/* Note! fib_semantic_match intentionally uses RCU list functions. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866int fib_semantic_match(struct list_head *head, const struct flowi *flp,
Rami Rosene204a342009-05-18 01:19:12 +0000867 struct fib_result *res, int prefixlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700868{
869 struct fib_alias *fa;
870 int nh_sel = 0;
871
Robert Olssone5b43762005-08-25 13:01:03 -0700872 list_for_each_entry_rcu(fa, head, fa_list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700873 int err;
874
875 if (fa->fa_tos &&
876 fa->fa_tos != flp->fl4_tos)
877 continue;
878
879 if (fa->fa_scope < flp->fl4_scope)
880 continue;
881
882 fa->fa_state |= FA_S_ACCESSED;
883
884 err = fib_props[fa->fa_type].error;
885 if (err == 0) {
886 struct fib_info *fi = fa->fa_info;
887
888 if (fi->fib_flags & RTNH_F_DEAD)
889 continue;
890
891 switch (fa->fa_type) {
892 case RTN_UNICAST:
893 case RTN_LOCAL:
894 case RTN_BROADCAST:
895 case RTN_ANYCAST:
896 case RTN_MULTICAST:
897 for_nexthops(fi) {
898 if (nh->nh_flags&RTNH_F_DEAD)
899 continue;
900 if (!flp->oif || flp->oif == nh->nh_oif)
901 break;
902 }
903#ifdef CONFIG_IP_ROUTE_MULTIPATH
904 if (nhsel < fi->fib_nhs) {
905 nh_sel = nhsel;
906 goto out_fill_res;
907 }
908#else
909 if (nhsel < 1) {
910 goto out_fill_res;
911 }
912#endif
913 endfor_nexthops(fi);
914 continue;
915
916 default:
Stephen Hemmingera6db9012008-01-12 20:58:35 -0800917 printk(KERN_WARNING "fib_semantic_match bad type %#x\n",
918 fa->fa_type);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700919 return -EINVAL;
Stephen Hemminger3ff50b72007-04-20 17:09:22 -0700920 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700921 }
922 return err;
923 }
924 return 1;
925
926out_fill_res:
927 res->prefixlen = prefixlen;
928 res->nh_sel = nh_sel;
929 res->type = fa->fa_type;
930 res->scope = fa->fa_scope;
931 res->fi = fa->fa_info;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932 atomic_inc(&res->fi->fib_clntref);
933 return 0;
934}
935
936/* Find appropriate source address to this destination */
937
Al Virob83738a2006-09-26 22:14:15 -0700938__be32 __fib_res_prefsrc(struct fib_result *res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700939{
940 return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
941}
942
Thomas Grafbe403ea2006-08-17 18:15:17 -0700943int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
Al Viro81f7bf62006-09-27 18:40:00 -0700944 u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos,
Thomas Grafbe403ea2006-08-17 18:15:17 -0700945 struct fib_info *fi, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700946{
Thomas Grafbe403ea2006-08-17 18:15:17 -0700947 struct nlmsghdr *nlh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 struct rtmsg *rtm;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700949
Thomas Grafbe403ea2006-08-17 18:15:17 -0700950 nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
951 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -0800952 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700953
954 rtm = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955 rtm->rtm_family = AF_INET;
956 rtm->rtm_dst_len = dst_len;
957 rtm->rtm_src_len = 0;
958 rtm->rtm_tos = tos;
Krzysztof Piotr Oledzki709772e2008-06-10 15:44:49 -0700959 if (tb_id < 256)
960 rtm->rtm_table = tb_id;
961 else
962 rtm->rtm_table = RT_TABLE_COMPAT;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700963 NLA_PUT_U32(skb, RTA_TABLE, tb_id);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700964 rtm->rtm_type = type;
965 rtm->rtm_flags = fi->fib_flags;
966 rtm->rtm_scope = scope;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700967 rtm->rtm_protocol = fi->fib_protocol;
Thomas Grafbe403ea2006-08-17 18:15:17 -0700968
969 if (rtm->rtm_dst_len)
Al Viro17fb2c62006-09-26 22:15:25 -0700970 NLA_PUT_BE32(skb, RTA_DST, dst);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700971
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 if (fi->fib_priority)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700973 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
974
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975 if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700976 goto nla_put_failure;
977
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978 if (fi->fib_prefsrc)
Al Viro17fb2c62006-09-26 22:15:25 -0700979 NLA_PUT_BE32(skb, RTA_PREFSRC, fi->fib_prefsrc);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700980
Linus Torvalds1da177e2005-04-16 15:20:36 -0700981 if (fi->fib_nhs == 1) {
982 if (fi->fib_nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -0700983 NLA_PUT_BE32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
Thomas Grafbe403ea2006-08-17 18:15:17 -0700984
Linus Torvalds1da177e2005-04-16 15:20:36 -0700985 if (fi->fib_nh->nh_oif)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700986 NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700987#ifdef CONFIG_NET_CLS_ROUTE
988 if (fi->fib_nh[0].nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -0700989 NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -0700990#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700991 }
992#ifdef CONFIG_IP_ROUTE_MULTIPATH
993 if (fi->fib_nhs > 1) {
Thomas Grafbe403ea2006-08-17 18:15:17 -0700994 struct rtnexthop *rtnh;
995 struct nlattr *mp;
996
997 mp = nla_nest_start(skb, RTA_MULTIPATH);
998 if (mp == NULL)
999 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001000
1001 for_nexthops(fi) {
Thomas Grafbe403ea2006-08-17 18:15:17 -07001002 rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
1003 if (rtnh == NULL)
1004 goto nla_put_failure;
1005
1006 rtnh->rtnh_flags = nh->nh_flags & 0xFF;
1007 rtnh->rtnh_hops = nh->nh_weight - 1;
1008 rtnh->rtnh_ifindex = nh->nh_oif;
1009
Linus Torvalds1da177e2005-04-16 15:20:36 -07001010 if (nh->nh_gw)
Al Viro17fb2c62006-09-26 22:15:25 -07001011 NLA_PUT_BE32(skb, RTA_GATEWAY, nh->nh_gw);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001012#ifdef CONFIG_NET_CLS_ROUTE
1013 if (nh->nh_tclassid)
Thomas Grafbe403ea2006-08-17 18:15:17 -07001014 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
Patrick McHardy8265abc2006-07-21 15:09:55 -07001015#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001016 /* length of rtnetlink header + attributes */
1017 rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001018 } endfor_nexthops(fi);
Thomas Grafbe403ea2006-08-17 18:15:17 -07001019
1020 nla_nest_end(skb, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001021 }
1022#endif
Thomas Grafbe403ea2006-08-17 18:15:17 -07001023 return nlmsg_end(skb, nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024
Thomas Grafbe403ea2006-08-17 18:15:17 -07001025nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08001026 nlmsg_cancel(skb, nlh);
1027 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001028}
1029
Linus Torvalds1da177e2005-04-16 15:20:36 -07001030/*
1031 Update FIB if:
1032 - local address disappeared -> we must delete all the entries
1033 referring to it.
1034 - device went down -> we must shutdown all nexthops going via it.
1035 */
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001036int fib_sync_down_addr(struct net *net, __be32 local)
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001037{
1038 int ret = 0;
1039 unsigned int hash = fib_laddr_hashfn(local);
1040 struct hlist_head *head = &fib_info_laddrhash[hash];
1041 struct hlist_node *node;
1042 struct fib_info *fi;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001043
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001044 if (fib_info_laddrhash == NULL || local == 0)
1045 return 0;
1046
1047 hlist_for_each_entry(fi, node, head, fib_lhash) {
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08001048 if (!net_eq(fi->fib_net, net))
Denis V. Lunev4814bdb2008-01-31 18:50:07 -08001049 continue;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001050 if (fi->fib_prefsrc == local) {
1051 fi->fib_flags |= RTNH_F_DEAD;
1052 ret++;
1053 }
1054 }
1055 return ret;
1056}
1057
1058int fib_sync_down_dev(struct net_device *dev, int force)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001059{
1060 int ret = 0;
1061 int scope = RT_SCOPE_NOWHERE;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001062 struct fib_info *prev_fi = NULL;
1063 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1064 struct hlist_head *head = &fib_info_devhash[hash];
1065 struct hlist_node *node;
1066 struct fib_nh *nh;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001067
Linus Torvalds1da177e2005-04-16 15:20:36 -07001068 if (force)
1069 scope = -1;
1070
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001071 hlist_for_each_entry(nh, node, head, nh_hash) {
1072 struct fib_info *fi = nh->nh_parent;
1073 int dead;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001074
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001075 BUG_ON(!fi->fib_nhs);
1076 if (nh->nh_dev != dev || fi == prev_fi)
1077 continue;
1078 prev_fi = fi;
1079 dead = 0;
1080 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001081 if (nexthop_nh->nh_flags&RTNH_F_DEAD)
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001082 dead++;
David S. Miller71fceff2010-01-15 01:16:40 -08001083 else if (nexthop_nh->nh_dev == dev &&
1084 nexthop_nh->nh_scope != scope) {
1085 nexthop_nh->nh_flags |= RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001086#ifdef CONFIG_IP_ROUTE_MULTIPATH
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001087 spin_lock_bh(&fib_multipath_lock);
David S. Miller71fceff2010-01-15 01:16:40 -08001088 fi->fib_power -= nexthop_nh->nh_power;
1089 nexthop_nh->nh_power = 0;
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001090 spin_unlock_bh(&fib_multipath_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001091#endif
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001092 dead++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001093 }
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001094#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller71fceff2010-01-15 01:16:40 -08001095 if (force > 1 && nexthop_nh->nh_dev == dev) {
Denis V. Lunev85326fa2008-01-31 18:48:47 -08001096 dead = fi->fib_nhs;
1097 break;
1098 }
1099#endif
1100 } endfor_nexthops(fi)
1101 if (dead == fi->fib_nhs) {
1102 fi->fib_flags |= RTNH_F_DEAD;
1103 ret++;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001104 }
1105 }
1106
1107 return ret;
1108}
1109
1110#ifdef CONFIG_IP_ROUTE_MULTIPATH
1111
1112/*
1113 Dead device goes up. We wake up dead nexthops.
1114 It takes sense only on multipath routes.
1115 */
1116
1117int fib_sync_up(struct net_device *dev)
1118{
1119 struct fib_info *prev_fi;
1120 unsigned int hash;
1121 struct hlist_head *head;
1122 struct hlist_node *node;
1123 struct fib_nh *nh;
1124 int ret;
1125
1126 if (!(dev->flags&IFF_UP))
1127 return 0;
1128
1129 prev_fi = NULL;
1130 hash = fib_devindex_hashfn(dev->ifindex);
1131 head = &fib_info_devhash[hash];
1132 ret = 0;
1133
1134 hlist_for_each_entry(nh, node, head, nh_hash) {
1135 struct fib_info *fi = nh->nh_parent;
1136 int alive;
1137
1138 BUG_ON(!fi->fib_nhs);
1139 if (nh->nh_dev != dev || fi == prev_fi)
1140 continue;
1141
1142 prev_fi = fi;
1143 alive = 0;
1144 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001145 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001146 alive++;
1147 continue;
1148 }
David S. Miller71fceff2010-01-15 01:16:40 -08001149 if (nexthop_nh->nh_dev == NULL ||
1150 !(nexthop_nh->nh_dev->flags&IFF_UP))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001151 continue;
David S. Miller71fceff2010-01-15 01:16:40 -08001152 if (nexthop_nh->nh_dev != dev ||
1153 !__in_dev_get_rtnl(dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001154 continue;
1155 alive++;
1156 spin_lock_bh(&fib_multipath_lock);
David S. Miller71fceff2010-01-15 01:16:40 -08001157 nexthop_nh->nh_power = 0;
1158 nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159 spin_unlock_bh(&fib_multipath_lock);
1160 } endfor_nexthops(fi)
1161
1162 if (alive > 0) {
1163 fi->fib_flags &= ~RTNH_F_DEAD;
1164 ret++;
1165 }
1166 }
1167
1168 return ret;
1169}
1170
1171/*
1172 The algorithm is suboptimal, but it provides really
1173 fair weighted route distribution.
1174 */
1175
1176void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1177{
1178 struct fib_info *fi = res->fi;
1179 int w;
1180
1181 spin_lock_bh(&fib_multipath_lock);
1182 if (fi->fib_power <= 0) {
1183 int power = 0;
1184 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001185 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) {
1186 power += nexthop_nh->nh_weight;
1187 nexthop_nh->nh_power = nexthop_nh->nh_weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001188 }
1189 } endfor_nexthops(fi);
1190 fi->fib_power = power;
1191 if (power <= 0) {
1192 spin_unlock_bh(&fib_multipath_lock);
1193 /* Race condition: route has just become dead. */
1194 res->nh_sel = 0;
1195 return;
1196 }
1197 }
1198
1199
1200 /* w should be random number [0..fi->fib_power-1],
1201 it is pretty bad approximation.
1202 */
1203
1204 w = jiffies % fi->fib_power;
1205
1206 change_nexthops(fi) {
David S. Miller71fceff2010-01-15 01:16:40 -08001207 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) &&
1208 nexthop_nh->nh_power) {
1209 if ((w -= nexthop_nh->nh_power) <= 0) {
1210 nexthop_nh->nh_power--;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001211 fi->fib_power--;
1212 res->nh_sel = nhsel;
1213 spin_unlock_bh(&fib_multipath_lock);
1214 return;
1215 }
1216 }
1217 } endfor_nexthops(fi);
1218
1219 /* Race condition: route has just become dead. */
1220 res->nh_sel = 0;
1221 spin_unlock_bh(&fib_multipath_lock);
1222}
1223#endif