blob: f3fa42eac461520393c3be14345e4a5f33485b2d [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080092#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020093#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <net/protocol.h>
95#include <net/ip.h>
96#include <net/route.h>
97#include <net/inetpeer.h>
98#include <net/sock.h>
99#include <net/ip_fib.h>
100#include <net/arp.h>
101#include <net/tcp.h>
102#include <net/icmp.h>
103#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700104#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700105#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000108#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700110#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
/* Mask of TOS bits relevant for route lookup, plus the legacy RTO_ONLINK
 * flag that may be carried in flowi4_tos.
 */
#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

/* Upper bound for a cached IPv4 MTU value. */
#define IP_MAX_MTU 0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

/* Redirect/ICMP-error rate limiting and PMTU tunables.
 * NOTE(review): presumably exported through the sysctl table elsewhere in
 * this file -- confirm when viewing the full source.
 */
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
/* 9 here mirrors the default ip_rt_redirect_number above. */
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500128
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129/*
130 * Interface to generic destination cache.
131 */
132
/* Forward declarations for the dst_ops callbacks wired up below. */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143
/* Intentionally empty .ifdown hook: IPv4 routes keep no per-device state
 * that needs tearing down here.
 */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
/* IPv4 never copy-on-writes dst metrics through this path; reaching it
 * indicates a bug, hence the WARN_ON.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
154
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

/* Destination-cache operations shared by all IPv4 routes. */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};
175
/* ECN_OR_COST(x) expands to TC_PRIO_x; the macro exists only to document
 * which table slots correspond to the ECN/low-cost TOS variants.
 */
#define ECN_OR_COST(class)	TC_PRIO_##class

/* Lookup table translating IP TOS values to packet-scheduler priority
 * bands; exported for use by other protocol code.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
/* Per-CPU route cache statistics; RT_CACHE_STAT_INC() bumps a counter on
 * the local CPU without locking.  Dumped via /proc/net/stat/rt_cache.
 */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201#ifdef CONFIG_PROC_FS
/* Legacy /proc/net/rt_cache: only the header row is ever emitted --
 * iteration yields a single SEQ_START_TOKEN at pos 0 and the ->next
 * callback below immediately terminates the walk.
 */
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}
208
209static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
210{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700212 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
/* Nothing to release; iteration holds no locks or references. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

/* Print the fixed header line (padded to the historical 127-column
 * record width); no data rows follow.
 */
static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}
228
/* seq_file plumbing for /proc/net/rt_cache. */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
248
249
/* Begin iterating the per-CPU stats: pos 0 is the header token; after
 * that, position N maps to the first possible CPU with id >= N-1.
 */
static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		/* Remember where to resume: next call starts at cpu+1-1. */
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}
265
/* Advance to the next possible CPU's stats, or NULL when exhausted.
 * Mirrors the resume logic in rt_cpu_seq_start().
 */
static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}
279
/* Nothing to release; per-CPU data is accessed without locks here. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

/* Emit one row per CPU.  The first column is the current number of dst
 * entries (global, repeated on every row); the rest are that CPU's
 * counters in the order listed in the header line.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}
318
/* seq_file plumbing for /proc/net/stat/rt_cache. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
339
Patrick McHardyc7066f72011-01-14 13:36:42 +0100340#ifdef CONFIG_IP_ROUTE_CLASSID
/* Sum the per-CPU realm accounting counters into a single 256-entry
 * table and emit it as a raw binary blob (userspace reads the struct
 * array directly, not text).
 */
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800364
/* single_open: the whole rt_acct table is produced in one ->show call. */
static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800377#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800378
/* Per-netns /proc setup: /proc/net/rt_cache (legacy, header only),
 * /proc/net/stat/rt_cache (per-CPU stats) and, with classid support,
 * /proc/net/rt_acct.  Unwinds already-created entries on failure.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800409
/* Tear down everything ip_rt_do_proc_init() created, in reverse order. */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
418
/* Register the proc files for every network namespace. */
static struct pernet_operations ip_rt_proc_ops __net_initdata =	{
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
/* CONFIG_PROC_FS disabled: nothing to register. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900435
/* A cached route is stale once its generation id no longer matches the
 * per-netns route generation counter (bumped by rt_cache_flush()).
 */
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
440
/* "Flush" cached routes by bumping the netns generation counter; stale
 * entries are then rejected lazily by rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}
445
/* Resolve the neighbour (ARP) entry for a route.  Key preference:
 * the route's gateway if set, else the packet's destination address,
 * else the caller-supplied @daddr.  Creates an arp_tbl entry on miss.
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}
466
/*
 * Peer allocation may fail only in serious out-of-memory conditions.  However
 * we still can generate some output.
 * Random ID selection looks a bit dangerous because we have no chances to
 * select ID being unique in a reasonable period of time.
 * But broken packet identifier may be better than no packet at all.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	/* Mix the previous fallback id with the destination through the
	 * keyed secure_ip_id() hash; the lock serializes access to the
	 * shared statics.
	 */
	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}
486
/* Choose the IP identification for @iph: a per-destination counter from
 * the inetpeer cache when available, otherwise the hashed fallback above.
 * NOTE(review): sequential per-peer IDs are observable on the wire and
 * were hardened in later kernels -- verify against current policy.
 */
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
/* Fill @fl4 from packet-derived parameters; when @sk is supplied, the
 * socket's bindings (device, mark, TOS, protocol) override them.
 */
static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}
521
/* Build a flow key from a received packet (device, TOS, protocol, mark
 * taken from the skb), with optional socket overrides via @sk.
 */
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}
533
/* Build a flow key purely from socket state (no packet available).
 * If the socket carries source-route options, the first hop (faddr)
 * replaces the destination; inet_opt is read under RCU.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}
551
/* Derive the flow key from the packet when one is available, otherwise
 * from the socket alone.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}
	build_skb_flow_key(fl4, skb, sk);
}
560
/* Free a cached route after an RCU grace period elapses. */
static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

/* Serializes writers of the per-nexthop exception hash chains. */
static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700567
/* A bucket is full: pick the exception with the oldest fnhe_stamp for
 * reuse, dropping its cached route (freed via RCU) first.  Caller holds
 * fnhe_lock.
 */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}
586
David S. Millerd3a25c92012-07-17 13:23:08 -0700587static inline u32 fnhe_hashfun(__be32 daddr)
588{
589 u32 hval;
590
591 hval = (__force u32) daddr;
592 hval ^= (hval >> 11) ^ (hval >> 22);
593
594 return hval & (FNHE_HASH_SIZE - 1);
595}
596
/* Copy a next-hop exception's learned state (PMTU, expiry and, if set,
 * the redirect gateway) into a cached route.
 */
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gateway = fnhe->fnhe_gw;
		rt->rt_uses_gateway = 1;
	}
}
608
/* Record a per-destination exception (redirect gateway and/or learned
 * PMTU) on nexthop @nh.  Updates an existing entry for @daddr or creates
 * one, evicting the oldest entry if the bucket chain is too deep.  All
 * modifications happen under fnhe_lock; readers use RCU.
 */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	/* Lazily allocate the exception hash on first use. */
	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Existing entry: merge in the new information. */
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			/* max(1UL, ...) keeps a nonzero expiry marker. */
			fnhe->fnhe_expires = max(1UL, expires);
		}
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
686
/* Process an ICMP redirect for @rt: validate the advised gateway against
 * the interface's redirect-acceptance policy, then record it as a
 * next-hop exception so future lookups use the new gateway.  With
 * @kill_route, the current cached route is also marked obsolete.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are honoured. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* A redirect is only valid if sent by our current gateway. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* Non-shared media: new gateway must be directly reachable,
		 * and with secure redirects it must be a known default gw.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Kick off resolution; the redirect is dropped. */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     " Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
766
David S. Miller4895c772012-07-17 04:19:00 -0700767static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
768{
769 struct rtable *rt;
770 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200771 const struct iphdr *iph = (const struct iphdr *) skb->data;
772 int oif = skb->dev->ifindex;
773 u8 tos = RT_TOS(iph->tos);
774 u8 prot = iph->protocol;
775 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700776
777 rt = (struct rtable *) dst;
778
Michal Kubecekf96ef982013-05-28 08:26:49 +0200779 __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700780 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700781}
782
Linus Torvalds1da177e2005-04-16 15:20:36 -0700783static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
784{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800785 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 struct dst_entry *ret = dst;
787
788 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000789 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 ip_rt_put(rt);
791 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700792 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
793 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700794 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 ret = NULL;
796 }
797 }
798 return ret;
799}
800
/*
 * Algorithm:
 * 1. The first ip_rt_redirect_number redirects are sent
 *    with exponential backoff, then we stop sending them at all,
 *    assuming that the host ignores our redirects.
 * 2. If we did not see packets requiring redirects
 *    during ip_rt_redirect_silence, we assume that the host
 *    forgot the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
816
/* Send an ICMP redirect to the source of @skb, telling it to use the
 * nexthop of the route attached to the skb directly.
 *
 * Rate limiting is per source address, tracked in the inet_peer cache:
 * up to ip_rt_redirect_number redirects are sent with exponentially
 * increasing spacing (ip_rt_redirect_load << rate_tokens); after
 * ip_rt_redirect_silence without triggering packets the counter resets
 * (see the algorithm comment above).
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	/* Snapshot the per-device settings under RCU; the device config
	 * is not needed after this point.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		/* No peer entry available: send unthrottled rather than
		 * not at all.
		 */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		/* Log exactly once, when the host uses up its redirect
		 * budget without reacting.
		 */
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
879
880static int ip_error(struct sk_buff *skb)
881{
David S. Miller251da412012-06-26 16:27:09 -0700882 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000883 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800884 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700885 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700886 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800887 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700888 int code;
889
David S. Miller251da412012-06-26 16:27:09 -0700890 net = dev_net(rt->dst.dev);
891 if (!IN_DEV_FORWARD(in_dev)) {
892 switch (rt->dst.error) {
893 case EHOSTUNREACH:
894 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
895 break;
896
897 case ENETUNREACH:
898 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
899 break;
900 }
901 goto out;
902 }
903
Changli Gaod8d1f302010-06-10 23:31:35 -0700904 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000905 case EINVAL:
906 default:
907 goto out;
908 case EHOSTUNREACH:
909 code = ICMP_HOST_UNREACH;
910 break;
911 case ENETUNREACH:
912 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700913 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000914 break;
915 case EACCES:
916 code = ICMP_PKT_FILTERED;
917 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700918 }
919
David S. Miller1d861aa2012-07-10 03:58:16 -0700920 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800921
922 send = true;
923 if (peer) {
924 now = jiffies;
925 peer->rate_tokens += now - peer->rate_last;
926 if (peer->rate_tokens > ip_rt_error_burst)
927 peer->rate_tokens = ip_rt_error_burst;
928 peer->rate_last = now;
929 if (peer->rate_tokens >= ip_rt_error_cost)
930 peer->rate_tokens -= ip_rt_error_cost;
931 else
932 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700933 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700934 }
David S. Miller92d86822011-02-04 15:55:25 -0800935 if (send)
936 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700937
938out: kfree_skb(skb);
939 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900940}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700941
/* Record a learned path MTU @mtu for the flow @fl4 on route @rt.
 *
 * The PMTU is stored as a nexthop exception (fnhe) on the FIB nexthop,
 * with expiry ip_rt_mtu_expires from now, so it survives the individual
 * cached dst and applies to future lookups for the same destination.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	/* An administratively locked MTU metric must never be lowered. */
	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* Ignore reports larger than what the device could have sent. */
	if (dst->dev->mtu < mtu)
		return;

	/* Clamp to the sysctl floor to defeat bogus tiny-MTU reports. */
	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	/* Same value and still in the first half of its lifetime:
	 * nothing to refresh yet.
	 */
	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
969
David S. Miller4895c772012-07-17 04:19:00 -0700970static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
971 struct sk_buff *skb, u32 mtu)
972{
973 struct rtable *rt = (struct rtable *) dst;
974 struct flowi4 fl4;
975
976 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +0000977 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -0700978}
979
David S. Miller36393392012-06-14 22:21:46 -0700980void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
981 int oif, u32 mark, u8 protocol, int flow_flags)
982{
David S. Miller4895c772012-07-17 04:19:00 -0700983 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700984 struct flowi4 fl4;
985 struct rtable *rt;
986
David S. Miller4895c772012-07-17 04:19:00 -0700987 __build_flow_key(&fl4, NULL, iph, oif,
988 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700989 rt = __ip_route_output_key(net, &fl4);
990 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700991 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700992 ip_rt_put(rt);
993 }
994}
995EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
996
Steffen Klassert9cb3a502013-01-21 01:59:11 +0000997static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -0700998{
David S. Miller4895c772012-07-17 04:19:00 -0700999 const struct iphdr *iph = (const struct iphdr *) skb->data;
1000 struct flowi4 fl4;
1001 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001002
David S. Miller4895c772012-07-17 04:19:00 -07001003 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1004 rt = __ip_route_output_key(sock_net(sk), &fl4);
1005 if (!IS_ERR(rt)) {
1006 __ip_rt_update_pmtu(rt, &fl4, mtu);
1007 ip_rt_put(rt);
1008 }
David S. Miller36393392012-06-14 22:21:46 -07001009}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001010
/* Apply a learned PMTU @mtu to the route cached on socket @sk.
 *
 * Runs under bh_lock_sock(). If the socket is owned by user context, or
 * has no cached route, fall back to the socket-less path. Otherwise
 * update the (possibly revalidated) cached route in place; whenever a
 * fresh route had to be looked up, install it on the socket.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *dst;
	/* True once rt holds a reference we obtained here (not the
	 * socket's cached one) and must either install or release.
	 */
	bool new = false;

	bh_lock_sock(sk);
	rt = (struct rtable *) __sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !rt) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	/* Cached dst no longer valid: look up a fresh route. */
	if (!__sk_dst_check(sk, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Update the innermost (non-xfrm) route of the path. */
	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	/* The update itself may have invalidated the route (e.g. via
	 * DST_OBSOLETE_KILL); if so, look up and use a replacement.
	 */
	dst = dst_check(&rt->dst, 0);
	if (!dst) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		__sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001058
David S. Millerb42597e2012-07-11 21:25:45 -07001059void ipv4_redirect(struct sk_buff *skb, struct net *net,
1060 int oif, u32 mark, u8 protocol, int flow_flags)
1061{
David S. Miller4895c772012-07-17 04:19:00 -07001062 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001063 struct flowi4 fl4;
1064 struct rtable *rt;
1065
David S. Miller4895c772012-07-17 04:19:00 -07001066 __build_flow_key(&fl4, NULL, iph, oif,
1067 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001068 rt = __ip_route_output_key(net, &fl4);
1069 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001070 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001071 ip_rt_put(rt);
1072 }
1073}
1074EXPORT_SYMBOL_GPL(ipv4_redirect);
1075
1076void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1077{
David S. Miller4895c772012-07-17 04:19:00 -07001078 const struct iphdr *iph = (const struct iphdr *) skb->data;
1079 struct flowi4 fl4;
1080 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -07001081
David S. Miller4895c772012-07-17 04:19:00 -07001082 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1083 rt = __ip_route_output_key(sock_net(sk), &fl4);
1084 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001085 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001086 ip_rt_put(rt);
1087 }
David S. Millerb42597e2012-07-11 21:25:45 -07001088}
1089EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1090
David S. Millerefbc3682011-12-01 13:38:59 -05001091static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1092{
1093 struct rtable *rt = (struct rtable *) dst;
1094
David S. Millerceb33202012-07-17 11:31:28 -07001095 /* All IPV4 dsts are created with ->obsolete set to the value
1096 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1097 * into this function always.
1098 *
Timo Teräs387aa652013-05-27 20:46:31 +00001099 * When a PMTU/redirect information update invalidates a route,
1100 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1101 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001102 */
Timo Teräs387aa652013-05-27 20:46:31 +00001103 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001104 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001105 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106}
1107
Linus Torvalds1da177e2005-04-16 15:20:36 -07001108static void ipv4_link_failure(struct sk_buff *skb)
1109{
1110 struct rtable *rt;
1111
1112 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1113
Eric Dumazet511c3f92009-06-02 05:14:27 +00001114 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001115 if (rt)
1116 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001117}
1118
1119static int ip_rt_bug(struct sk_buff *skb)
1120{
Joe Perches91df42b2012-05-15 14:11:54 +00001121 pr_debug("%s: %pI4 -> %pI4, %s\n",
1122 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1123 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001125 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126 return 0;
1127}
1128
/*
 * We do not cache the source address of an outgoing interface,
 * because it is used only by IP RR, TS and SRR options,
 * so it is out of the fast path.
 *
 * BTW remember: "addr" is allowed to be not aligned
 * in IP options!
 */
1137
/* Copy into @addr the 4-byte source address this host would use on the
 * route @rt for packet @skb (used when filling RR/TS/SRR IP options).
 *
 * For output routes that is simply the packet's own source address.
 * For input routes, a reverse FIB lookup determines the preferred
 * source; if it fails, fall back to an address on the output device.
 * @addr may be unaligned, hence the memcpy.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Build a flow key mirroring the received packet. */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			/* No route back: pick a universe-scope address on
			 * the output device toward the nexthop.
			 */
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1170
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge routing-classifier realm bits from @tag into the route's
 * tclassid, filling only the halves (destination realm in the low 16
 * bits, source realm in the high 16) that are still unset.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
}
#endif
1180
David S. Miller0dbaee32010-12-13 12:52:14 -08001181static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1182{
1183 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1184
1185 if (advmss == 0) {
1186 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1187 ip_rt_min_advmss);
1188 if (advmss > 65535 - 40)
1189 advmss = 65535 - 40;
1190 }
1191 return advmss;
1192}
1193
Steffen Klassertebb762f2011-11-23 02:12:51 +00001194static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001195{
Steffen Klassert261663b2011-11-23 02:14:50 +00001196 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001197 unsigned int mtu = rt->rt_pmtu;
1198
Alexander Duyck98d75c32012-08-27 06:30:01 +00001199 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001200 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001201
Steffen Klassert38d523e2013-01-16 20:55:01 +00001202 if (mtu)
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001203 return mtu;
1204
1205 mtu = dst->dev->mtu;
David S. Millerd33e4552010-12-14 13:01:14 -08001206
1207 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
Julian Anastasov155e8332012-10-08 11:41:18 +00001208 if (rt->rt_uses_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001209 mtu = 576;
1210 }
1211
1212 if (mtu > IP_MAX_MTU)
1213 mtu = IP_MAX_MTU;
1214
1215 return mtu;
1216}
1217
David S. Millerf2bb4be2012-07-17 12:20:47 -07001218static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001219{
1220 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1221 struct fib_nh_exception *fnhe;
1222 u32 hval;
1223
David S. Millerf2bb4be2012-07-17 12:20:47 -07001224 if (!hash)
1225 return NULL;
1226
David S. Millerd3a25c92012-07-17 13:23:08 -07001227 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001228
1229 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1230 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001231 if (fnhe->fnhe_daddr == daddr)
1232 return fnhe;
1233 }
1234 return NULL;
1235}
David S. Miller4895c772012-07-17 04:19:00 -07001236
/* Bind route @rt as the cached route of nexthop exception @fnhe, if the
 * exception still covers @daddr. Copies the exception's state (gateway,
 * PMTU, expiry) into the route and publishes the route under
 * fnhe->fnhe_rth, freeing any previous occupant.
 *
 * Returns true if the route was bound, false if the exception no longer
 * matches. Serialized by fnhe_lock.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		int genid = fnhe_genid(dev_net(rt->dst.dev));
		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);

		/* Stale generation: wipe learned redirect/PMTU state
		 * before reusing the exception.
		 */
		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
		}
		fill_route_from_fnhe(rt, fnhe);
		/* No gateway from the exception: route directly to daddr. */
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		/* Publish the new cached route, then drop the old one. */
		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1269
/* Try to install @rt as the cached route of nexthop @nh: nh_rth_input
 * for input routes, the per-cpu nh_pcpu_rth_output slot otherwise.
 *
 * The slot is swapped with a single cmpxchg; on success the previous
 * occupant is freed. Returns false if another CPU raced us and the
 * route was not cached (caller then marks it DST_NOCACHE).
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		/* Lost the race; the slot now holds someone else's route. */
		ret = false;

	return ret;
}
1291
/* Protects rt_uncached_list. */
static DEFINE_SPINLOCK(rt_uncached_lock);
/* All routes that could not be (or must not be) cached on a nexthop;
 * tracked so rt_flush_dev() can re-home them when a device goes away.
 */
static LIST_HEAD(rt_uncached_list);

/* Put @rt on the global uncached-routes list. */
static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}
1301
1302static void ipv4_dst_destroy(struct dst_entry *dst)
1303{
1304 struct rtable *rt = (struct rtable *) dst;
1305
Eric Dumazet78df76a2012-08-24 05:40:47 +00001306 if (!list_empty(&rt->rt_uncached)) {
David S. Millercaacf052012-07-31 15:06:50 -07001307 spin_lock_bh(&rt_uncached_lock);
1308 list_del(&rt->rt_uncached);
1309 spin_unlock_bh(&rt_uncached_lock);
1310 }
1311}
1312
/* Detach every uncached route still pointing at @dev before the device
 * disappears: re-home each one onto the namespace loopback device,
 * moving the device reference accordingly (dev_hold new, dev_put old).
 */
void rt_flush_dev(struct net_device *dev)
{
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1330
Eric Dumazet4331deb2012-07-25 05:11:23 +00001331static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001332{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001333 return rt &&
1334 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1335 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001336}
1337
/* Finish constructing route @rt from FIB lookup result @res: copy
 * gateway and metrics from the nexthop, attach classifier tags, and try
 * to cache the route — in the nexthop exception @fnhe if one matched,
 * otherwise in the nexthop's cache slots. Routes that end up uncached
 * get DST_NOCACHE and go on the global uncached list.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Only link-scope gateways are real nexthop gateways. */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		/* No fib_info (e.g. local/broadcast): never cached. */
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1381
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001382static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001383 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001384{
David S. Millerf5b0a872012-07-19 12:31:33 -07001385 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001386 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001387 (nopolicy ? DST_NOPOLICY : 0) |
1388 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001389}
1390
/* Build the input route for a multicast packet received on @dev.
 * Called in an rcu_read_lock() section.
 *
 * @skb:   received packet (must carry ETH_P_IP)
 * @daddr: multicast destination from the IP header
 * @saddr: source address from the IP header
 * @tos:   IP TOS byte, passed through to source validation
 * @dev:   ingress device
 * @our:   non-zero when this host is a member of @daddr on @dev,
 *         in which case the packet is also delivered locally
 *
 * Returns 0 with skb's dst set on success; -EINVAL for martian or
 * malformed input, -ENOBUFS on allocation failure, or the negative
 * error from fib_validate_source().
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* A multicast/limited-broadcast source, or a non-IPv4 frame,
	 * can never form a valid multicast route.
	 */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Loopback sources are martian unless route_localnet is on. */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 senders are tolerated only toward link-local
		 * multicast groups; everything else is invalid.
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	/* Multicast input routes are never cached (will_cache == false). */
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* This dst must never be used for output. */
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags	= RTCF_MULTICAST;
	rth->rt_type	= RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		/* Group member: deliver a copy to the local stack. */
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	/* Non link-local groups may additionally be mrouted. */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1462
1463
1464static void ip_handle_martian_source(struct net_device *dev,
1465 struct in_device *in_dev,
1466 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001467 __be32 daddr,
1468 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001469{
1470 RT_CACHE_STAT_INC(in_martian_src);
1471#ifdef CONFIG_IP_ROUTE_VERBOSE
1472 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1473 /*
1474 * RFC1812 recommendation, if source is martian,
1475 * the only hint is MAC header.
1476 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001477 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001478 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001479 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001480 print_hex_dump(KERN_WARNING, "ll header: ",
1481 DUMP_PREFIX_OFFSET, 16, 1,
1482 skb_mac_header(skb),
1483 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001484 }
1485 }
1486#endif
1487}
1488
/* Build (or reuse from the nexthop cache) the forwarding route for a
 * unicast packet that is being forwarded.  Called in an rcu_read_lock()
 * section, from ip_mkroute_input() only.
 *
 * On success the skb's dst is set and 0 is returned; on failure a
 * negative errno is returned and no dst is attached.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only tag-less routes with fib info can use the nexthop cache. */
	do_cache = res->fi && !itag;
	/* err > 0 here means the source passed validation but came from
	 * an unexpected interface; if the packet would leave on the same
	 * device it arrived on, suggest a redirect and skip caching.
	 */
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
		flags |= RTCF_DOREDIRECT;
		do_cache = false;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* Fast path: reuse a still-valid cached input route. */
	if (do_cache) {
		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif 	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	/* May also insert rth into the nexthop input-route cache. */
	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001577
/* Resolve the nexthop (selecting among multipath alternatives when the
 * matched route has several) and build the input route for a forwarded
 * packet.  Thin wrapper around __mkroute_input().
 *
 * NOTE(review): @fl4 is currently unused in this function — it is not
 * forwarded to __mkroute_input(); presumably kept for signature
 * stability with callers/history.
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    const struct flowi4 *fl4,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1)
		fib_select_multipath(res);
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1592
/*
 *	NOTE. We drop all packets that have a local source address,
 *	because every properly looped-back packet must already have the
 *	correct destination attached by the output routine.
 *
 *	This approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *
 *	Called with rcu_read_lock() held.  Classifies the packet
 *	(martian / broadcast / local / forward), performs the FIB lookup,
 *	and attaches the resulting dst to the skb.  Returns 0 on success
 *	or a negative errno.
 */

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4	fl4;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	int		err = -EINVAL;
	struct net    *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the weirdest martians, which fib_lookup cannot
	 * detect.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* The following code tries to avoid calling
	 * IN_DEV_NET_ROUTE_LOCALNET(), and calls it at most once when
	 * daddr and/or saddr are loopback addresses.
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route the packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  LOOPBACK_IFINDEX,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	/* Forwarding case: build (or reuse) the input route. */
	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* A zero source (e.g. DHCP discover) skips source validation. */
	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Try the cached nexthop input route first (tag-less only). */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input= ip_local_deliver;
	/* Local dsts must never be used for output. */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags 	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (res.type == RTN_UNREACHABLE) {
		/* Deliver the appropriate ICMP error instead. */
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	/* keep_err path preserves fib_validate_source()'s errno */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1780
David S. Millerc6cffba2012-07-26 11:14:38 +00001781int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1782 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783{
Eric Dumazet96d36222010-06-02 19:21:31 +00001784 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785
Eric Dumazet96d36222010-06-02 19:21:31 +00001786 rcu_read_lock();
1787
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 /* Multicast recognition logic is moved from route cache to here.
1789 The problem was that too many Ethernet cards have broken/missing
1790 hardware multicast filters :-( As result the host on multicasting
1791 network acquires a lot of useless route cache entries, sort of
1792 SDR messages from all the world. Now we try to get rid of them.
1793 Really, provided software IP multicast filter is organized
1794 reasonably (at least, hashed), it does not result in a slowdown
1795 comparing with route cache reject entries.
1796 Note, that multicast routers are not affected, because
1797 route cache entry is created eventually.
1798 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001799 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001800 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801
Eric Dumazet96d36222010-06-02 19:21:31 +00001802 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001803 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1804 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 if (our
1806#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001807 ||
1808 (!ipv4_is_local_multicast(daddr) &&
1809 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001811 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001812 int res = ip_route_input_mc(skb, daddr, saddr,
1813 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001815 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 }
1817 }
1818 rcu_read_unlock();
1819 return -EINVAL;
1820 }
David S. Millerc10237e2012-06-27 17:05:06 -07001821 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001822 rcu_read_unlock();
1823 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824}
David S. Millerc6cffba2012-07-26 11:14:38 +00001825EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826
/* Build (or reuse from the per-nexthop cache) the output route for
 * @fl4 on @dev_out.  Called with rcu_read_lock() held, from
 * __ip_route_output_key() after route resolution.
 *
 * Returns the rtable (with a reference held when taken from the cache)
 * or an ERR_PTR: -EINVAL for invalid destinations/loopback-source
 * violations, -ENOBUFS on allocation failure.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* A loopback source may only leave via a loopback device,
	 * unless route_localnet is enabled.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	/* Reclassify by destination address class. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	fnhe = NULL;
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Prefer a PMTU/redirect exception entry over the
		 * per-cpu cached output route.
		 */
		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth;
		else {
			/* KNOWN_NH flows without an on-link gateway must
			 * not share the per-cpu cache slot.
			 */
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nh->nh_gw &&
				       nh->nh_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	/* May also cache rth into fnhe or the per-cpu slot. */
	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}
1948
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949/*
1950 * Major route resolver routine.
1951 */
1952
David S. Miller89aef892012-07-17 11:00:09 -07001953struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001956 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001957 unsigned int flags = 0;
1958 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001959 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001960 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961
David S. Miller85b91b02012-07-13 08:21:29 -07001962 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001964 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965
David S. Miller813b3b52011-04-28 14:48:42 -07001966 orig_oif = fl4->flowi4_oif;
1967
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001968 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07001969 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1970 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1971 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001972
David S. Miller010c2702011-02-17 15:37:09 -08001973 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001974 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001975 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001976 if (ipv4_is_multicast(fl4->saddr) ||
1977 ipv4_is_lbcast(fl4->saddr) ||
1978 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 goto out;
1980
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 /* I removed check for oif == dev_out->oif here.
1982 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001983 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
1984 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 2. Moreover, we are allowed to send packets with saddr
1986 of another iface. --ANK
1987 */
1988
David S. Miller813b3b52011-04-28 14:48:42 -07001989 if (fl4->flowi4_oif == 0 &&
1990 (ipv4_is_multicast(fl4->daddr) ||
1991 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001992 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001993 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001994 if (dev_out == NULL)
1995 goto out;
1996
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 /* Special hack: user can direct multicasts
1998 and limited broadcast via necessary interface
1999 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2000 This hack is not just for fun, it allows
2001 vic,vat and friends to work.
2002 They bind socket to loopback, set ttl to zero
2003 and expect that it will work.
2004 From the viewpoint of routing cache they are broken,
2005 because we are not allowed to build multicast path
2006 with loopback source addr (look, routing cache
2007 cannot know, that ttl is zero, so that packet
2008 will not leave this host and route is valid).
2009 Luckily, this hack is good workaround.
2010 */
2011
David S. Miller813b3b52011-04-28 14:48:42 -07002012 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 goto make_route;
2014 }
Julian Anastasova210d012008-10-01 07:28:28 -07002015
David S. Miller813b3b52011-04-28 14:48:42 -07002016 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002017 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002018 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002019 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002020 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021 }
2022
2023
David S. Miller813b3b52011-04-28 14:48:42 -07002024 if (fl4->flowi4_oif) {
2025 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002026 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 if (dev_out == NULL)
2028 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002029
2030 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002031 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002032 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002033 goto out;
2034 }
David S. Miller813b3b52011-04-28 14:48:42 -07002035 if (ipv4_is_local_multicast(fl4->daddr) ||
2036 ipv4_is_lbcast(fl4->daddr)) {
2037 if (!fl4->saddr)
2038 fl4->saddr = inet_select_addr(dev_out, 0,
2039 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 goto make_route;
2041 }
David S. Miller813b3b52011-04-28 14:48:42 -07002042 if (fl4->saddr) {
2043 if (ipv4_is_multicast(fl4->daddr))
2044 fl4->saddr = inet_select_addr(dev_out, 0,
2045 fl4->flowi4_scope);
2046 else if (!fl4->daddr)
2047 fl4->saddr = inet_select_addr(dev_out, 0,
2048 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 }
2050 }
2051
David S. Miller813b3b52011-04-28 14:48:42 -07002052 if (!fl4->daddr) {
2053 fl4->daddr = fl4->saddr;
2054 if (!fl4->daddr)
2055 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002056 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002057 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 res.type = RTN_LOCAL;
2059 flags |= RTCF_LOCAL;
2060 goto make_route;
2061 }
2062
David S. Miller813b3b52011-04-28 14:48:42 -07002063 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002065 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07002066 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067 /* Apparently, routing tables are wrong. Assume,
2068 that the destination is on link.
2069
2070 WHY? DW.
2071 Because we are allowed to send to iface
2072 even if it has NO routes and NO assigned
2073 addresses. When oif is specified, routing
2074 tables are looked up with only one purpose:
2075 to catch if destination is gatewayed, rather than
2076 direct. Moreover, if MSG_DONTROUTE is set,
2077 we send packet, ignoring both routing tables
2078 and ifaddr state. --ANK
2079
2080
2081 We could make it even if oif is unknown,
2082 likely IPv6, but we do not.
2083 */
2084
David S. Miller813b3b52011-04-28 14:48:42 -07002085 if (fl4->saddr == 0)
2086 fl4->saddr = inet_select_addr(dev_out, 0,
2087 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 res.type = RTN_UNICAST;
2089 goto make_route;
2090 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08002091 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092 goto out;
2093 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094
2095 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002096 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002097 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002098 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002099 else
David S. Miller813b3b52011-04-28 14:48:42 -07002100 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002101 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002102 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002103 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 flags |= RTCF_LOCAL;
2105 goto make_route;
2106 }
2107
2108#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002109 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002110 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 else
2112#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002113 if (!res.prefixlen &&
2114 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002115 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002116 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117
David S. Miller813b3b52011-04-28 14:48:42 -07002118 if (!fl4->saddr)
2119 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002122 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123
2124
2125make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002126 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127
David S. Miller010c2702011-02-17 15:37:09 -08002128out:
2129 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002130 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002132EXPORT_SYMBOL_GPL(__ip_route_output_key);
2133
/* A blackhole dst is never valid for reuse: returning NULL marks it
 * stale on every check, so callers must perform a fresh lookup.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2138
Steffen Klassertebb762f2011-11-23 02:12:51 +00002139static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002140{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002141 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2142
2143 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002144}
2145
/* Path-MTU updates are meaningless on a blackhole route: deliberate no-op. */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2150
/* ICMP redirects are ignored on a blackhole route: deliberate no-op. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2155
/* Refuse to make metrics writable for a blackhole route; NULL keeps the
 * caller on the existing read-only metrics.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2161
/* dst_ops used by ipv4_blackhole_route() clones: validation always fails
 * (->check returns NULL), PMTU/redirect events are no-ops and metrics are
 * never COWed; advmss and neighbour lookup are shared with normal routes.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.protocol		=	cpu_to_be16(ETH_P_IP),
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2173
/* Clone @dst_orig into a "blackhole" route: the copy keeps the original's
 * routing fields but silently drops all traffic (both input and output
 * handlers are dst_discard) and uses ipv4_dst_blackhole_ops so it is never
 * revalidated.  Consumes the caller's reference on @dst_orig.
 * Returns the new dst, or ERR_PTR(-ENOMEM) if allocation failed.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Discard everything queued through this entry. */
		new->input = dst_discard;
		new->output = dst_discard;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Mirror the original's routing identity. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		/* NOTE(review): dst_free() here appears to schedule the
		 * entry for RCU-safe freeing once the last ref drops —
		 * confirm against dst_alloc()/dst_free() semantics.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2210
David S. Miller9d6ec932011-03-12 01:12:47 -05002211struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002212 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213{
David S. Miller9d6ec932011-03-12 01:12:47 -05002214 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215
David S. Millerb23dd4f2011-03-02 14:31:35 -08002216 if (IS_ERR(rt))
2217 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218
David S. Miller56157872011-05-02 14:37:45 -07002219 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002220 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2221 flowi4_to_flowi(flp4),
2222 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223
David S. Millerb23dd4f2011-03-02 14:31:35 -08002224 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002226EXPORT_SYMBOL_GPL(ip_route_output_flow);
2227
/* Build an rtnetlink message of type @event describing @rt (taken from
 * the skb's dst) in reply to a route query.  @dst/@src are the queried
 * addresses, @fl4 the flow used for the lookup, @portid/@seq address the
 * requester.  Returns the message length on success, 0 when a !nowait
 * ipmr lookup will answer later, or -EMSGSIZE when @skb ran out of room.
 */
static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	/* Keep the upper 16 rt_flags bits; a resolved entry always looks
	 * cloned to userspace.
	 */
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Report the source address the lookup actually chose, when it
	 * differs from what the user supplied (output routes only).
	 */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Convert the absolute expiry to remaining jiffies, clamped at 0. */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* Override RTAX_MTU with the cached PMTU while it is still valid. */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Non-local multicast with forwarding enabled goes through
		 * ipmr; err == 0 with !nowait means the answer is deferred.
		 */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2337
/* RTM_GETROUTE handler: answer a userspace route query.  When RTA_IIF is
 * given the query is resolved as an *input* route by pushing a dummy skb
 * through ip_route_input() on that device; otherwise a normal output
 * lookup is done.  The result is serialized by rt_fill_info() and
 * unicast back to the requester.  Returns 0 or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* All attributes are optional; missing ones default to 0. */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		/* Simulate reception on @dev so the input path applies. */
		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		/* A "successful" lookup may still carry a dst error. */
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
2434
/* rtnetlink dump callback for cached routes: nothing is appended, and
 * the unchanged skb length signals the dump is complete.
 */
int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
{
	return skb->len;
}
2439
/* A device's multicast configuration changed: flush cached routing state
 * for that device's network namespace.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
2444
2445#ifdef CONFIG_SYSCTL
/* Defaults for the net.ipv4.route GC sysctls below; the *_timeout and
 * *_interval values are stored in jiffies.
 */
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2450
Joe Perchesfe2c6332013-06-11 23:04:25 -07002451static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002452 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002453 size_t *lenp, loff_t *ppos)
2454{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002455 struct net *net = (struct net *)__ctl->extra1;
2456
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002458 rt_cache_flush(net);
2459 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002460 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002461 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462
2463 return -EINVAL;
2464}
2465
/* Global (not per-netns) routing knobs exposed under net.ipv4.route.
 * All entries are plain ints; jiffies-valued ones use the
 * proc_dointvec*_jiffies handlers for unit conversion.
 */
static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002576
/* Per-netns write-only trigger (net.ipv4.route.flush): handled by
 * ipv4_sysctl_rtcache_flush(); ->extra1 is filled with the owning netns
 * at registration time.
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2586
/* Register the per-netns "net/ipv4/route" sysctl directory.  Non-init
 * namespaces get a kmemdup'ed copy of the flush table so each can carry
 * its own ->extra1 (the owning struct net); the entry is hidden from
 * unprivileged user namespaces by clearing its procname.
 * Returns 0 or -ENOMEM.
 */
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (net->ipv4.route_hdr == NULL)
		goto err_reg;
	return 0;

err_reg:
	/* Only free a duplicated table, never the static template. */
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
2614
2615static __net_exit void sysctl_route_net_exit(struct net *net)
2616{
2617 struct ctl_table *tbl;
2618
2619 tbl = net->ipv4.route_hdr->ctl_table_arg;
2620 unregister_net_sysctl_table(net->ipv4.route_hdr);
2621 BUG_ON(tbl == ipv4_route_flush_table);
2622 kfree(tbl);
2623}
2624
/* Pernet hooks for the per-netns route sysctls. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629#endif
2630
/* Per-netns init of route validity generations: the route and fnhe
 * generation counters start at zero, and the device-address generation
 * seed is randomized.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
2639
/* Pernet hook: generation counters need init only, no teardown. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2643
David S. Millerc3426b42012-06-09 16:27:05 -07002644static int __net_init ipv4_inetpeer_init(struct net *net)
2645{
2646 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2647
2648 if (!bp)
2649 return -ENOMEM;
2650 inet_peer_base_init(bp);
2651 net->ipv4.peers = bp;
2652 return 0;
2653}
2654
2655static void __net_exit ipv4_inetpeer_exit(struct net *net)
2656{
2657 struct inet_peer_base *bp = net->ipv4.peers;
2658
2659 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002660 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002661 kfree(bp);
2662}
2663
/* Pernet hooks for the per-netns inet_peer storage. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	=	ipv4_inetpeer_init,
	.exit	=	ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002668
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu accounting table for route classids; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002672
/* One-time boot initialization of the IPv4 routing subsystem: classid
 * accounting, the dst slab cache and entry counters, devinet/FIB setup,
 * /proc files, xfrm, the RTM_GETROUTE netlink handler, and the per-netns
 * pernet operations.  Unrecoverable allocation failures panic; otherwise
 * always returns 0 (rc is never set to anything else here).
 */
int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole routes share the same slab as normal routes. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable GC thresholds at boot; sysctls can adjust. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2716
Al Viroa1bc6eb2008-07-30 06:32:52 -04002717#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01002718/*
2719 * We really need to sanitize the damn ipv4 init order, then all
2720 * this nonsense will go away.
2721 */
2722void __init ip_static_sysctl_init(void)
2723{
Eric W. Biederman4e5ca782012-04-19 13:32:39 +00002724 register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
Al Viroeeb61f72008-07-27 08:59:33 +01002725}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002726#endif