blob: a9a54a2368323243be30f536d45646d48ec6c42d [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080092#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020093#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <net/protocol.h>
95#include <net/ip.h>
96#include <net/route.h>
97#include <net/inetpeer.h>
98#include <net/sock.h>
99#include <net/ip_fib.h>
100#include <net/arp.h>
101#include <net/tcp.h>
102#include <net/icmp.h>
103#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700104#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700105#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000108#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700110#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
David S. Miller68a5e3d2011-03-11 20:07:33 -0500112#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115#define IP_MAX_MTU 0xFFF0
116
117#define RT_GC_TIMEOUT (300*HZ)
118
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119static int ip_rt_max_size;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700120static int ip_rt_redirect_number __read_mostly = 9;
121static int ip_rt_redirect_load __read_mostly = HZ / 50;
122static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
123static int ip_rt_error_cost __read_mostly = HZ;
124static int ip_rt_error_burst __read_mostly = 5 * HZ;
Stephen Hemminger817bc4d2008-03-22 17:43:59 -0700125static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
126static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
127static int ip_rt_min_advmss __read_mostly = 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500128
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129/*
130 * Interface to generic destination cache.
131 */
132
133static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800134static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000135static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
137static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700138static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
139 struct sk_buff *skb, u32 mtu);
140static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
141 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700142static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143
/* dst_ops->ifdown callback.  There is no per-route state tied to the
 * device here, so nothing needs tearing down; intentionally a no-op.
 */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
/* dst_ops->cow_metrics callback.  IPv4 routes are never expected to
 * need copy-on-write metrics through this path, so any call indicates
 * a bug: warn once and refuse by returning NULL.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
154
/* Forward declaration: the lookup helper is defined further down but is
 * referenced by the ops table below.
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

/* Generic destination-cache operations table for IPv4 routes. */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};
175
#define ECN_OR_COST(class)	TC_PRIO_##class

/* Map the abstract IP TOS nibble to a traffic-control priority band.
 * Entries alternate between a plain TC_PRIO_* value and its
 * ECN_OR_COST() twin (which expands to the same TC_PRIO_* here).
 * Exported for use by other networking code.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
/* Per-CPU route-cache statistics; read out via /proc (rt_cpu_seq_show)
 * and bumped lock-free on the local CPU through RT_CACHE_STAT_INC().
 */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700202static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
203{
Eric Dumazet29e75252008-01-31 17:05:09 -0800204 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700205 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800206 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207}
208
209static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
210{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700212 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
/* seq_file stop: start/next allocate nothing, so nothing to release. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
218
/* seq_file show: emit only the legacy column-header line (kept for
 * userspace compatibility); there are no cache entries to print.
 */
static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}
228
/* seq_file operations for /proc/net/rt_cache. */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};
235
/* open() handler for /proc/net/rt_cache: plain seq_file, no private state. */
static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}
240
/* file_operations for /proc/net/rt_cache. */
static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
248
249
/* seq_file start for /proc/net/stat/rt_cache.
 *
 * Position encoding: *pos == 0 is the header token, *pos == cpu + 1
 * refers to the stats of that CPU.  Skip impossible CPU ids and record
 * the position of the CPU actually returned.
 */
static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}
265
/* seq_file next: advance to the next possible CPU's statistics, using
 * the same *pos == cpu + 1 encoding as rt_cpu_seq_start().
 */
static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}
279
/* seq_file stop: per-CPU data is static, nothing to release. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
284
/* seq_file show: print one line of route-cache statistics.  The header
 * token yields the column legend; otherwise v is one CPU's counters.
 * The first column is the global dst entry count, not a per-CPU value.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}
318
/* seq_file operations for /proc/net/stat/rt_cache. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};
325
326
/* open() handler for /proc/net/stat/rt_cache. */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}
331
/* file_operations for /proc/net/stat/rt_cache. */
static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
339
Patrick McHardyc7066f72011-01-14 13:36:42 +0100340#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800341static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800342{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800343 struct ip_rt_acct *dst, *src;
344 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800345
Alexey Dobriyana661c412009-11-25 15:40:35 -0800346 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
347 if (!dst)
348 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800349
Alexey Dobriyana661c412009-11-25 15:40:35 -0800350 for_each_possible_cpu(i) {
351 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
352 for (j = 0; j < 256; j++) {
353 dst[j].o_bytes += src[j].o_bytes;
354 dst[j].o_packets += src[j].o_packets;
355 dst[j].i_bytes += src[j].i_bytes;
356 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800357 }
358 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800359
360 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
361 kfree(dst);
362 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800363}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800364
/* open() handler for /proc/net/rt_acct: single-record seq_file. */
static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}
369
/* file_operations for /proc/net/rt_acct. */
static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800377#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800378
/* Per-netns init: create the route-related /proc entries
 * (/proc/net/rt_cache, /proc/net/stat/rt_cache and, with classid
 * accounting, /proc/net/rt_acct).  Uses a goto ladder to unwind the
 * entries created so far on failure.  Returns 0 or -ENOMEM.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	/* Same name, but under the per-netns stat directory. */
	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800409
/* Per-netns exit: remove the /proc entries created by
 * ip_rt_do_proc_init(), in reverse order of creation.
 */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
418
/* pernet hooks wiring the /proc setup/teardown into netns lifecycle. */
static struct pernet_operations ip_rt_proc_ops __net_initdata =	{
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
423
/* Boot-time registration of the per-netns /proc hooks above. */
static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}
428
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800429#else
Denis V. Lunev73b38712008-02-28 20:51:18 -0800430static inline int ip_rt_proc_init(void)
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800431{
432 return 0;
433}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900435
/* A cached route is stale when its generation id no longer matches the
 * current generation of its device's network namespace (the generation
 * is bumped by rt_cache_flush()).
 */
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
440
/* "Flush" all cached routes in a namespace by bumping its route
 * generation counter; stale entries are then detected lazily via
 * rt_is_expired().
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}
445
David S. Millerf894cbf2012-07-02 21:52:24 -0700446static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
447 struct sk_buff *skb,
448 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000449{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700450 struct net_device *dev = dst->dev;
451 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500452 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000453 struct neighbour *n;
454
David S. Miller39232972012-01-26 15:22:32 -0500455 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700456 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500457 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700458 else if (skb)
459 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700460
David S. Miller80703d22012-02-15 17:48:35 -0500461 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700462 if (n)
463 return n;
David Miller32092ec2011-07-25 00:01:41 +0000464 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700465}
466
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467/*
468 * Peer allocation may fail only in serious out-of-memory conditions. However
469 * we still can generate some output.
470 * Random ID selection looks a bit dangerous because we have no chances to
471 * select ID being unique in a reasonable period of time.
472 * But broken packet identifier may be better than no packet at all.
473 */
/* Fallback IP-ID generator used when no inet_peer could be allocated.
 * Mixes the destination address into a global rolling value under a
 * spinlock; see the block comment above about its weak uniqueness
 * guarantees.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;
	spin_unlock_bh(&ip_fb_id_lock);
}
486
/* Select the IP header identification field for an outgoing packet.
 * Preferred path: a per-destination inet_peer counter (advanced by
 * 'more', the number of fragments the caller will emit).  If no peer
 * can be obtained, fall back to the global generator above.
 */
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
/* Fill in an output flow key from an IP header.  When a socket is
 * supplied, its bound device, mark, TOS and protocol override the
 * explicit oif/tos/prot/mark arguments (raw sockets with hdrincl report
 * IPPROTO_RAW).
 */
static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}
521
/* Build a flow key from a received skb (device index, TOS, protocol and
 * mark taken from the packet), optionally refined by the socket.
 */
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}
533
/* Build a flow key from a connected socket alone (no skb available).
 * Honours a source-route option: if SRR is set, the first-hop address
 * (faddr) replaces the socket's destination.  The IP options are read
 * under rcu_read_lock because inet_opt is RCU-managed.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}
551
/* Build a flow key from whatever context is available: prefer the skb
 * (packet-derived key) and fall back to the socket's cached addressing
 * when no skb was supplied.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb)
		build_sk_flow_key(fl4, sk);
	else
		build_skb_flow_key(fl4, skb, sk);
}
560
/* Free a route after an RCU grace period, so concurrent RCU readers
 * holding a reference cannot see it disappear underneath them.
 */
static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

/* Serializes all writers of the fib_nh exception (fnhe) hash chains. */
static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700567
/* Drop the input and output routes cached on an exception entry.
 * Pointers are cleared first, then the routes are RCU-freed; callers
 * hold fnhe_lock, so plain RCU_INIT_POINTER suffices for the clear.
 */
static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
{
	struct rtable *rt;

	rt = rcu_dereference(fnhe->fnhe_rth_input);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
		rt_free(rt);
	}
	rt = rcu_dereference(fnhe->fnhe_rth_output);
	if (rt) {
		RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
		rt_free(rt);
	}
}
583
/* Pick the least-recently-stamped exception on a hash chain for reuse,
 * flushing its cached routes first.  The caller guarantees the chain is
 * non-empty (it is only invoked when the chain exceeded
 * FNHE_RECLAIM_DEPTH), so dereferencing the head is safe.
 */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	fnhe_flush_routes(oldest);
	return oldest;
}
597
/* Hash a destination address into an fnhe bucket index by folding the
 * upper bits down with two XOR-shifts, then masking to the table size
 * (FNHE_HASH_SIZE must be a power of two for the mask to be valid).
 */
static inline u32 fnhe_hashfun(__be32 daddr)
{
	u32 hval;

	hval = (__force u32) daddr;
	hval ^= (hval >> 11) ^ (hval >> 22);

	return hval & (FNHE_HASH_SIZE - 1);
}
607
/* Copy exception state (learned PMTU, expiry, redirect gateway) into a
 * cached route so it reflects the exception without a new lookup.
 */
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gateway = fnhe->fnhe_gw;
		rt->rt_uses_gateway = 1;
	}
}
619
/* Record a per-destination routing exception (redirect gateway and/or
 * learned PMTU) on a nexthop.
 *
 * Under fnhe_lock: lazily allocate the hash table, then either update
 * the existing entry for daddr (propagating the new state into any
 * routes already cached on it) or create a new one — recycling the
 * oldest entry if the chain is too deep.  On creation, the nexthop's
 * cached input route and all per-CPU output routes are marked
 * DST_OBSOLETE_KILL so users re-validate against the new exception.
 * Allocation failures are silently ignored (best effort).
 */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			/* max(1UL, ...) keeps expiry nonzero ("no expiry"
			 * is encoded as 0).
			 */
			fnhe->fnhe_expires = max(1UL, expires);
		}
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			/* Publish at chain head only after the node is
			 * ready for readers.
			 */
			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nh->nh_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
704
/* Process an ICMP redirect for the route 'rt'.
 *
 * Accepts only the four standard redirect codes, and only when the
 * redirect comes from the route's current gateway.  The advised new
 * gateway is validated (not multicast/broadcast/zeronet, on-link or a
 * unicast address depending on shared-media policy, and not a default
 * route when secure redirects are enforced) before an fnhe exception
 * is recorded via update_or_create_fnhe().  If the new gateway's
 * neighbour entry is not yet valid, resolution is kicked off instead.
 * When kill_route is set, the old route is marked DST_OBSOLETE_KILL.
 * Rejected redirects are (rate-limited) logged when martian logging is
 * enabled.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Gateway not resolved yet: trigger resolution,
			 * the redirect will be honoured on a retry.
			 */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
784
David S. Miller4895c772012-07-17 04:19:00 -0700785static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
786{
787 struct rtable *rt;
788 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200789 const struct iphdr *iph = (const struct iphdr *) skb->data;
790 int oif = skb->dev->ifindex;
791 u8 tos = RT_TOS(iph->tos);
792 u8 prot = iph->protocol;
793 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700794
795 rt = (struct rtable *) dst;
796
Michal Kubecekf96ef982013-05-28 08:26:49 +0200797 __build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700798 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700799}
800
Linus Torvalds1da177e2005-04-16 15:20:36 -0700801static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
802{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800803 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700804 struct dst_entry *ret = dst;
805
806 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000807 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700808 ip_rt_put(rt);
809 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700810 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
811 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700812 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813 ret = NULL;
814 }
815 }
816 return ret;
817}
818
819/*
820 * Algorithm:
821 * 1. The first ip_rt_redirect_number redirects are sent
822 * with exponential backoff, then we stop sending them at all,
823 * assuming that the host ignores our redirects.
824 * 2. If we did not see packets requiring redirects
825 * during ip_rt_redirect_silence, we assume that the host
826 * forgot redirected route and start to send redirects again.
827 *
828 * This algorithm is much cheaper and more intelligent than dumb load limiting
829 * in icmp.c.
830 *
831 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
832 * and "frag. need" (breaks PMTU discovery) in icmp.c.
833 */
834
/* Send an ICMP redirect for @skb back to its source, rate-limited per
 * source address via the inet_peer cache: exponential backoff up to
 * ip_rt_redirect_number redirects, with the counter reset after
 * ip_rt_redirect_silence without redirected traffic (see the algorithm
 * comment above).
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	/* RCU section only covers the in_device lookup; the sysctl value
	 * we need later is copied out before unlocking.
	 */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		/* No peer entry available: send unthrottled. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything
	 * set dst.rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.  Interval doubles with each redirect already sent
	 * (ip_rt_redirect_load << rate_tokens).
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
897
898static int ip_error(struct sk_buff *skb)
899{
David S. Miller251da412012-06-26 16:27:09 -0700900 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000901 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800902 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700903 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700904 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800905 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700906 int code;
907
David S. Miller251da412012-06-26 16:27:09 -0700908 net = dev_net(rt->dst.dev);
909 if (!IN_DEV_FORWARD(in_dev)) {
910 switch (rt->dst.error) {
911 case EHOSTUNREACH:
912 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
913 break;
914
915 case ENETUNREACH:
916 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
917 break;
918 }
919 goto out;
920 }
921
Changli Gaod8d1f302010-06-10 23:31:35 -0700922 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000923 case EINVAL:
924 default:
925 goto out;
926 case EHOSTUNREACH:
927 code = ICMP_HOST_UNREACH;
928 break;
929 case ENETUNREACH:
930 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700931 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000932 break;
933 case EACCES:
934 code = ICMP_PKT_FILTERED;
935 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936 }
937
David S. Miller1d861aa2012-07-10 03:58:16 -0700938 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800939
940 send = true;
941 if (peer) {
942 now = jiffies;
943 peer->rate_tokens += now - peer->rate_last;
944 if (peer->rate_tokens > ip_rt_error_burst)
945 peer->rate_tokens = ip_rt_error_burst;
946 peer->rate_last = now;
947 if (peer->rate_tokens >= ip_rt_error_cost)
948 peer->rate_tokens -= ip_rt_error_cost;
949 else
950 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700951 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700952 }
David S. Miller92d86822011-02-04 15:55:25 -0800953 if (send)
954 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700955
956out: kfree_skb(skb);
957 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900958}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700959
/* Record a new path MTU for the destination in @fl4 as a nexthop
 * exception on the matching FIB nexthop, with expiry
 * jiffies + ip_rt_mtu_expires.  Bails out if the MTU metric is locked,
 * the new value would exceed the device MTU, or the same value is
 * already recorded and not close to expiring.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* A PMTU larger than the device MTU cannot be the path limit. */
	if (dst->dev->mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	/* Same value already stored and more than half its lifetime
	 * left: nothing to refresh.
	 */
	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
987
David S. Miller4895c772012-07-17 04:19:00 -0700988static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
989 struct sk_buff *skb, u32 mtu)
990{
991 struct rtable *rt = (struct rtable *) dst;
992 struct flowi4 fl4;
993
994 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +0000995 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -0700996}
997
David S. Miller36393392012-06-14 22:21:46 -0700998void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
999 int oif, u32 mark, u8 protocol, int flow_flags)
1000{
David S. Miller4895c772012-07-17 04:19:00 -07001001 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001002 struct flowi4 fl4;
1003 struct rtable *rt;
1004
David S. Miller4895c772012-07-17 04:19:00 -07001005 __build_flow_key(&fl4, NULL, iph, oif,
1006 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -07001007 rt = __ip_route_output_key(net, &fl4);
1008 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001009 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001010 ip_rt_put(rt);
1011 }
1012}
1013EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1014
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001015static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001016{
David S. Miller4895c772012-07-17 04:19:00 -07001017 const struct iphdr *iph = (const struct iphdr *) skb->data;
1018 struct flowi4 fl4;
1019 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001020
David S. Miller4895c772012-07-17 04:19:00 -07001021 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1022 rt = __ip_route_output_key(sock_net(sk), &fl4);
1023 if (!IS_ERR(rt)) {
1024 __ip_rt_update_pmtu(rt, &fl4, mtu);
1025 ip_rt_put(rt);
1026 }
David S. Miller36393392012-06-14 22:21:46 -07001027}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001028
/* Apply a PMTU update to a socket's cached route, re-routing and
 * re-installing the dst on the socket when the cached one is stale.
 * Runs under bh_lock_sock(); if the socket is owned by user context
 * (or has no cached route) we fall back to the out-of-band helper
 * which does not touch sk->sk_dst_cache.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *dst;
	bool new = false;		/* true once we hold a route we allocated */

	bh_lock_sock(sk);
	rt = (struct rtable *) __sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !rt) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	if (!__sk_dst_check(sk, 0)) {
		/* Cached dst is obsolete: get a fresh route first. */
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Update through dst.path so an xfrm bundle's inner route is hit. */
	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	dst = dst_check(&rt->dst, 0);
	if (!dst) {
		/* The PMTU update itself may have killed the route;
		 * drop our reference if we own it and re-route.
		 */
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		__sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001076
David S. Millerb42597e2012-07-11 21:25:45 -07001077void ipv4_redirect(struct sk_buff *skb, struct net *net,
1078 int oif, u32 mark, u8 protocol, int flow_flags)
1079{
David S. Miller4895c772012-07-17 04:19:00 -07001080 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001081 struct flowi4 fl4;
1082 struct rtable *rt;
1083
David S. Miller4895c772012-07-17 04:19:00 -07001084 __build_flow_key(&fl4, NULL, iph, oif,
1085 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001086 rt = __ip_route_output_key(net, &fl4);
1087 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001088 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001089 ip_rt_put(rt);
1090 }
1091}
1092EXPORT_SYMBOL_GPL(ipv4_redirect);
1093
1094void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1095{
David S. Miller4895c772012-07-17 04:19:00 -07001096 const struct iphdr *iph = (const struct iphdr *) skb->data;
1097 struct flowi4 fl4;
1098 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -07001099
David S. Miller4895c772012-07-17 04:19:00 -07001100 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1101 rt = __ip_route_output_key(sock_net(sk), &fl4);
1102 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001103 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001104 ip_rt_put(rt);
1105 }
David S. Millerb42597e2012-07-11 21:25:45 -07001106}
1107EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1108
David S. Millerefbc3682011-12-01 13:38:59 -05001109static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1110{
1111 struct rtable *rt = (struct rtable *) dst;
1112
David S. Millerceb33202012-07-17 11:31:28 -07001113 /* All IPV4 dsts are created with ->obsolete set to the value
1114 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1115 * into this function always.
1116 *
Timo Teräs387aa652013-05-27 20:46:31 +00001117 * When a PMTU/redirect information update invalidates a route,
1118 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1119 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001120 */
Timo Teräs387aa652013-05-27 20:46:31 +00001121 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001122 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001123 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001124}
1125
Linus Torvalds1da177e2005-04-16 15:20:36 -07001126static void ipv4_link_failure(struct sk_buff *skb)
1127{
1128 struct rtable *rt;
1129
1130 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1131
Eric Dumazet511c3f92009-06-02 05:14:27 +00001132 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001133 if (rt)
1134 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001135}
1136
/* Output handler installed on routes that must never be transmitted on
 * (e.g. multicast input routes).  Reaching it is a bug: log the flow,
 * drop the packet, and WARN.
 */
static int ip_rt_bug(struct sk_buff *skb)
{
	pr_debug("%s: %pI4 -> %pI4, %s\n",
		 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
		 skb->dev ? skb->dev->name : "?");
	kfree_skb(skb);
	WARN_ON(1);
	return 0;
}
1146
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so that it is out of the fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */
1155
/* Store into @addr (4 bytes, possibly unaligned — hence memcpy) the
 * source address our side would use on @rt: the packet's own saddr for
 * output routes, otherwise the preferred source from a reverse FIB
 * lookup, falling back to an address selection on the output device.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Build a flow mirroring the received packet for the
		 * reverse lookup.
		 */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1188
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge @tag into the route's traffic-class id: each 16-bit half
 * (realms are stored as two 16-bit values) is taken from @tag only if
 * that half is still unset, so earlier tags take precedence.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	if (!(rt->dst.tclassid & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(rt->dst.tclassid & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif
1198
David S. Miller0dbaee32010-12-13 12:52:14 -08001199static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1200{
1201 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1202
1203 if (advmss == 0) {
1204 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1205 ip_rt_min_advmss);
1206 if (advmss > 65535 - 40)
1207 advmss = 65535 - 40;
1208 }
1209 return advmss;
1210}
1211
/* dst_ops->mtu hook: effective MTU for this route.  Preference order:
 * unexpired learned PMTU, the RTAX_MTU metric, then the device MTU
 * (reduced to 576 for locked-MTU gatewayed routes per old BSD
 * semantics), capped at IP_MAX_MTU.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* An expired PMTU exception no longer applies. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	if (mtu > IP_MAX_MTU)
		mtu = IP_MAX_MTU;

	return mtu;
}
1235
David S. Millerf2bb4be2012-07-17 12:20:47 -07001236static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001237{
1238 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1239 struct fib_nh_exception *fnhe;
1240 u32 hval;
1241
David S. Millerf2bb4be2012-07-17 12:20:47 -07001242 if (!hash)
1243 return NULL;
1244
David S. Millerd3a25c92012-07-17 13:23:08 -07001245 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001246
1247 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1248 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001249 if (fnhe->fnhe_daddr == daddr)
1250 return fnhe;
1251 }
1252 return NULL;
1253}
David S. Miller4895c772012-07-17 04:19:00 -07001254
/* Bind the freshly built route @rt to nexthop exception @fnhe: copy the
 * exception's gateway/PMTU data into the route and, if the route is
 * cacheable, publish it as the exception's cached input or output
 * route.  Returns true iff @rt was cached.  Serialized by fnhe_lock.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		/* Separate cached routes for input and output direction. */
		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		/* Stale generation: wipe the exception's learned data and
		 * any cached routes before reusing it.
		 */
		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		if (!(rt->dst.flags & DST_NOCACHE)) {
			rcu_assign_pointer(*porig, rt);
			if (orig)
				rt_free(orig);
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1298
/* Cache @rt on the FIB nexthop: in the shared nh_rth_input slot for
 * input routes, or this CPU's nh_pcpu_rth_output slot for output
 * routes.  Installation is lockless via cmpxchg; returns false when a
 * concurrent writer won the race (the caller then marks @rt uncached).
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		/* We won the slot; release whatever we displaced. */
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}
1320
/* Global list of routes that bypass the nexthop cache (DST_NOCACHE),
 * kept so rt_flush_dev() can retarget them when a device goes away;
 * rt_uncached_lock protects the list.
 */
static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);
1323
/* Track an uncached route on the global list (see ipv4_dst_destroy()
 * and rt_flush_dev() for the removal/flush sides).
 */
static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}
1330
/* dst_ops->destroy hook: unlink the route from the uncached list if it
 * was ever placed there (list_empty() means it never was).
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (!list_empty(&rt->rt_uncached)) {
		spin_lock_bh(&rt_uncached_lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1341
/* Called when @dev goes away: repoint every uncached route still using
 * it at the namespace loopback device, transferring the device
 * reference, so the routes stay valid until their owners drop them.
 */
void rt_flush_dev(struct net_device *dev)
{
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1359
Eric Dumazet4331deb2012-07-25 05:11:23 +00001360static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001361{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001362 return rt &&
1363 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1364 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001365}
1366
/* Finish initializing @rt from the FIB lookup result: gateway, metrics,
 * classid tags, and caching — either binding to a nexthop exception
 * (@fnhe) or installing in the nexthop cache.  Any route that ends up
 * uncached is flagged DST_NOCACHE and tracked on the uncached list.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Only a directly connected (link scope) gateway is used. */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1410
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001411static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001412 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001413{
David S. Millerf5b0a872012-07-19 12:31:33 -07001414 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001415 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001416 (nopolicy ? DST_NOPOLICY : 0) |
1417 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001418}
1419
/* called in rcu_read_lock() section */
/*
 * Handle input of a multicast packet: sanity-check the addresses,
 * validate the source against the FIB, then build a fresh (never
 * cached) rtable and attach it to the skb.
 *
 * @our: non-zero when this host is a member of the group; the packet
 *       is then also delivered locally (RTCF_LOCAL, ip_local_deliver).
 *
 * Returns 0 on success or a negative errno.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* Source must be unicast and the frame must actually be IPv4. */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Loopback sources are acceptable only with route_localnet set. */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 sources are accepted only toward link-local
		 * multicast groups; anything else skips source validation
		 * entirely and is rejected.
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	/* will_cache == false: multicast input routes are never cached. */
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Taking the output path on an input multicast route is a bug. */
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags	= RTCF_MULTICAST;
	rth->rt_type	= RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	/* Non-link-local groups may be handed to the multicast router. */
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1491
1492
1493static void ip_handle_martian_source(struct net_device *dev,
1494 struct in_device *in_dev,
1495 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001496 __be32 daddr,
1497 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498{
1499 RT_CACHE_STAT_INC(in_martian_src);
1500#ifdef CONFIG_IP_ROUTE_VERBOSE
1501 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1502 /*
1503 * RFC1812 recommendation, if source is martian,
1504 * the only hint is MAC header.
1505 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001506 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001507 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001508 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001509 print_hex_dump(KERN_WARNING, "ll header: ",
1510 DUMP_PREFIX_OFFSET, 16, 1,
1511 skb_mac_header(skb),
1512 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001513 }
1514 }
1515#endif
1516}
1517
/* called in rcu_read_lock() section */
/*
 * Build the forwarding route for a packet leaving via FIB_RES_DEV(*res)
 * and attach it to the skb.  When possible the per-nexthop (or
 * per-exception) cached rtable is reused instead of allocating.
 *
 * Returns 0 on success or a negative errno.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only cache when the route carries a fib_info and no tclassid tag. */
	do_cache = res->fi && !itag;
	/* err > 0 here means fib_validate_source found the source reachable
	 * via a different interface; going back out the input device then
	 * warrants an ICMP redirect, and such routes must not be cached
	 * because the decision depends on this packet's source.
	 */
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
		flags |= RTCF_DOREDIRECT;
		do_cache = false;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* Prefer a per-destination exception entry over the plain nexthop. */
	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
	if (do_cache) {
		if (fnhe != NULL)
			rth = rcu_dereference(fnhe->fnhe_rth_input);
		else
			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	/* rt_set_nexthop() also inserts rth into the nexthop cache when
	 * caching was requested via rt_dst_alloc() above.
	 */
	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
Stephen Hemminger5969f712008-04-10 01:52:09 -07001613static int ip_mkroute_input(struct sk_buff *skb,
1614 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001615 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001616 struct in_device *in_dev,
1617 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08001620 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001621 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622#endif
1623
1624 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001625 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001626}
1627
/*
 * NOTE. We drop all packets that have a local source
 * address, because every properly looped-back packet
 * must have the correct destination already attached by the output routine.
 *
 * Such an approach solves two big problems:
 * 1. Non-simplex devices are handled properly.
 * 2. IP spoofing attempts are filtered with a 100% guarantee.
 * called with rcu_read_lock()
 */
1638
/*
 * Slow-path input route resolution: classify daddr/saddr (martian,
 * broadcast, local, forwardable), consult the FIB, and attach a
 * suitable rtable to the skb.  Local/broadcast routes may be reused
 * from (and inserted into) the nexthop input cache.
 *
 * Called with rcu_read_lock() held (see comment above).
 * Returns 0 on success or a negative errno.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4	fl4;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	int		err = -EINVAL;
	struct net    *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  LOOPBACK_IFINDEX,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Try to reuse a cached input route; only cache new ones when the
	 * route has a fib_info and no tclassid tag (itag == 0).
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	rth->dst.input= ip_local_deliver;
	/* Taking the output path on an input route is a bug. */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags 	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (res.type == RTN_UNREACHABLE) {
		/* Reached via the no_route label: deliver to ip_error so
		 * the appropriate ICMP unreachable is generated.
		 */
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1815
David S. Millerc6cffba2012-07-26 11:14:38 +00001816int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1817 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818{
Eric Dumazet96d36222010-06-02 19:21:31 +00001819 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001820
Eric Dumazet96d36222010-06-02 19:21:31 +00001821 rcu_read_lock();
1822
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 /* Multicast recognition logic is moved from route cache to here.
1824 The problem was that too many Ethernet cards have broken/missing
1825 hardware multicast filters :-( As result the host on multicasting
1826 network acquires a lot of useless route cache entries, sort of
1827 SDR messages from all the world. Now we try to get rid of them.
1828 Really, provided software IP multicast filter is organized
1829 reasonably (at least, hashed), it does not result in a slowdown
1830 comparing with route cache reject entries.
1831 Note, that multicast routers are not affected, because
1832 route cache entry is created eventually.
1833 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001834 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001835 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836
Eric Dumazet96d36222010-06-02 19:21:31 +00001837 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001838 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1839 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 if (our
1841#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001842 ||
1843 (!ipv4_is_local_multicast(daddr) &&
1844 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001846 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001847 int res = ip_route_input_mc(skb, daddr, saddr,
1848 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001849 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001850 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 }
1852 }
1853 rcu_read_unlock();
1854 return -EINVAL;
1855 }
David S. Millerc10237e2012-06-27 17:05:06 -07001856 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001857 rcu_read_unlock();
1858 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001859}
David S. Millerc6cffba2012-07-26 11:14:38 +00001860EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001861
/* called with rcu_read_lock() */
/*
 * Build (or fetch from the per-CPU nexthop / exception cache) an output
 * rtable for the flow @fl4 leaving via @dev_out, based on FIB lookup
 * result @res.
 *
 * Returns the rtable with a reference held, or an ERR_PTR() on failure.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* Loopback sources may only leave via loopback unless
	 * route_localnet is enabled on the output device.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	/* Reclassify the route type from the destination address. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	fnhe = NULL;
	/* Caching requires a fib_info to hang the cached route off of. */
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* A per-destination exception entry takes precedence over
		 * the per-CPU nexthop cache slot.
		 */
		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth_output;
		else {
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nh->nh_gw &&
				       nh->nh_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	/* rt_set_nexthop() also inserts rth into the cache slot chosen
	 * above when do_cache was left true.
	 */
	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}
1983
Linus Torvalds1da177e2005-04-16 15:20:36 -07001984/*
1985 * Major route resolver routine.
1986 */
1987
David S. Miller89aef892012-07-17 11:00:09 -07001988struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001989{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001991 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001992 unsigned int flags = 0;
1993 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001994 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001995 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996
David S. Miller85b91b02012-07-13 08:21:29 -07001997 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001998 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001999 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000
David S. Miller813b3b52011-04-28 14:48:42 -07002001 orig_oif = fl4->flowi4_oif;
2002
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002003 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002004 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2005 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2006 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002007
David S. Miller010c2702011-02-17 15:37:09 -08002008 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07002009 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002010 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07002011 if (ipv4_is_multicast(fl4->saddr) ||
2012 ipv4_is_lbcast(fl4->saddr) ||
2013 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 goto out;
2015
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 /* I removed check for oif == dev_out->oif here.
2017 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08002018 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
2019 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020 2. Moreover, we are allowed to send packets with saddr
2021 of another iface. --ANK
2022 */
2023
David S. Miller813b3b52011-04-28 14:48:42 -07002024 if (fl4->flowi4_oif == 0 &&
2025 (ipv4_is_multicast(fl4->daddr) ||
2026 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07002027 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002028 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07002029 if (dev_out == NULL)
2030 goto out;
2031
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032 /* Special hack: user can direct multicasts
2033 and limited broadcast via necessary interface
2034 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2035 This hack is not just for fun, it allows
2036 vic,vat and friends to work.
2037 They bind socket to loopback, set ttl to zero
2038 and expect that it will work.
2039 From the viewpoint of routing cache they are broken,
2040 because we are not allowed to build multicast path
2041 with loopback source addr (look, routing cache
2042 cannot know, that ttl is zero, so that packet
2043 will not leave this host and route is valid).
2044 Luckily, this hack is good workaround.
2045 */
2046
David S. Miller813b3b52011-04-28 14:48:42 -07002047 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048 goto make_route;
2049 }
Julian Anastasova210d012008-10-01 07:28:28 -07002050
David S. Miller813b3b52011-04-28 14:48:42 -07002051 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002052 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002053 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002054 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002055 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056 }
2057
2058
David S. Miller813b3b52011-04-28 14:48:42 -07002059 if (fl4->flowi4_oif) {
2060 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002061 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 if (dev_out == NULL)
2063 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002064
2065 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002066 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002067 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002068 goto out;
2069 }
David S. Miller813b3b52011-04-28 14:48:42 -07002070 if (ipv4_is_local_multicast(fl4->daddr) ||
2071 ipv4_is_lbcast(fl4->daddr)) {
2072 if (!fl4->saddr)
2073 fl4->saddr = inet_select_addr(dev_out, 0,
2074 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002075 goto make_route;
2076 }
David S. Miller813b3b52011-04-28 14:48:42 -07002077 if (fl4->saddr) {
2078 if (ipv4_is_multicast(fl4->daddr))
2079 fl4->saddr = inet_select_addr(dev_out, 0,
2080 fl4->flowi4_scope);
2081 else if (!fl4->daddr)
2082 fl4->saddr = inet_select_addr(dev_out, 0,
2083 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084 }
2085 }
2086
David S. Miller813b3b52011-04-28 14:48:42 -07002087 if (!fl4->daddr) {
2088 fl4->daddr = fl4->saddr;
2089 if (!fl4->daddr)
2090 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002091 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002092 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093 res.type = RTN_LOCAL;
2094 flags |= RTCF_LOCAL;
2095 goto make_route;
2096 }
2097
David S. Miller813b3b52011-04-28 14:48:42 -07002098 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002100 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07002101 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102 /* Apparently, routing tables are wrong. Assume,
2103 that the destination is on link.
2104
2105 WHY? DW.
2106 Because we are allowed to send to iface
2107 even if it has NO routes and NO assigned
2108 addresses. When oif is specified, routing
2109 tables are looked up with only one purpose:
2110 to catch if destination is gatewayed, rather than
2111 direct. Moreover, if MSG_DONTROUTE is set,
2112 we send packet, ignoring both routing tables
2113 and ifaddr state. --ANK
2114
2115
2116 We could make it even if oif is unknown,
2117 likely IPv6, but we do not.
2118 */
2119
David S. Miller813b3b52011-04-28 14:48:42 -07002120 if (fl4->saddr == 0)
2121 fl4->saddr = inet_select_addr(dev_out, 0,
2122 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123 res.type = RTN_UNICAST;
2124 goto make_route;
2125 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08002126 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 goto out;
2128 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002129
2130 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002131 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002132 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002133 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002134 else
David S. Miller813b3b52011-04-28 14:48:42 -07002135 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002136 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002137 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002138 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 flags |= RTCF_LOCAL;
2140 goto make_route;
2141 }
2142
2143#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002144 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002145 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 else
2147#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002148 if (!res.prefixlen &&
2149 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002150 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002151 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002152
David S. Miller813b3b52011-04-28 14:48:42 -07002153 if (!fl4->saddr)
2154 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155
Linus Torvalds1da177e2005-04-16 15:20:36 -07002156 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002157 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002158
2159
2160make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002161 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002162
David S. Miller010c2702011-02-17 15:37:09 -08002163out:
2164 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002165 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002166}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002167EXPORT_SYMBOL_GPL(__ip_route_output_key);
2168
/*
 * dst_ops->check for blackhole routes: always report the entry as
 * invalid (NULL) so callers are forced to perform a fresh route lookup.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2173
Steffen Klassertebb762f2011-11-23 02:12:51 +00002174static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002175{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002176 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2177
2178 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002179}
2180
/*
 * dst_ops->update_pmtu for blackhole routes: intentionally a no-op,
 * PMTU feedback is discarded on a blackhole dst.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2185
/*
 * dst_ops->redirect for blackhole routes: intentionally a no-op,
 * ICMP redirects are ignored on a blackhole dst.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2190
/*
 * dst_ops->cow_metrics for blackhole routes: refuse to copy-on-write
 * the metrics array; a blackhole dst never gets private metrics.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2196
/*
 * dst_ops used by ipv4_blackhole_route(): every mutating callback is a
 * stub so the resulting dst silently absorbs PMTU updates, redirects
 * and metric writes.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.protocol		=	cpu_to_be16(ETH_P_IP),
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2208
/*
 * Clone @dst_orig into a "blackhole" route that discards all packets
 * (input and output are dst_discard) while preserving the original
 * route's identity fields.  Used e.g. by xfrm when a lookup must not
 * leak traffic.  Consumes a reference on @dst_orig in all cases and
 * returns the new dst, or ERR_PTR(-ENOMEM) on allocation failure.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Both directions swallow packets. */
		new->input = dst_discard;
		new->output = dst_discard;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		/* Copy routing identity from the original entry. */
		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2245
David S. Miller9d6ec932011-03-12 01:12:47 -05002246struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002247 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248{
David S. Miller9d6ec932011-03-12 01:12:47 -05002249 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002250
David S. Millerb23dd4f2011-03-02 14:31:35 -08002251 if (IS_ERR(rt))
2252 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002253
David S. Miller56157872011-05-02 14:37:45 -07002254 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002255 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2256 flowi4_to_flowi(flp4),
2257 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002258
David S. Millerb23dd4f2011-03-02 14:31:35 -08002259 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002260}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002261EXPORT_SYMBOL_GPL(ip_route_output_flow);
2262
/*
 * Fill a netlink message describing the route attached to @skb
 * (skb_rtable()) for an RTM_GETROUTE reply.  @dst/@src are the
 * addresses from the request, @fl4 the flow used for the lookup.
 * Returns the result of nlmsg_end() on success, 0 when an mroute
 * query was answered asynchronously, or -EMSGSIZE when the message
 * does not fit.
 */
static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	/* Cached entries carry RTM_F_CLONED; low flag bits are masked off. */
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Output routes report the preferred source if it differs from src. */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Convert the absolute expiry to a remaining-jiffies value. */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* A still-valid PMTU learned via ICMP overrides the MTU metric. */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Multicast forwarding: ask ipmr for the real route info. */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2372
/*
 * RTM_GETROUTE handler: perform a one-off route lookup on behalf of
 * userspace.  If RTA_IIF is supplied the lookup simulates an incoming
 * packet on that device (ip_route_input), otherwise an output lookup
 * is done, and the result is sent back as an RTM_NEWROUTE message.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* Extract lookup keys from the request attributes (0 if absent). */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		/* Input-path lookup: fake reception on the given device. */
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		/* A successful call may still yield an error route. */
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
2469
/*
 * Dump handler for the (removed) routing cache: there is nothing to
 * dump any more, so just report the current skb length to terminate
 * the netlink dump.
 */
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;
}
2474
/*
 * Multicast configuration changed on @in_dev: flush the route cache of
 * the device's network namespace so stale decisions are dropped.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
2479
2480#ifdef CONFIG_SYSCTL
/* Sysctl-tunable garbage-collection parameters (see ipv4_route_table). */
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2485
Joe Perchesfe2c6332013-06-11 23:04:25 -07002486static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002487 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002488 size_t *lenp, loff_t *ppos)
2489{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002490 struct net *net = (struct net *)__ctl->extra1;
2491
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002493 rt_cache_flush(net);
2494 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002496 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002497
2498 return -EINVAL;
2499}
2500
/*
 * Global (non-per-netns) route sysctls under net.ipv4.route.
 * Jiffies-valued entries use the *_jiffies/_ms_jiffies handlers so
 * userspace reads/writes seconds or milliseconds respectively.
 */
static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002611
/*
 * Per-netns flush trigger (net.ipv4.route.flush); write-only, extra1
 * is filled in with the owning struct net by sysctl_route_net_init().
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2621
2622static __net_init int sysctl_route_net_init(struct net *net)
2623{
2624 struct ctl_table *tbl;
2625
2626 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002627 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002628 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
2629 if (tbl == NULL)
2630 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00002631
2632 /* Don't export sysctls to unprivileged users */
2633 if (net->user_ns != &init_user_ns)
2634 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002635 }
2636 tbl[0].extra1 = net;
2637
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002638 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002639 if (net->ipv4.route_hdr == NULL)
2640 goto err_reg;
2641 return 0;
2642
2643err_reg:
2644 if (tbl != ipv4_route_flush_table)
2645 kfree(tbl);
2646err_dup:
2647 return -ENOMEM;
2648}
2649
2650static __net_exit void sysctl_route_net_exit(struct net *net)
2651{
2652 struct ctl_table *tbl;
2653
2654 tbl = net->ipv4.route_hdr->ctl_table_arg;
2655 unregister_net_sysctl_table(net->ipv4.route_hdr);
2656 BUG_ON(tbl == ipv4_route_flush_table);
2657 kfree(tbl);
2658}
2659
/* Pernet hooks wiring the per-namespace route sysctl setup/teardown. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664#endif
2665
/*
 * Per-netns init of route/fnhe generation counters and the random seed
 * used for device-address generation ids.  Always succeeds.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
2674
/* Pernet hook: generation-id initialisation only, no exit needed. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2678
David S. Millerc3426b42012-06-09 16:27:05 -07002679static int __net_init ipv4_inetpeer_init(struct net *net)
2680{
2681 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2682
2683 if (!bp)
2684 return -ENOMEM;
2685 inet_peer_base_init(bp);
2686 net->ipv4.peers = bp;
2687 return 0;
2688}
2689
2690static void __net_exit ipv4_inetpeer_exit(struct net *net)
2691{
2692 struct inet_peer_base *bp = net->ipv4.peers;
2693
2694 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002695 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002696 kfree(bp);
2697}
2698
/* Per-namespace lifetime of the IPv4 inet_peer storage. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002703
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-CPU route-classid accounting counters; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002707
/*
 * One-time boot initialization of the IPv4 routing layer.
 *
 * Sets up the dst slab caches and entry counters, disables cache GC
 * limits (gc_thresh = ~0, ip_rt_max_size = INT_MAX), initializes the
 * device/FIB layers, registers /proc files, xfrm hooks, the
 * RTM_GETROUTE rtnetlink handler and the per-namespace subsystems.
 * NOTE(review): the call ordering here appears load-bearing (caches
 * before counter init, devinet/fib before proc/pernet) — preserve it.
 *
 * Returns 0; unrecoverable allocation failures panic() instead.
 */
int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	/* 256 per-CPU accounting slots; boot cannot proceed without them. */
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts are allocated from the same slab cache. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively unlimited: no GC threshold, no size cap. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	/* /proc failure is non-fatal; routing still works without it. */
	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2751
#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	/* Register the static ipv4_route_table for the initial namespace. */
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif