blob: f51823dc998bbe2c19ceac4e471e82b0e4b1eaf3 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
Linus Torvalds7c0f6ba2016-12-24 11:46:01 -080068#include <linux/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Eric Dumazet73f156a2014-06-02 05:26:03 -070092#include <linux/jhash.h>
Herbert Xu352e5122007-11-13 21:34:06 -080093#include <net/dst.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +020094#include <net/dst_metadata.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020095#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070096#include <net/protocol.h>
97#include <net/ip.h>
98#include <net/route.h>
99#include <net/inetpeer.h>
100#include <net/sock.h>
101#include <net/ip_fib.h>
102#include <net/arp.h>
103#include <net/tcp.h>
104#include <net/icmp.h>
105#include <net/xfrm.h>
Roopa Prabhu571e7222015-07-21 10:43:47 +0200106#include <net/lwtunnel.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700107#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700108#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#ifdef CONFIG_SYSCTL
110#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000111#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700113#include <net/secure_seq.h>
Thomas Graf1b7179d2015-07-21 10:43:59 +0200114#include <net/ip_tunnels.h>
David Ahern385add92015-09-29 20:07:13 -0700115#include <net/l3mdev.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700116
David S. Miller68a5e3d2011-03-11 20:07:33 -0500117#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000118 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700119
#define RT_GC_TIMEOUT (300*HZ)

/* Routing tunables, adjustable via sysctl (net.ipv4.route.*). */
static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly = 9;
static int ip_rt_redirect_load __read_mostly = HZ / 50;
static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly = HZ;
static int ip_rt_error_burst __read_mostly = 5 * HZ;
static int ip_rt_mtu_expires __read_mostly = 10 * 60 * HZ;
/* Never accept a PMTU below 552 (512 + 20-byte IP + 20-byte TCP headers). */
static int ip_rt_min_pmtu __read_mostly = 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly = 256;

static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700133/*
134 * Interface to generic destination cache.
135 */
136
137static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800138static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000139static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
141static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700142static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
143 struct sk_buff *skb, u32 mtu);
144static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
145 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700146static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147
/* dst_ops.cow_metrics callback.  IPv4 routes never copy metrics on
 * write here, so this must never be reached; warn and return NULL.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
153
David S. Millerf894cbf2012-07-02 21:52:24 -0700154static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
155 struct sk_buff *skb,
156 const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700157
/* Operations vector plugged into the generic destination-cache layer
 * for IPv4 dst entries.  Handlers are defined elsewhere in this file.
 */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};
172
#define ECN_OR_COST(class)	TC_PRIO_##class

/* Lookup table mapping a 4-bit value derived from the IP TOS byte to a
 * packet-scheduler priority (TC_PRIO_*).  Odd entries use ECN_OR_COST,
 * which currently aliases the same TC_PRIO_ class as the even entry.
 * Exported for use by other networking code.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700194
Eric Dumazet2f970d82006-01-17 02:54:36 -0800195static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
Christoph Lameter3ed66e92014-04-07 15:39:40 -0700196#define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
200{
Eric Dumazet29e75252008-01-31 17:05:09 -0800201 if (*pos)
David S. Miller89aef892012-07-17 11:00:09 -0700202 return NULL;
Eric Dumazet29e75252008-01-31 17:05:09 -0800203 return SEQ_START_TOKEN;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700204}
205
206static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
207{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700209 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700210}
211
/* seq_file .stop: no iterator state to release. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
215
216static int rt_cache_seq_show(struct seq_file *seq, void *v)
217{
218 if (v == SEQ_START_TOKEN)
219 seq_printf(seq, "%-127s\n",
220 "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
221 "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
222 "HHUptod\tSpecDst");
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900223 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700224}
225
/* seq_file iterator for /proc/net/rt_cache (header line only). */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};
232
/* proc open handler: attach the rt_cache seq iterator to the file. */
static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}
237
/* File operations for /proc/net/rt_cache. */
static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
245
246
247static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
248{
249 int cpu;
250
251 if (*pos == 0)
252 return SEQ_START_TOKEN;
253
Rusty Russell0f23174a2008-12-29 12:23:42 +0000254 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255 if (!cpu_possible(cpu))
256 continue;
257 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800258 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 }
260 return NULL;
261}
262
263static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
264{
265 int cpu;
266
Rusty Russell0f23174a2008-12-29 12:23:42 +0000267 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268 if (!cpu_possible(cpu))
269 continue;
270 *pos = cpu+1;
Eric Dumazet2f970d82006-01-17 02:54:36 -0800271 return &per_cpu(rt_cache_stat, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700272 }
273 return NULL;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900274
Linus Torvalds1da177e2005-04-16 15:20:36 -0700275}
276
/* seq_file .stop: no iterator state to release. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
281
/* seq_file .show: emit the header line or one CPU's rt_cache_stat row.
 * Fields that belonged to the removed routing cache are printed as
 * literal zeroes to keep the /proc/net/stat/rt_cache format stable for
 * existing userspace parsers.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   0, /* st->in_hit */
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   0, /* st->out_hit */
		   st->out_slow_tot,
		   st->out_slow_mc,

		   0, /* st->gc_total */
		   0, /* st->gc_ignored */
		   0, /* st->gc_goal_miss */
		   0, /* st->gc_dst_overflow */
		   0, /* st->in_hlist_search */
		   0  /* st->out_hlist_search */
		);
	return 0;
}
315
/* seq_file iterator for /proc/net/stat/rt_cache (per-cpu stats). */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};
322
323
/* proc open handler: attach the per-cpu stats seq iterator. */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}
328
/* File operations for /proc/net/stat/rt_cache. */
static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
336
Patrick McHardyc7066f72011-01-14 13:36:42 +0100337#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800338static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800339{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800340 struct ip_rt_acct *dst, *src;
341 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800342
Alexey Dobriyana661c412009-11-25 15:40:35 -0800343 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
344 if (!dst)
345 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800346
Alexey Dobriyana661c412009-11-25 15:40:35 -0800347 for_each_possible_cpu(i) {
348 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
349 for (j = 0; j < 256; j++) {
350 dst[j].o_bytes += src[j].o_bytes;
351 dst[j].o_packets += src[j].o_packets;
352 dst[j].i_bytes += src[j].i_bytes;
353 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800354 }
355 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800356
357 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
358 kfree(dst);
359 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800360}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800361
/* proc open handler for the single-shot rt_acct dump. */
static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}
366
/* File operations for /proc/net/rt_acct (CONFIG_IP_ROUTE_CLASSID). */
static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800374#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800375
/* Per-netns creation of the routing /proc entries.  On any failure the
 * entries created so far are unwound via the goto chain and -ENOMEM is
 * returned.  Mirrors ip_rt_do_proc_exit().
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	/* /proc/net/rt_cache: legacy header-only listing. */
	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	/* /proc/net/stat/rt_cache: per-cpu statistics. */
	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	/* /proc/net/rt_acct: per-class traffic accounting. */
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800406
/* Per-netns teardown of the /proc entries created by
 * ip_rt_do_proc_init().
 */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
415
/* pernet hooks so each network namespace gets its own proc entries. */
static struct pernet_operations ip_rt_proc_ops __net_initdata =	{
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
420
/* Register the per-netns proc setup/teardown at boot. */
static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}
425
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800426#else
/* !CONFIG_PROC_FS stub: nothing to register. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700431#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900432
Eric Dumazet4331deb2012-07-25 05:11:23 +0000433static inline bool rt_is_expired(const struct rtable *rth)
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700434{
fan.duca4c3fc2013-07-30 08:33:53 +0800435 return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev));
Denis V. Luneve84f84f2008-07-05 19:04:32 -0700436}
437
/* Invalidate all cached IPv4 routes in @net by bumping the generation
 * id that rt_is_expired() compares against.
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump_ipv4(net);
}
442
David S. Millerf894cbf2012-07-02 21:52:24 -0700443static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
444 struct sk_buff *skb,
445 const void *daddr)
David Miller3769cff2011-07-11 22:44:24 +0000446{
David S. Millerd3aaeb32011-07-18 00:40:17 -0700447 struct net_device *dev = dst->dev;
448 const __be32 *pkey = daddr;
David S. Miller39232972012-01-26 15:22:32 -0500449 const struct rtable *rt;
David Miller3769cff2011-07-11 22:44:24 +0000450 struct neighbour *n;
451
David S. Miller39232972012-01-26 15:22:32 -0500452 rt = (const struct rtable *) dst;
David S. Millera263b302012-07-02 02:02:15 -0700453 if (rt->rt_gateway)
David S. Miller39232972012-01-26 15:22:32 -0500454 pkey = (const __be32 *) &rt->rt_gateway;
David S. Millerf894cbf2012-07-02 21:52:24 -0700455 else if (skb)
456 pkey = &ip_hdr(skb)->daddr;
David S. Millerd3aaeb32011-07-18 00:40:17 -0700457
David S. Miller80703d22012-02-15 17:48:35 -0500458 n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700459 if (n)
460 return n;
David Miller32092ec2011-07-25 00:01:41 +0000461 return neigh_create(&arp_tbl, pkey, dev);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700462}
463
Eric Dumazet04ca6972014-07-26 08:58:10 +0200464#define IP_IDENTS_SZ 2048u
Eric Dumazet04ca6972014-07-26 08:58:10 +0200465
Eric Dumazet355b5902015-05-01 10:37:49 -0700466static atomic_t *ip_idents __read_mostly;
467static u32 *ip_tstamps __read_mostly;
Eric Dumazet04ca6972014-07-26 08:58:10 +0200468
469/* In order to protect privacy, we add a perturbation to identifiers
470 * if one generator is seldom used. This makes hard for an attacker
471 * to infer how many packets were sent between two points in time.
472 */
/* Reserve @segs consecutive IP IDs from the generator bucket selected
 * by @hash, returning the first reserved ID.  If the bucket has been
 * idle (timestamp differs from the current jiffy), a random delta is
 * mixed in so an observer cannot count packets sent between two points
 * in time; the cmpxchg ensures only one CPU applies the perturbation.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ;
	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = ACCESS_ONCE(*p_tstamp);
	u32 now = (u32)jiffies;
	u32 new, delta = 0;

	if (old != now && cmpxchg(p_tstamp, old, now) == old)
		delta = prandom_u32_max(now - old);

	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
	do {
		old = (u32)atomic_read(p_id);
		new = old + delta + segs;
	} while (atomic_cmpxchg(p_id, old, new) != old);

	return new - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
Eric Dumazet73f156a2014-06-02 05:26:03 -0700493
/* Fill in iph->id for an outgoing packet that will occupy @segs IDs
 * (e.g. GSO segments).  The generator bucket is chosen by hashing the
 * flow (saddr, daddr, protocol) together with a boot-time random key
 * and a per-netns mix, so distinct flows use distinct ID sequences.
 */
void __ip_select_ident(struct net *net, struct iphdr *iph, int segs)
{
	static u32 ip_idents_hashrnd __read_mostly;
	u32 hash, id;

	/* Lazily initialize the hash key exactly once. */
	net_get_random_once(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));

	hash = jhash_3words((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol ^ net_hash_mix(net),
			    ip_idents_hashrnd);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
/* Initialize @fl4 for a route lookup.  When a socket is supplied, its
 * bound device, mark, TOS and protocol override the values taken from
 * the packet header; addresses always come from @iph.
 */
static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
			     const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0,
			   sock_net_uid(net, sk));
}
530
Eric Dumazet5abf7f72012-07-17 22:42:13 +0200531static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
532 const struct sock *sk)
David S. Miller4895c772012-07-17 04:19:00 -0700533{
Lorenzo Colittid109e612016-11-30 02:56:47 +0900534 const struct net *net = dev_net(skb->dev);
David S. Miller4895c772012-07-17 04:19:00 -0700535 const struct iphdr *iph = ip_hdr(skb);
536 int oif = skb->dev->ifindex;
537 u8 tos = RT_TOS(iph->tos);
538 u8 prot = iph->protocol;
539 u32 mark = skb->mark;
540
Lorenzo Colittid109e612016-11-30 02:56:47 +0900541 __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Miller4895c772012-07-17 04:19:00 -0700542}
543
/* Build a flow key purely from a connected socket's state (no skb).
 * If strict source routing options are set, the first-hop address from
 * the options replaces the socket's destination.  The RCU read lock
 * protects the inet_opt dereference.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0, sk->sk_uid);
	rcu_read_unlock();
}
561
/* Dispatch flow-key construction: use the packet when one is available,
 * otherwise fall back to the socket's connection state.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb)
		build_sk_flow_key(fl4, sk);
	else
		build_skb_flow_key(fl4, skb, sk);
}
570
/* Schedule an rtable for freeing after the current RCU grace period,
 * so concurrent RCU readers holding a reference stay safe.
 */
static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}
575
576static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700577
Timo Teräs2ffae992013-06-27 10:27:05 +0300578static void fnhe_flush_routes(struct fib_nh_exception *fnhe)
579{
580 struct rtable *rt;
581
582 rt = rcu_dereference(fnhe->fnhe_rth_input);
583 if (rt) {
584 RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL);
585 rt_free(rt);
586 }
587 rt = rcu_dereference(fnhe->fnhe_rth_output);
588 if (rt) {
589 RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL);
590 rt_free(rt);
591 }
592}
593
/* Find the least-recently-stamped exception in a non-empty hash bucket,
 * flush its cached routes, and return it for reuse.  Callers reach this
 * only when the chain depth exceeds FNHE_RECLAIM_DEPTH, so hash->chain
 * is assumed non-NULL; update_or_create_fnhe() calls this under
 * fnhe_lock.
 */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	fnhe_flush_routes(oldest);
	return oldest;
}
607
/* Hash a destination address into the FNHE bucket index, keyed by a
 * lazily-initialized boot-time random value to resist hash-collision
 * attacks.
 */
static inline u32 fnhe_hashfun(__be32 daddr)
{
	static u32 fnhe_hashrnd __read_mostly;
	u32 hval;

	net_get_random_once(&fnhe_hashrnd, sizeof(fnhe_hashrnd));
	hval = jhash_1word((__force u32) daddr, fnhe_hashrnd);
	return hash_32(hval, FNHE_HASH_SHIFT);
}
617
Timo Teräs387aa652013-05-27 20:46:31 +0000618static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
619{
620 rt->rt_pmtu = fnhe->fnhe_pmtu;
621 rt->dst.expires = fnhe->fnhe_expires;
622
623 if (fnhe->fnhe_gw) {
624 rt->rt_flags |= RTCF_REDIRECTED;
625 rt->rt_gateway = fnhe->fnhe_gw;
626 rt->rt_uses_gateway = 1;
627 }
628}
629
/* Record (or refresh) a next-hop exception for @daddr on @nh, learned
 * from an ICMP redirect (@gw) and/or PMTU discovery (@pmtu, valid until
 * @expires).  All mutation happens under fnhe_lock; readers traverse
 * the chains under RCU.
 */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	/* Allocate the per-nexthop exception table on first use. */
	hash = rcu_dereference(nh->nh_exceptions);
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		rcu_assign_pointer(nh->nh_exceptions, hash);
	}

	hash += hval;

	/* Look for an existing exception for this destination, tracking
	 * chain depth for the reclaim decision below.
	 */
	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		/* Refresh the existing entry with whichever facts were
		 * learned (gateway and/or PMTU).
		 */
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = max(1UL, expires);
		}
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth_input);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
		rt = rcu_dereference(fnhe->fnhe_rth_output);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		/* Reuse the oldest entry once the chain is too deep,
		 * otherwise allocate a fresh one at the chain head.
		 */
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		rt = rcu_dereference(nh->nh_rth_input);
		if (rt)
			rt->dst.obsolete = DST_OBSOLETE_KILL;

		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
}
713
/* Validate and apply an ICMP redirect received in @skb.
 *
 * The redirect is honoured only when it passes the sanity checks below
 * (known ICMP redirect code, sent by the route's current gateway, new
 * gateway is a plausible unicast/on-link address, redirects enabled on
 * the receiving interface).  On success the new gateway is recorded as
 * a nexthop exception (fnhe) so subsequent lookups use it, and the
 * current cached route is marked obsolete when @kill_route is set.
 *
 * Uses __in_dev_get_rcu(), so the caller must hold rcu_read_lock().
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are honoured. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* A redirect is only valid when sent by our current gateway. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		/* On non-shared media the new gateway must be on-link
		 * with the old one, and (with secure_redirects) must be
		 * one of our configured default gateways.
		 */
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = __ipv4_neigh_lookup(rt->dst.dev, new_gw);
	if (!n)
		n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev);
	if (!IS_ERR(n)) {
		if (!(n->nud_state & NUD_VALID)) {
			/* New gateway not yet resolved: kick off
			 * resolution instead of applying the redirect.
			 */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res, 0) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				/* Remember the redirect as a nexthop
				 * exception with a GC timeout.
				 */
				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						0, jiffies + ip_rt_gc_timeout);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     " Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
795
David S. Miller4895c772012-07-17 04:19:00 -0700796static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
797{
798 struct rtable *rt;
799 struct flowi4 fl4;
Michal Kubecekf96ef982013-05-28 08:26:49 +0200800 const struct iphdr *iph = (const struct iphdr *) skb->data;
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900801 struct net *net = dev_net(skb->dev);
Michal Kubecekf96ef982013-05-28 08:26:49 +0200802 int oif = skb->dev->ifindex;
803 u8 tos = RT_TOS(iph->tos);
804 u8 prot = iph->protocol;
805 u32 mark = skb->mark;
David S. Miller4895c772012-07-17 04:19:00 -0700806
807 rt = (struct rtable *) dst;
808
Lorenzo Colitti7d995692016-12-23 00:33:57 +0900809 __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0);
David S. Millerceb33202012-07-17 11:31:28 -0700810 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700811}
812
Linus Torvalds1da177e2005-04-16 15:20:36 -0700813static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
814{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800815 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700816 struct dst_entry *ret = dst;
817
818 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000819 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700820 ip_rt_put(rt);
821 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700822 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
823 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700824 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700825 ret = NULL;
826 }
827 }
828 return ret;
829}
830
/*
 * Algorithm:
 * 1. The first ip_rt_redirect_number redirects are sent
 *    with exponential backoff, then we stop sending them at all,
 *    assuming that the host ignores our redirects.
 * 2. If we did not see packets requiring redirects
 *    during ip_rt_redirect_silence, we assume that the host
 *    forgot the redirected route and start to send redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;
	int vif;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1);
	if (!peer) {
		/* No peer state available: send unthrottled. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything.
	 * Set rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 *
	 * NOTE(review): peer->rate_tokens doubles as a sent-redirect
	 * counter here, while ip_error() below uses the same field on
	 * the same peer (keyed by saddr) as a token bucket; the two
	 * usages can interfere — confirm this is intended.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
911
/* Input-route error handler: translate the route's error code into an
 * ICMP destination-unreachable back to the sender, subject to a
 * per-peer token-bucket rate limit, then drop the packet.
 * Always consumes @skb and returns 0.
 */
static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	/* IP on this device is disabled. */
	if (!in_dev)
		goto out;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		/* Not forwarding: only bump the SNMP counters, never
		 * answer with ICMP.
		 */
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	/* Map the dst error to an ICMP code; unknown errors are
	 * dropped silently.
	 */
	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		__IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
			       l3mdev_master_ifindex(skb->dev), 1);

	/* Token bucket: tokens accrue with elapsed jiffies up to
	 * ip_rt_error_burst; each ICMP sent costs ip_rt_error_cost.
	 * Without a peer entry we send unthrottled.
	 */
	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700978
/* Record a learned path MTU for flow @fl4 on route @rt.
 *
 * The update is skipped when the MTU metric is administratively
 * locked, when it would not lower the current effective MTU, or when
 * the same value was recorded recently (less than half of
 * ip_rt_mtu_expires ago).  Values are floored at ip_rt_min_pmtu and
 * stored as a nexthop exception (fnhe) expiring after
 * ip_rt_mtu_expires.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	/* Only accept reductions of the current effective MTU. */
	if (ipv4_mtu(dst) < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	/* Same value already recorded and still fresh: nothing to do. */
	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
1006
David S. Miller4895c772012-07-17 04:19:00 -07001007static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
1008 struct sk_buff *skb, u32 mtu)
1009{
1010 struct rtable *rt = (struct rtable *) dst;
1011 struct flowi4 fl4;
1012
1013 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +00001014 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -07001015}
1016
David S. Miller36393392012-06-14 22:21:46 -07001017void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
1018 int oif, u32 mark, u8 protocol, int flow_flags)
1019{
David S. Miller4895c772012-07-17 04:19:00 -07001020 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -07001021 struct flowi4 fl4;
1022 struct rtable *rt;
1023
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001024 if (!mark)
1025 mark = IP4_REPLY_MARK(net, skb->mark);
1026
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001027 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001028 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -07001029 rt = __ip_route_output_key(net, &fl4);
1030 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -07001031 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -07001032 ip_rt_put(rt);
1033 }
1034}
1035EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
1036
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001037static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -07001038{
David S. Miller4895c772012-07-17 04:19:00 -07001039 const struct iphdr *iph = (const struct iphdr *) skb->data;
1040 struct flowi4 fl4;
1041 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -07001042
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001043 __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0);
Lorenzo Colitti1b3c61d2014-05-13 10:17:34 -07001044
1045 if (!fl4.flowi4_mark)
1046 fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark);
1047
David S. Miller4895c772012-07-17 04:19:00 -07001048 rt = __ip_route_output_key(sock_net(sk), &fl4);
1049 if (!IS_ERR(rt)) {
1050 __ip_rt_update_pmtu(rt, &fl4, mtu);
1051 ip_rt_put(rt);
1052 }
David S. Miller36393392012-06-14 22:21:46 -07001053}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001054
/* Socket-aware PMTU update: like __ipv4_sk_update_pmtu() but also
 * refreshes the socket's cached dst when it has become invalid, so the
 * socket immediately sees the new path MTU.
 *
 * Runs under bh_lock_sock(); when the socket is owned by user context
 * (or has no cached dst) we fall back to the plain update and let the
 * socket revalidate its dst later.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;		/* true when rt holds a reference we own */
	struct net *net = sock_net(sk);

	bh_lock_sock(sk);

	if (!ip_sk_accept_pmtu(sk))
		goto out;

	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && !odst->ops->check(odst, 0)) {
		/* Cached dst is stale: look up a fresh route first. */
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	/* The update may have invalidated the route; get another one. */
	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001108
David S. Millerb42597e2012-07-11 21:25:45 -07001109void ipv4_redirect(struct sk_buff *skb, struct net *net,
1110 int oif, u32 mark, u8 protocol, int flow_flags)
1111{
David S. Miller4895c772012-07-17 04:19:00 -07001112 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001113 struct flowi4 fl4;
1114 struct rtable *rt;
1115
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001116 __build_flow_key(net, &fl4, NULL, iph, oif,
David S. Miller4895c772012-07-17 04:19:00 -07001117 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001118 rt = __ip_route_output_key(net, &fl4);
1119 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001120 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001121 ip_rt_put(rt);
1122 }
1123}
1124EXPORT_SYMBOL_GPL(ipv4_redirect);
1125
1126void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1127{
David S. Miller4895c772012-07-17 04:19:00 -07001128 const struct iphdr *iph = (const struct iphdr *) skb->data;
1129 struct flowi4 fl4;
1130 struct rtable *rt;
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001131 struct net *net = sock_net(sk);
David S. Millerb42597e2012-07-11 21:25:45 -07001132
Lorenzo Colittie2d118a2016-11-04 02:23:43 +09001133 __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0);
1134 rt = __ip_route_output_key(net, &fl4);
David S. Miller4895c772012-07-17 04:19:00 -07001135 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001136 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001137 ip_rt_put(rt);
1138 }
David S. Millerb42597e2012-07-11 21:25:45 -07001139}
1140EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1141
David S. Millerefbc3682011-12-01 13:38:59 -05001142static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1143{
1144 struct rtable *rt = (struct rtable *) dst;
1145
David S. Millerceb33202012-07-17 11:31:28 -07001146 /* All IPV4 dsts are created with ->obsolete set to the value
1147 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1148 * into this function always.
1149 *
Timo Teräs387aa652013-05-27 20:46:31 +00001150 * When a PMTU/redirect information update invalidates a route,
1151 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
1152 * DST_OBSOLETE_DEAD by dst_free().
David S. Millerceb33202012-07-17 11:31:28 -07001153 */
Timo Teräs387aa652013-05-27 20:46:31 +00001154 if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001155 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001156 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001157}
1158
/* dst_ops ->link_failure handler: report the failure to the sender with
 * an ICMP host-unreachable and force the route to expire immediately so
 * it is not reused.
 *
 * NOTE(review): icmp_send() runs here with whatever remains in
 * IPCB(skb); later upstream kernels recompile the IP options first
 * (ipv4_send_dest_unreach() + __icmp_send()) because the control block
 * may no longer hold valid option state at this point — confirm
 * whether this tree needs that fix.
 */
static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}
1169
Eric W. Biedermanede20592015-10-07 16:48:47 -05001170static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001171{
Joe Perches91df42b2012-05-15 14:11:54 +00001172 pr_debug("%s: %pI4 -> %pI4, %s\n",
1173 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1174 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001176 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001177 return 0;
1178}
1179
1180/*
1181 We do not cache source address of outgoing interface,
1182 because it is used only by IP RR, TS and SRR options,
 so that it is out of the fast path.
1184
1185 BTW remember: "addr" is allowed to be not aligned
1186 in IP options!
1187 */
1188
/* Copy into @addr (4 bytes) the source address our side uses for the
 * flow carried by @skb over route @rt — consumed by the IP RR, TS and
 * SRR option handlers.
 *
 * For output routes this is simply the packet's own saddr.  For input
 * routes the packet's flow is looked up in the FIB to obtain the
 * preferred source; if the lookup fails, fall back to any
 * universe-scope address on the outgoing device.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Rebuild the flow key for this packet by hand. */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1221
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Merge a routing-realm tag into the dst's tclassid: the low and high
 * 16-bit halves are each taken from @tag only when still unset.
 */
static void set_class_tag(struct rtable *rt, u32 tag)
{
	u32 cur = rt->dst.tclassid;

	if (!(cur & 0xFFFF))
		rt->dst.tclassid |= tag & 0xFFFF;
	if (!(cur & 0xFFFF0000))
		rt->dst.tclassid |= tag & 0xFFFF0000;
}
#endif
1231
David S. Miller0dbaee32010-12-13 12:52:14 -08001232static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1233{
1234 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1235
1236 if (advmss == 0) {
1237 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1238 ip_rt_min_advmss);
1239 if (advmss > 65535 - 40)
1240 advmss = 65535 - 40;
1241 }
1242 return advmss;
1243}
1244
Steffen Klassertebb762f2011-11-23 02:12:51 +00001245static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001246{
Steffen Klassert261663b2011-11-23 02:14:50 +00001247 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001248 unsigned int mtu = rt->rt_pmtu;
1249
Alexander Duyck98d75c32012-08-27 06:30:01 +00001250 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001251 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001252
Steffen Klassert38d523e2013-01-16 20:55:01 +00001253 if (mtu)
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001254 return mtu;
1255
1256 mtu = dst->dev->mtu;
David S. Millerd33e4552010-12-14 13:01:14 -08001257
1258 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
Julian Anastasov155e8332012-10-08 11:41:18 +00001259 if (rt->rt_uses_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001260 mtu = 576;
1261 }
1262
Roopa Prabhu14972cb2016-08-24 20:10:43 -07001263 mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
1264
1265 return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
David S. Millerd33e4552010-12-14 13:01:14 -08001266}
1267
David S. Millerf2bb4be2012-07-17 12:20:47 -07001268static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001269{
Eric Dumazetcaa41522014-09-03 22:21:56 -07001270 struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions);
David S. Miller4895c772012-07-17 04:19:00 -07001271 struct fib_nh_exception *fnhe;
1272 u32 hval;
1273
David S. Millerf2bb4be2012-07-17 12:20:47 -07001274 if (!hash)
1275 return NULL;
1276
David S. Millerd3a25c92012-07-17 13:23:08 -07001277 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001278
1279 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1280 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001281 if (fnhe->fnhe_daddr == daddr)
1282 return fnhe;
1283 }
1284 return NULL;
1285}
David S. Miller4895c772012-07-17 04:19:00 -07001286
/* Bind cached route @rt to the nexthop exception @fnhe for @daddr.
 *
 * Under fnhe_lock: drop stale exception data when the namespace
 * generation changed, copy the exception's gw/PMTU/expiry into @rt,
 * and publish @rt as the exception's cached input or output route
 * (unless the dst is marked DST_NOCACHE).  Returns true when @rt was
 * cached in the exception.
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		struct rtable __rcu **porig;
		struct rtable *orig;
		int genid = fnhe_genid(dev_net(rt->dst.dev));

		/* Input and output routes live in separate cache slots. */
		if (rt_is_input_route(rt))
			porig = &fnhe->fnhe_rth_input;
		else
			porig = &fnhe->fnhe_rth_output;
		orig = rcu_dereference(*porig);

		/* A generation bump invalidates everything this
		 * exception has learned.
		 */
		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
			fnhe_flush_routes(fnhe);
			orig = NULL;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		if (!(rt->dst.flags & DST_NOCACHE)) {
			/* Publish @rt, then free the route it replaced. */
			rcu_assign_pointer(*porig, rt);
			if (orig)
				rt_free(orig);
			ret = true;
		}

		fnhe->fnhe_stamp = jiffies;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1330
/* Try to install @rt as the cached route on nexthop @nh — the shared
 * input slot, or this CPU's output slot.  The slot is claimed with
 * cmpxchg so a concurrent update loses cleanly.  Returns true when @rt
 * was installed (the previous occupant, if any, is freed), false when
 * another CPU changed the slot first.
 */
static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
{
	struct rtable *orig, *prev, **p;
	bool ret = true;

	if (rt_is_input_route(rt)) {
		p = (struct rtable **)&nh->nh_rth_input;
	} else {
		p = (struct rtable **)raw_cpu_ptr(nh->nh_pcpu_rth_output);
	}
	orig = *p;

	prev = cmpxchg(p, orig, rt);
	if (prev == orig) {
		if (orig)
			rt_free(orig);
	} else
		ret = false;

	return ret;
}
1352
/* Per-CPU list of "uncached" rtables (dsts not stored in the nexthop
 * cache) so rt_flush_dev() can find and retarget them when their
 * device goes away.  Each list has its own lock; an rtable records
 * which list it joined in rt->rt_uncached_list.
 */
struct uncached_list {
	spinlock_t lock;
	struct list_head head;
};

static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list);
David S. Millercaacf052012-07-31 15:06:50 -07001359
/* Queue @rt on this CPU's uncached-route list so that it remains
 * discoverable by rt_flush_dev() even though it is not stored in any
 * nexthop cache.  The owning list is remembered in rt_uncached_list
 * because the route may be destroyed on a different CPU.
 */
static void rt_add_uncached_list(struct rtable *rt)
{
	struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list);

	rt->rt_uncached_list = ul;

	spin_lock_bh(&ul->lock);
	list_add_tail(&rt->rt_uncached, &ul->head);
	spin_unlock_bh(&ul->lock);
}
1370
/* dst_ops->destroy hook: unlink the route from the uncached list it
 * was queued on (if any) before the rtable memory is released.
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	/* Only routes that went through rt_add_uncached_list() have a
	 * non-empty rt_uncached node; take the lock of the list they
	 * were added to, which may belong to another CPU.
	 */
	if (!list_empty(&rt->rt_uncached)) {
		struct uncached_list *ul = rt->rt_uncached_list;

		spin_lock_bh(&ul->lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&ul->lock);
	}
}
1383
/* Re-point every uncached route that still references @dev at the
 * loopback device of @dev's netns, transferring the device reference
 * (dev_hold on loopback, dev_put on @dev) so @dev's refcount can drop
 * to zero and the device can finish unregistering.
 */
void rt_flush_dev(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct rtable *rt;
	int cpu;

	/* Routes may sit on any CPU's list, including CPUs that are
	 * currently offline, hence for_each_possible_cpu().
	 */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		spin_lock_bh(&ul->lock);
		list_for_each_entry(rt, &ul->head, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&ul->lock);
	}
}
1404
Eric Dumazet4331deb2012-07-25 05:11:23 +00001405static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001406{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001407 return rt &&
1408 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1409 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001410}
1411
/* Finish initialising @rt from a FIB lookup result.
 *
 * When a fib_info is present, copy the nexthop's gateway (only for
 * RT_SCOPE_LINK nexthops), metrics, classid and lwtunnel state, then
 * try to store the route either in the nexthop exception @fnhe or in
 * the nexthop cache.  If caching fails -- or was never intended (no
 * fib_info) -- flag the route DST_NOCACHE and park it on the per-CPU
 * uncached list so it stays reachable for rt_flush_dev().
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1456
/**
 * rt_dst_alloc - allocate and minimally initialise an IPv4 rtable
 * @dev: output device the dst will reference
 * @flags: RTCF_* routing flags; RTCF_LOCAL selects the
 *	ip_local_deliver() input handler
 * @type: RTN_* route type
 * @nopolicy: set DST_NOPOLICY on the dst
 * @noxfrm: set DST_NOXFRM on the dst
 * @will_cache: caller intends to store this route in a nexthop cache;
 *	if false the dst is created with DST_HOST | DST_NOCACHE
 *
 * Returns the new rtable, or NULL on allocation failure.
 */
struct rtable *rt_dst_alloc(struct net_device *dev,
			    unsigned int flags, u16 type,
			    bool nopolicy, bool noxfrm, bool will_cache)
{
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
		       (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
		       (nopolicy ? DST_NOPOLICY : 0) |
		       (noxfrm ? DST_NOXFRM : 0));

	if (rt) {
		rt->rt_genid = rt_genid_ipv4(dev_net(dev));
		rt->rt_flags = flags;
		rt->rt_type = type;
		rt->rt_is_input = 0;
		rt->rt_iif = 0;
		rt->rt_pmtu = 0;
		rt->rt_gateway = 0;
		rt->rt_uses_gateway = 0;
		rt->rt_table_id = 0;
		INIT_LIST_HEAD(&rt->rt_uncached);

		rt->dst.output = ip_output;
		if (flags & RTCF_LOCAL)
			rt->dst.input = ip_local_deliver;
	}

	return rt;
}
EXPORT_SYMBOL(rt_dst_alloc);
David S. Miller0c4dcd52011-02-17 15:42:37 -08001488
/* called in rcu_read_lock() section */
/* Input route for a multicast destination.
 *
 * Validates the source address (no multicast/broadcast/loopback
 * sources; zeronet sources only for link-local multicast groups),
 * builds a non-cached RTN_MULTICAST route, and attaches it to @skb.
 * @our says the local host is a member of the group, which adds
 * RTCF_LOCAL delivery.
 *
 * Returns 0 on success or a negative errno.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	unsigned int flags = RTCF_MULTICAST;
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (!in_dev)
		return -EINVAL;

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
		goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 senders are acceptable only for link-local
		 * multicast (e.g. DHCP-style autoconfiguration traffic).
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	if (our)
		flags |= RTCF_LOCAL;

	/* Multicast routes are never cached: will_cache == false. */
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	/* Multicast dsts must never be used for output. */
	rth->dst.output = ip_rt_bug;
	rth->rt_is_input= 1;

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1550
1551
1552static void ip_handle_martian_source(struct net_device *dev,
1553 struct in_device *in_dev,
1554 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001555 __be32 daddr,
1556 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557{
1558 RT_CACHE_STAT_INC(in_martian_src);
1559#ifdef CONFIG_IP_ROUTE_VERBOSE
1560 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1561 /*
1562 * RFC1812 recommendation, if source is martian,
1563 * the only hint is MAC header.
1564 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001565 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001566 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001567 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001568 print_hex_dump(KERN_WARNING, "ll header: ",
1569 DUMP_PREFIX_OFFSET, 16, 1,
1570 skb_mac_header(skb),
1571 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 }
1573 }
1574#endif
1575}
1576
/* Remove the exception entry for @daddr (if present) from @nh's
 * exception hash.  The entry is unlinked under fnhe_lock, its cached
 * routes are flushed, and the memory is freed after an RCU grace
 * period so concurrent lockless readers stay safe.
 */
static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe, __rcu **fnhe_p;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = rcu_dereference_protected(nh->nh_exceptions,
					 lockdep_is_held(&fnhe_lock));
	hash += hval;

	/* Walk the bucket chain keeping a pointer to the previous link
	 * so the matching entry can be spliced out in place.
	 */
	fnhe_p = &hash->chain;
	fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock));
	while (fnhe) {
		if (fnhe->fnhe_daddr == daddr) {
			rcu_assign_pointer(*fnhe_p, rcu_dereference_protected(
				fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)));
			fnhe_flush_routes(fnhe);
			kfree_rcu(fnhe, rcu);
			break;
		}
		fnhe_p = &fnhe->fnhe_next;
		fnhe = rcu_dereference_protected(fnhe->fnhe_next,
						 lockdep_is_held(&fnhe_lock));
	}

	spin_unlock_bh(&fnhe_lock);
}
1606
Thomas Grafefd85702016-11-30 17:10:09 +01001607static void set_lwt_redirect(struct rtable *rth)
1608{
1609 if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
1610 rth->dst.lwtstate->orig_output = rth->dst.output;
1611 rth->dst.output = lwtunnel_output;
1612 }
1613
1614 if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
1615 rth->dst.lwtstate->orig_input = rth->dst.input;
1616 rth->dst.input = lwtunnel_input;
1617 }
1618}
1619
/* called in rcu_read_lock() section */
/* Build (or reuse from cache) a forwarding route for @skb.
 *
 * Validates the source against the FIB, decides whether an ICMP
 * redirect should be sent (IPSKB_DOREDIRECT), consults the nexthop
 * exception/cache, and otherwise allocates a fresh route with
 * ip_forward() as its input handler.
 *
 * Returns 0 on success (dst attached to @skb) or a negative errno.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct fib_nh_exception *fnhe;
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	bool do_cache;
	u32 itag = 0;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (!out_dev) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only cache when the FIB gave us a fib_info and no classid tag
	 * was derived from the source validation.
	 */
	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    skb->protocol == htons(ETH_P_IP) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		IPCB(skb)->flags |= IPSKB_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
	if (do_cache) {
		if (fnhe) {
			rth = rcu_dereference(fnhe->fnhe_rth_input);
			/* An expired exception route is torn down here
			 * and we fall back to the plain nexthop cache.
			 */
			if (rth && rth->dst.expires &&
			    time_after(jiffies, rth->dst.expires)) {
				ip_del_fnhe(&FIB_RES_NH(*res), daddr);
				fnhe = NULL;
			} else {
				goto rt_cache;
			}
		}

		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);

rt_cache:
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev, 0, res->type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_is_input = 1;
	if (res->table)
		rth->rt_table_id = res->table->tb_id;
	RT_CACHE_STAT_INC(in_slow_tot);

	rth->dst.input = ip_forward;

	rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
	set_lwt_redirect(rth);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001716
#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* To make ICMP packets follow the right flow, the multipath hash is
 * calculated from the inner IP addresses in reverse order.
 */
static int ip_multipath_icmp_hash(struct sk_buff *skb)
{
	const struct iphdr *outer_iph = ip_hdr(skb);
	const struct iphdr *inner_iph;
	const struct icmphdr *icmph;
	struct iphdr _inner_iph;
	struct icmphdr _icmph;
	bool is_icmp_err;

	/* Non-first fragments carry no ICMP header to inspect. */
	if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
		goto standard_hash;

	icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
				   &_icmph);
	if (!icmph)
		goto standard_hash;

	/* Only ICMP errors embed the original packet's header. */
	is_icmp_err = icmph->type == ICMP_DEST_UNREACH ||
		      icmph->type == ICMP_REDIRECT ||
		      icmph->type == ICMP_TIME_EXCEEDED ||
		      icmph->type == ICMP_PARAMETERPROB;
	if (!is_icmp_err)
		goto standard_hash;

	inner_iph = skb_header_pointer(skb,
				       outer_iph->ihl * 4 + sizeof(_icmph),
				       sizeof(_inner_iph), &_inner_iph);
	if (!inner_iph)
		goto standard_hash;

	/* Reverse order: the error travels back along the flow that
	 * triggered it.
	 */
	return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);

standard_hash:
	return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */
1758
/* Select the nexthop (for multipath routes) and create the input
 * routing cache entry for a forwarded packet.  ICMP packets are
 * hashed via ip_multipath_icmp_hash() so errors follow the flow that
 * triggered them; all other traffic hashes saddr/daddr.
 */
static int ip_mkroute_input(struct sk_buff *skb,
			    struct fib_result *res,
			    struct in_device *in_dev,
			    __be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
	if (res->fi && res->fi->fib_nhs > 1) {
		int h;

		if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
			h = ip_multipath_icmp_hash(skb);
		else
			h = fib_multipath_hash(saddr, daddr);
		fib_select_multipath(res, h);
	}
#endif

	/* create a routing cache entry */
	return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
1779
/*
 *	NOTE. We drop all packets that have a local source
 *	address, because every properly looped-back packet
 *	must have the correct destination already attached by the
 *	output routine.
 *
 *	Such an approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with a 100% guarantee.
 *
 *	Called with rcu_read_lock().
 */
1790
/* Slow-path input route resolution for a unicast/broadcast/local
 * destination: sanity-check the addresses, look the destination up in
 * the FIB, and attach the resulting dst to @skb.  Forwarded packets
 * go through ip_mkroute_input(); local/broadcast delivery builds (or
 * reuses) an RTCF_LOCAL route here.
 *
 * Returns 0 on success or a negative errno.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct ip_tunnel_info *tun_info;
	struct flowi4 fl4;
	unsigned int flags = 0;
	u32 itag = 0;
	struct rtable *rth;
	int err = -EINVAL;
	struct net *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	/* Only RX-side tunnel metadata participates in the lookup key. */
	tun_info = skb_tunnel_info(skb);
	if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
		fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
	else
		fl4.flowi4_tun_key.tun_id = 0;
	skb_dst_drop(skb);

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	res.table = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.flowi4_flags = 0;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res, 0);
	if (err != 0) {
		if (!IN_DEV_FORWARD(in_dev))
			err = -EHOSTUNREACH;
		goto no_route;
	}

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		err = fib_validate_source(skb, saddr, daddr, tos,
					  0, dev, in_dev, &itag);
		if (err < 0)
			goto martian_source;
		goto local_input;
	}

	if (!IN_DEV_FORWARD(in_dev)) {
		err = -EHOSTUNREACH;
		goto no_route;
	}
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Try to reuse the nexthop's cached input route before
	 * allocating a new one; only cache when no classid tag was set.
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev,
			   flags | RTCF_LOCAL, res.type,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	/* Locally delivered dsts must never be used for output. */
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->rt_is_input = 1;
	if (res.table)
		rth->rt_table_id = res.table->tb_id;

	RT_CACHE_STAT_INC(in_slow_tot);
	if (res.type == RTN_UNREACHABLE) {
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags &= ~RTCF_LOCAL;
	}

	if (do_cache) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
			WARN_ON(rth->dst.input == lwtunnel_input);
			rth->dst.lwtstate->orig_input = rth->dst.input;
			rth->dst.input = lwtunnel_input;
		}

		if (unlikely(!rt_cache_route(nh, rth))) {
			rth->dst.flags |= DST_NOCACHE;
			rt_add_uncached_list(rth);
		}
	}
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	res.fi = NULL;
	res.table = NULL;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1986
/* Public entry point for input route lookup (dst attached without a
 * reference).  Multicast destinations are recognised here and routed
 * via ip_route_input_mc(); everything else takes the slow path.
 * Runs the whole lookup inside rcu_read_lock().
 *
 * Returns 0 on success or a negative errno.
 */
int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			 u8 tos, struct net_device *dev)
{
	int res;

	rcu_read_lock();

	/* Multicast recognition logic is moved from route cache to here.
	   The problem was that too many Ethernet cards have broken/missing
	   hardware multicast filters :-( As result the host on multicasting
	   network acquires a lot of useless route cache entries, sort of
	   SDR messages from all the world. Now we try to get rid of them.
	   Really, provided software IP multicast filter is organized
	   reasonably (at least, hashed), it does not result in a slowdown
	   comparing with route cache reject entries.
	   Note, that multicast routers are not affected, because
	   route cache entry is created eventually.
	 */
	if (ipv4_is_multicast(daddr)) {
		struct in_device *in_dev = __in_dev_get_rcu(dev);
		int our = 0;

		if (in_dev)
			our = ip_check_mc_rcu(in_dev, daddr, saddr,
					      ip_hdr(skb)->protocol);

		/* check l3 master if no match yet */
		if ((!in_dev || !our) && netif_is_l3_slave(dev)) {
			struct in_device *l3_in_dev;

			l3_in_dev = __in_dev_get_rcu(skb->dev);
			if (l3_in_dev)
				our = ip_check_mc_rcu(l3_in_dev, daddr, saddr,
						      ip_hdr(skb)->protocol);
		}

		res = -EINVAL;
		if (our
#ifdef CONFIG_IP_MROUTE
			||
		    (!ipv4_is_local_multicast(daddr) &&
		     IN_DEV_MFORWARD(in_dev))
#endif
		   ) {
			res = ip_route_input_mc(skb, daddr, saddr,
						tos, dev, our);
		}
		rcu_read_unlock();
		return res;
	}
	res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
	rcu_read_unlock();
	return res;
}
EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00002043/* called with rcu_read_lock() */
/*
 * __mkroute_output - build (or fetch from a cache) the output rtable for
 * a FIB lookup result.
 *
 * @res:      FIB lookup result for the flow (nexthop, type, table)
 * @fl4:      IPv4 flow key being routed
 * @orig_oif: output interface index originally requested by the caller
 * @dev_out:  egress device selected by the caller
 * @flags:    initial RTCF_* flags for the new route
 *
 * Returns the rtable (with a reference held when taken from a cache) or
 * an ERR_PTR() on failure.
 */
David S. Miller982721f2011-02-16 21:44:24 -08002044static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00002045 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00002046 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08002047 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002048{
David S. Miller982721f2011-02-16 21:44:24 -08002049 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002050 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08002051 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08002052 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08002053 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002054 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002055
Thomas Grafd0daebc32012-06-12 00:44:01 +00002056 in_dev = __in_dev_get_rcu(dev_out);
2057 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08002058 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059
/* A loopback source address out a non-loopback device is only allowed
 * when route_localnet is enabled on the egress device, or the device
 * is an L3 master.
 */
Thomas Grafd0daebc32012-06-12 00:44:01 +00002060 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
David Ahern5f02ce242016-09-10 12:09:54 -07002061 if (ipv4_is_loopback(fl4->saddr) &&
2062 !(dev_out->flags & IFF_LOOPBACK) &&
2063 !netif_is_l3_master(dev_out))
Thomas Grafd0daebc32012-06-12 00:44:01 +00002064 return ERR_PTR(-EINVAL);
2065
/* Reclassify the route type from the destination address itself. */
David S. Miller68a5e3d2011-03-11 20:07:33 -05002066 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002067 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002068 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08002069 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05002070 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08002071 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002072
2073 if (dev_out->flags & IFF_LOOPBACK)
2074 flags |= RTCF_LOCAL;
2075
Julian Anastasov63617422012-11-22 23:04:14 +02002076 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08002077 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08002079 fi = NULL;
2080 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002081 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07002082 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
2083 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02002085 else
2086 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087 /* If multicast route do not exist use
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00002088 * default one, but do not gateway in this case.
2089 * Yes, it is hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090 */
David S. Miller982721f2011-02-16 21:44:24 -08002091 if (fi && res->prefixlen < 4)
2092 fi = NULL;
Chris Friesend6d5e992016-04-08 15:21:30 -06002093 } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
2094 (orig_oif != dev_out->ifindex)) {
2095 /* For local routes that require a particular output interface
2096 * we do not want to cache the result. Caching the result
2097 * causes incorrect behaviour when there are multiple source
2098 * addresses on the interface, the end result being that if the
2099 * intended recipient is waiting on that interface for the
2100 * packet he won't receive it because it will be delivered on
2101 * the loopback interface and the IP_PKTINFO ipi_ifindex will
2102 * be set to the loopback interface as well.
2103 */
2104 fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105 }
2106
David S. Millerf2bb4be2012-07-17 12:20:47 -07002107 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02002108 do_cache &= fi != NULL;
2109 if (do_cache) {
David S. Millerc5038a82012-07-31 15:02:02 -07002110 struct rtable __rcu **prth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002111 struct fib_nh *nh = &FIB_RES_NH(*res);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00002112
/* Prefer a per-destination exception route (installed by redirects or
 * PMTU discovery); purge it when expired, otherwise try to reuse it.
 */
Julian Anastasovc92b9652012-10-08 11:41:19 +00002113 fnhe = find_exception(nh, fl4->daddr);
Xin Longdeed49d2016-02-18 21:21:19 +08002114 if (fnhe) {
Timo Teräs2ffae992013-06-27 10:27:05 +03002115 prth = &fnhe->fnhe_rth_output;
Xin Longdeed49d2016-02-18 21:21:19 +08002116 rth = rcu_dereference(*prth);
2117 if (rth && rth->dst.expires &&
2118 time_after(jiffies, rth->dst.expires)) {
2119 ip_del_fnhe(nh, fl4->daddr);
2120 fnhe = NULL;
2121 } else {
2122 goto rt_cache;
Julian Anastasovc92b9652012-10-08 11:41:19 +00002123 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002124 }
Xin Longdeed49d2016-02-18 21:21:19 +08002125
/* Caller supplied its own nexthop (FLOWI_FLAG_KNOWN_NH) and the FIB
 * nexthop is not an on-link gateway: build a fresh, uncached route.
 */
2126 if (unlikely(fl4->flowi4_flags &
2127 FLOWI_FLAG_KNOWN_NH &&
2128 !(nh->nh_gw &&
2129 nh->nh_scope == RT_SCOPE_LINK))) {
2130 do_cache = false;
2131 goto add;
2132 }
2133 prth = raw_cpu_ptr(nh->nh_pcpu_rth_output);
David S. Millerc5038a82012-07-31 15:02:02 -07002134 rth = rcu_dereference(*prth);
Xin Longdeed49d2016-02-18 21:21:19 +08002135
2136rt_cache:
David S. Millerc5038a82012-07-31 15:02:02 -07002137 if (rt_cache_valid(rth)) {
2138 dst_hold(&rth->dst);
2139 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07002140 }
2141 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00002142
2143add:
David Ahernd08c4f32015-09-02 13:58:34 -07002144 rth = rt_dst_alloc(dev_out, flags, type,
David S. Miller5c1e6aa2011-04-28 14:13:38 -07002145 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07002146 IN_DEV_CONF_GET(in_dev, NOXFRM),
Julian Anastasovc92b9652012-10-08 11:41:19 +00002147 do_cache);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002148 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08002149 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00002150
David S. Miller13378ca2012-07-23 13:57:45 -07002151 rth->rt_iif = orig_oif ? : 0;
David Ahernb7503e02015-09-02 13:58:35 -07002152 if (res->table)
2153 rth->rt_table_id = res->table->tb_id;
2154
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155 RT_CACHE_STAT_INC(out_slow_tot);
2156
/* Broadcast/multicast need ip_mc_output, and (with CONFIG_IP_MROUTE)
 * forwarded multicast goes through ip_mr_input.
 */
2157 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002158 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002160 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002161 RT_CACHE_STAT_INC(out_slow_mc);
2162 }
2163#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08002164 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07002166 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002167 rth->dst.input = ip_mr_input;
2168 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169 }
2170 }
2171#endif
2172 }
2173
David S. Millerf2bb4be2012-07-17 12:20:47 -07002174 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
Thomas Grafefd85702016-11-30 17:10:09 +01002175 set_lwt_redirect(rth);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002176
David S. Miller5ada5522011-02-17 15:29:00 -08002177 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002178}
2179
Linus Torvalds1da177e2005-04-16 15:20:36 -07002180/*
2181 * Major route resolver routine.
2182 */
2183
/*
 * __ip_route_output_key_hash - resolve an output route for @fl4 in @net.
 *
 * Runs under rcu_read_lock() for the duration of the lookup:
 *  - rejects invalid caller-supplied source addresses,
 *  - honours flowi4_oif, selecting a source address when none is given,
 *  - defaults to the loopback device when no destination is set,
 *  - on FIB lookup failure with an oif specified, falls back to an
 *    on-link (RTN_UNICAST) route,
 *  - otherwise uses fib_select_path() and __mkroute_output().
 *
 * @mp_hash is forwarded to fib_select_path() for multipath selection.
 * Returns an rtable or ERR_PTR() on failure.
 */
Peter Nørlund79a13152015-09-30 10:12:22 +02002184struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
2185 int mp_hash)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002187 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00002188 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07002189 unsigned int flags = 0;
2190 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08002191 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07002192 int orig_oif;
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002193 int err = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002194
David S. Miller85b91b02012-07-13 08:21:29 -07002195 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002197 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002198
David S. Miller813b3b52011-04-28 14:48:42 -07002199 orig_oif = fl4->flowi4_oif;
2200
/* Normalise the flow key: output lookups use the loopback iif, masked
 * TOS bits, and RTO_ONLINK maps to link scope.
 */
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002201 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07002202 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
2203 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
2204 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08002205
David S. Miller010c2702011-02-17 15:37:09 -08002206 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07002207 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002208 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07002209 if (ipv4_is_multicast(fl4->saddr) ||
2210 ipv4_is_lbcast(fl4->saddr) ||
2211 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002212 goto out;
2213
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 /* I removed check for oif == dev_out->oif here.
2215 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08002216 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
2217 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218 2. Moreover, we are allowed to send packets with saddr
2219 of another iface. --ANK
2220 */
2221
David S. Miller813b3b52011-04-28 14:48:42 -07002222 if (fl4->flowi4_oif == 0 &&
2223 (ipv4_is_multicast(fl4->daddr) ||
2224 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07002225 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002226 dev_out = __ip_dev_find(net, fl4->saddr, false);
Ian Morris51456b22015-04-03 09:17:26 +01002227 if (!dev_out)
Julian Anastasova210d012008-10-01 07:28:28 -07002228 goto out;
2229
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 /* Special hack: user can direct multicasts
2231 and limited broadcast via necessary interface
2232 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2233 This hack is not just for fun, it allows
2234 vic,vat and friends to work.
2235 They bind socket to loopback, set ttl to zero
2236 and expect that it will work.
2237 From the viewpoint of routing cache they are broken,
2238 because we are not allowed to build multicast path
2239 with loopback source addr (look, routing cache
2240 cannot know, that ttl is zero, so that packet
2241 will not leave this host and route is valid).
2242 Luckily, this hack is good workaround.
2243 */
2244
David S. Miller813b3b52011-04-28 14:48:42 -07002245 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 goto make_route;
2247 }
Julian Anastasova210d012008-10-01 07:28:28 -07002248
David S. Miller813b3b52011-04-28 14:48:42 -07002249 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002250 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002251 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002252 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002253 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002254 }
2255
2256
David S. Miller813b3b52011-04-28 14:48:42 -07002257 if (fl4->flowi4_oif) {
2258 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002259 rth = ERR_PTR(-ENODEV);
Ian Morris51456b22015-04-03 09:17:26 +01002260 if (!dev_out)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002261 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002262
2263 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002264 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002265 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002266 goto out;
2267 }
David S. Miller813b3b52011-04-28 14:48:42 -07002268 if (ipv4_is_local_multicast(fl4->daddr) ||
Andrew Lunn6a211652015-05-01 16:39:54 +02002269 ipv4_is_lbcast(fl4->daddr) ||
2270 fl4->flowi4_proto == IPPROTO_IGMP) {
David S. Miller813b3b52011-04-28 14:48:42 -07002271 if (!fl4->saddr)
2272 fl4->saddr = inet_select_addr(dev_out, 0,
2273 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274 goto make_route;
2275 }
Jiri Benc0a7e2262013-10-04 17:04:48 +02002276 if (!fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002277 if (ipv4_is_multicast(fl4->daddr))
2278 fl4->saddr = inet_select_addr(dev_out, 0,
2279 fl4->flowi4_scope);
2280 else if (!fl4->daddr)
2281 fl4->saddr = inet_select_addr(dev_out, 0,
2282 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002283 }
2284 }
2285
/* No destination at all: route to ourselves over loopback. */
David S. Miller813b3b52011-04-28 14:48:42 -07002286 if (!fl4->daddr) {
2287 fl4->daddr = fl4->saddr;
2288 if (!fl4->daddr)
2289 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002290 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002291 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 res.type = RTN_LOCAL;
2293 flags |= RTCF_LOCAL;
2294 goto make_route;
2295 }
2296
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002297 err = fib_lookup(net, fl4, &res, 0);
2298 if (err) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002300 res.table = NULL;
David Ahern6104e112016-10-12 13:20:11 -07002301 if (fl4->flowi4_oif &&
David Aherne58e4152016-10-31 15:54:00 -07002302 (ipv4_is_multicast(fl4->daddr) ||
2303 !netif_index_is_l3_master(net, fl4->flowi4_oif))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002304 /* Apparently, routing tables are wrong. Assume,
2305 that the destination is on link.
2306
2307 WHY? DW.
2308 Because we are allowed to send to iface
2309 even if it has NO routes and NO assigned
2310 addresses. When oif is specified, routing
2311 tables are looked up with only one purpose:
2312 to catch if destination is gatewayed, rather than
2313 direct. Moreover, if MSG_DONTROUTE is set,
2314 we send packet, ignoring both routing tables
2315 and ifaddr state. --ANK
2316
2317
2318 We could make it even if oif is unknown,
2319 likely IPv6, but we do not.
2320 */
2321
David S. Miller813b3b52011-04-28 14:48:42 -07002322 if (fl4->saddr == 0)
2323 fl4->saddr = inet_select_addr(dev_out, 0,
2324 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002325 res.type = RTN_UNICAST;
2326 goto make_route;
2327 }
Nikola ForrĂ³0315e382015-09-17 16:01:32 +02002328 rth = ERR_PTR(err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329 goto out;
2330 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002331
2332 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002333 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002334 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002335 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002336 else
David S. Miller813b3b52011-04-28 14:48:42 -07002337 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002338 }
David Ahern5f02ce242016-09-10 12:09:54 -07002339
2340 /* L3 master device is the loopback for that domain */
2341 dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002342 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002343 flags |= RTCF_LOCAL;
2344 goto make_route;
2345 }
2346
/* Pick the FIB nexthop (multipath selection uses @mp_hash). */
David Ahern3ce58d82015-10-05 08:51:25 -07002347 fib_select_path(net, &res, fl4, mp_hash);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348
Linus Torvalds1da177e2005-04-16 15:20:36 -07002349 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002350 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002351

2352
2353make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002354 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355
David S. Miller010c2702011-02-17 15:37:09 -08002356out:
2357 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002358 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359}
Peter Nørlund79a13152015-09-30 10:12:22 +02002360EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002361
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002362static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2363{
2364 return NULL;
2365}
2366
Steffen Klassertebb762f2011-11-23 02:12:51 +00002367static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002368{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002369 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2370
2371 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002372}
2373
David S. Miller6700c272012-07-17 03:29:28 -07002374static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2375 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002376{
2377}
2378
/* Blackhole routes deliberately ignore ICMP redirects: intentional no-op. */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst,
				       struct sock *sk,
				       struct sk_buff *skb)
{
}
2383
Held Bernhard0972ddb2011-04-24 22:07:32 +00002384static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2385 unsigned long old)
2386{
2387 return NULL;
2388}
2389
/*
 * dst_ops for blackhole routes: .check always fails (forcing callers to
 * relookup), and PMTU/redirect/metric-COW operations are no-ops, so a
 * blackhole dst can never be mutated after creation.
 */
David S. Miller14e50e52007-05-24 18:17:54 -07002390static struct dst_ops ipv4_dst_blackhole_ops = {
2391 .family = AF_INET,
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002392 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002393 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002394 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002395 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002396 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002397 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002398 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002399};
2400
/*
 * ipv4_blackhole_route - clone @dst_orig into a blackhole route.
 *
 * The new dst discards all traffic (input/output are dst_discard) while
 * preserving the original route's identity fields (device, iif, pmtu,
 * gateway, flags, type).  @dst_orig's reference is always released.
 * Returns the new dst or ERR_PTR(-ENOMEM).
 */
David S. Miller2774c132011-03-01 14:59:04 -08002401struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002402{
David S. Miller2774c132011-03-01 14:59:04 -08002403 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002404 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002405
David S. Millerf5b0a872012-07-19 12:31:33 -07002406 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002407 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002408 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002409
/* Every packet through this dst is silently dropped. */
David S. Miller14e50e52007-05-24 18:17:54 -07002410 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002411 new->input = dst_discard;
Eric W. Biedermanede20592015-10-07 16:48:47 -05002412 new->output = dst_discard_out;
David S. Miller14e50e52007-05-24 18:17:54 -07002413
Changli Gaod8d1f302010-06-10 23:31:35 -07002414 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002415 if (new->dev)
2416 dev_hold(new->dev);
2417
David S. Miller9917e1e82012-07-17 14:44:26 -07002418 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002419 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002420 rt->rt_pmtu = ort->rt_pmtu;
David S. Miller14e50e52007-05-24 18:17:54 -07002421
fan.duca4c3fc2013-07-30 08:33:53 +08002422 rt->rt_genid = rt_genid_ipv4(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002423 rt->rt_flags = ort->rt_flags;
2424 rt->rt_type = ort->rt_type;
David S. Miller14e50e52007-05-24 18:17:54 -07002425 rt->rt_gateway = ort->rt_gateway;
Julian Anastasov155e8332012-10-08 11:41:18 +00002426 rt->rt_uses_gateway = ort->rt_uses_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -07002427
David S. Millercaacf052012-07-31 15:06:50 -07002428 INIT_LIST_HEAD(&rt->rt_uncached);
David S. Miller14e50e52007-05-24 18:17:54 -07002429 dst_free(new);
2430 }
2431
/* The original route's reference is consumed unconditionally. */
David S. Miller2774c132011-03-01 14:59:04 -08002432 dst_release(dst_orig);
2433
2434 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002435}
2436
/*
 * ip_route_output_flow - resolve an output route for @flp4 and, when a
 * transport protocol is set in the flow, run the result through
 * xfrm_lookup_route() so any matching XFRM (IPsec) policy applies.
 * Returns an rtable or ERR_PTR().
 */
David S. Miller9d6ec932011-03-12 01:12:47 -05002437struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
Eric Dumazet6f9c9612015-09-25 07:39:10 -07002438 const struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002439{
David S. Miller9d6ec932011-03-12 01:12:47 -05002440 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441
David S. Millerb23dd4f2011-03-02 14:31:35 -08002442 if (IS_ERR(rt))
2443 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444
David S. Miller56157872011-05-02 14:37:45 -07002445 if (flp4->flowi4_proto)
Steffen Klassertf92ee612014-09-16 10:08:40 +02002446 rt = (struct rtable *)xfrm_lookup_route(net, &rt->dst,
2447 flowi4_to_flowi(flp4),
2448 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002449
David S. Millerb23dd4f2011-03-02 14:31:35 -08002450 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002451}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002452EXPORT_SYMBOL_GPL(ip_route_output_flow);
2453
/*
 * rt_fill_info - fill a netlink message (type @event) describing @skb's
 * attached rtable, for an RTM_GETROUTE reply or route notification.
 *
 * @dst/@src are the addresses to report, @table_id the routing table,
 * @fl4 the flow the route was resolved for, @portid/@seq the netlink
 * addressing of the reply.  Returns 0 on success or -EMSGSIZE when the
 * skb has no room for the message.
 */
David Ahernc36ba662015-09-02 13:58:36 -07002454static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002455 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
David Ahern0c8d8032017-01-05 19:32:46 -08002456 u32 seq, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457{
Eric Dumazet511c3f92009-06-02 05:14:27 +00002458 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002459 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002460 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002461 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002462 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002463 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002464
David Ahern0c8d8032017-01-05 19:32:46 -08002465 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), 0);
Ian Morris51456b22015-04-03 09:17:26 +01002466 if (!nlh)
Patrick McHardy26932562007-01-31 23:16:40 -08002467 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002468
2469 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002470 r->rtm_family = AF_INET;
2471 r->rtm_dst_len = 32;
2472 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002473 r->rtm_tos = fl4->flowi4_tos;
David Ahernc36ba662015-09-02 13:58:36 -07002474 r->rtm_table = table_id;
2475 if (nla_put_u32(skb, RTA_TABLE, table_id))
David S. Millerf3756b72012-04-01 20:39:02 -04002476 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002477 r->rtm_type = rt->rt_type;
2478 r->rtm_scope = RT_SCOPE_UNIVERSE;
2479 r->rtm_protocol = RTPROT_UNSPEC;
2480 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2481 if (rt->rt_flags & RTCF_NOTIFY)
2482 r->rtm_flags |= RTM_F_NOTIFY;
Hannes Frederic Sowadf4d9252015-01-23 12:01:26 +01002483 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2484 r->rtm_flags |= RTCF_DOREDIRECT;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002485
Jiri Benc930345e2015-03-29 16:59:25 +02002486 if (nla_put_in_addr(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002487 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002488 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002489 r->rtm_src_len = 32;
Jiri Benc930345e2015-03-29 16:59:25 +02002490 if (nla_put_in_addr(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002491 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002492 }
David S. Millerf3756b72012-04-01 20:39:02 -04002493 if (rt->dst.dev &&
2494 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2495 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002496#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002497 if (rt->dst.tclassid &&
2498 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2499 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002500#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002501 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002502 fl4->saddr != src) {
Jiri Benc930345e2015-03-29 16:59:25 +02002503 if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002504 goto nla_put_failure;
2505 }
Julian Anastasov155e8332012-10-08 11:41:18 +00002506 if (rt->rt_uses_gateway &&
Jiri Benc930345e2015-03-29 16:59:25 +02002507 nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gateway))
David S. Millerf3756b72012-04-01 20:39:02 -04002508 goto nla_put_failure;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002509
/* Convert the absolute expiry (jiffies) into a remaining-time delta
 * for userspace; already-expired entries report 0.
 */
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002510 expires = rt->dst.expires;
2511 if (expires) {
2512 unsigned long now = jiffies;
2513
2514 if (time_before(now, expires))
2515 expires -= now;
2516 else
2517 expires = 0;
2518 }
2519
/* While a learned PMTU is still valid it overrides the MTU metric. */
Julian Anastasov521f5492012-07-20 12:02:08 +03002520 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002521 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03002522 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2523 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002524 goto nla_put_failure;
2525
David Millerb4869882012-07-01 02:03:01 +00002526 if (fl4->flowi4_mark &&
stephen hemminger68aaed52012-10-10 08:27:25 +00002527 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002528 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002529
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002530 if (!uid_eq(fl4->flowi4_uid, INVALID_UID) &&
2531 nla_put_u32(skb, RTA_UID,
2532 from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
2533 goto nla_put_failure;
2534
Changli Gaod8d1f302010-06-10 23:31:35 -07002535 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002536
/* Input routes report the incoming interface; forwarded multicast may
 * need an ipmr lookup (which can itself answer the request fully).
 */
David S. Millerc7537962010-11-11 17:07:48 -08002537 if (rt_is_input_route(rt)) {
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002538#ifdef CONFIG_IP_MROUTE
2539 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2540 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2541 int err = ipmr_get_route(net, skb,
2542 fl4->saddr, fl4->daddr,
David Ahern9f09eae2017-01-06 17:39:06 -08002543 r, portid);
Nikolay Aleksandrov2cf75072016-09-25 23:08:31 +02002544
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002545 if (err <= 0) {
David Ahern0c8d8032017-01-05 19:32:46 -08002546 if (err == 0)
2547 return 0;
2548 goto nla_put_failure;
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002549 }
2550 } else
2551#endif
Julian Anastasov91146152014-04-13 18:08:02 +03002552 if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002553 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002554 }
2555
David S. Millerf1850712012-07-10 07:26:01 -07002556 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002557 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002558
Johannes Berg053c0952015-01-16 22:09:00 +01002559 nlmsg_end(skb, nlh);
2560 return 0;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002561
2562nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002563 nlmsg_cancel(skb, nlh);
2564 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002565}
2566
Thomas Graf661d2962013-03-21 07:45:29 +00002567static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002568{
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09002569 struct net *net = sock_net(in_skb->sk);
Thomas Grafd889ce32006-08-17 18:15:44 -07002570 struct rtmsg *rtm;
2571 struct nlattr *tb[RTA_MAX+1];
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572 struct rtable *rt = NULL;
David Millerd6c0a4f2012-07-01 02:02:59 +00002573 struct flowi4 fl4;
Al Viro9e12bb22006-09-26 21:25:20 -07002574 __be32 dst = 0;
2575 __be32 src = 0;
2576 u32 iif;
Thomas Grafd889ce32006-08-17 18:15:44 -07002577 int err;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002578 int mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002579 struct sk_buff *skb;
David Ahernc36ba662015-09-02 13:58:36 -07002580 u32 table_id = RT_TABLE_MAIN;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002581 kuid_t uid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002582
Thomas Grafd889ce32006-08-17 18:15:44 -07002583 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
2584 if (err < 0)
2585 goto errout;
2586
2587 rtm = nlmsg_data(nlh);
2588
Linus Torvalds1da177e2005-04-16 15:20:36 -07002589 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002590 if (!skb) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002591 err = -ENOBUFS;
2592 goto errout;
2593 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002594
2595 /* Reserve room for dummy headers, this skb can pass
2596 through good chunk of routing engine.
2597 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07002598 skb_reset_mac_header(skb);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002599 skb_reset_network_header(skb);
Stephen Hemmingerd2c962b2006-04-17 17:27:11 -07002600
2601 /* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -07002602 ip_hdr(skb)->protocol = IPPROTO_ICMP;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002603 skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));
2604
Jiri Benc67b61f62015-03-29 16:59:26 +02002605 src = tb[RTA_SRC] ? nla_get_in_addr(tb[RTA_SRC]) : 0;
2606 dst = tb[RTA_DST] ? nla_get_in_addr(tb[RTA_DST]) : 0;
Thomas Grafd889ce32006-08-17 18:15:44 -07002607 iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002608 mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002609 if (tb[RTA_UID])
2610 uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
2611 else
2612 uid = (iif ? INVALID_UID : current_uid());
Linus Torvalds1da177e2005-04-16 15:20:36 -07002613
David Millerd6c0a4f2012-07-01 02:02:59 +00002614 memset(&fl4, 0, sizeof(fl4));
2615 fl4.daddr = dst;
2616 fl4.saddr = src;
2617 fl4.flowi4_tos = rtm->rtm_tos;
2618 fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
2619 fl4.flowi4_mark = mark;
Lorenzo Colitti622ec2c2016-11-04 02:23:42 +09002620 fl4.flowi4_uid = uid;
David Millerd6c0a4f2012-07-01 02:02:59 +00002621
Linus Torvalds1da177e2005-04-16 15:20:36 -07002622 if (iif) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002623 struct net_device *dev;
2624
Denis V. Lunev19375042008-02-28 20:52:04 -08002625 dev = __dev_get_by_index(net, iif);
Ian Morris51456b22015-04-03 09:17:26 +01002626 if (!dev) {
Thomas Grafd889ce32006-08-17 18:15:44 -07002627 err = -ENODEV;
2628 goto errout_free;
2629 }
2630
Linus Torvalds1da177e2005-04-16 15:20:36 -07002631 skb->protocol = htons(ETH_P_IP);
2632 skb->dev = dev;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002633 skb->mark = mark;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002634 local_bh_disable();
2635 err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
2636 local_bh_enable();
Thomas Grafd889ce32006-08-17 18:15:44 -07002637
Eric Dumazet511c3f92009-06-02 05:14:27 +00002638 rt = skb_rtable(skb);
Changli Gaod8d1f302010-06-10 23:31:35 -07002639 if (err == 0 && rt->dst.error)
2640 err = -rt->dst.error;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002641 } else {
David S. Miller9d6ec932011-03-12 01:12:47 -05002642 rt = ip_route_output_key(net, &fl4);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002643
2644 err = 0;
2645 if (IS_ERR(rt))
2646 err = PTR_ERR(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647 }
Thomas Grafd889ce32006-08-17 18:15:44 -07002648
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649 if (err)
Thomas Grafd889ce32006-08-17 18:15:44 -07002650 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002651
Changli Gaod8d1f302010-06-10 23:31:35 -07002652 skb_dst_set(skb, &rt->dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002653 if (rtm->rtm_flags & RTM_F_NOTIFY)
2654 rt->rt_flags |= RTCF_NOTIFY;
2655
David Ahernc36ba662015-09-02 13:58:36 -07002656 if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
2657 table_id = rt->rt_table_id;
2658
2659 err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002660 NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
David Ahern0c8d8032017-01-05 19:32:46 -08002661 RTM_NEWROUTE);
David S. Miller7b46a642015-01-18 23:36:08 -05002662 if (err < 0)
Thomas Grafd889ce32006-08-17 18:15:44 -07002663 goto errout_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002664
Eric W. Biederman15e47302012-09-07 20:12:54 +00002665 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
Thomas Grafd889ce32006-08-17 18:15:44 -07002666errout:
Thomas Graf2942e902006-08-15 00:30:25 -07002667 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002668
Thomas Grafd889ce32006-08-17 18:15:44 -07002669errout_free:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002670 kfree_skb(skb);
Thomas Grafd889ce32006-08-17 18:15:44 -07002671 goto errout;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002672}
2673
Linus Torvalds1da177e2005-04-16 15:20:36 -07002674void ip_rt_multicast_event(struct in_device *in_dev)
2675{
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00002676 rt_cache_flush(dev_net(in_dev->dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002677}
2678
2679#ifdef CONFIG_SYSCTL
/* Garbage-collection tunables exposed below via
 * /proc/sys/net/ipv4/route/{gc_interval,gc_min_interval[_ms],gc_elasticity}.
 * Marked __read_mostly: written only through sysctl, read on hot paths.
 */
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2683
Joe Perchesfe2c6332013-06-11 23:04:25 -07002684static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002685 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002686 size_t *lenp, loff_t *ppos)
2687{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002688 struct net *net = (struct net *)__ctl->extra1;
2689
Linus Torvalds1da177e2005-04-16 15:20:36 -07002690 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002691 rt_cache_flush(net);
2692 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002694 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695
2696 return -EINVAL;
2697}
2698
/* Global (not per-netns) IPv4 routing tunables under
 * /proc/sys/net/ipv4/route/.  All entries use the generic proc_dointvec*
 * handlers; time-valued knobs use the *_jiffies variants so userspace
 * deals in seconds (or milliseconds for the _ms variant).
 */
static struct ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Same backing variable as above, millisecond granularity. */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* ICMP redirect rate-limiting parameters. */
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* ICMP error generation rate-limiting (token bucket). */
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Path-MTU discovery parameters. */
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002809
/* Per-netns table holding only the write-only "flush" trigger; duplicated
 * per namespace in sysctl_route_net_init() so .extra1 can carry the netns.
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2819
2820static __net_init int sysctl_route_net_init(struct net *net)
2821{
2822 struct ctl_table *tbl;
2823
2824 tbl = ipv4_route_flush_table;
Octavian Purdila09ad9bc2009-11-25 15:14:13 -08002825 if (!net_eq(net, &init_net)) {
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002826 tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
Ian Morris51456b22015-04-03 09:17:26 +01002827 if (!tbl)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002828 goto err_dup;
Eric W. Biederman464dc802012-11-16 03:02:59 +00002829
2830 /* Don't export sysctls to unprivileged users */
2831 if (net->user_ns != &init_user_ns)
2832 tbl[0].procname = NULL;
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002833 }
2834 tbl[0].extra1 = net;
2835
Eric W. Biedermanec8f23c2012-04-19 13:44:49 +00002836 net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
Ian Morris51456b22015-04-03 09:17:26 +01002837 if (!net->ipv4.route_hdr)
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002838 goto err_reg;
2839 return 0;
2840
2841err_reg:
2842 if (tbl != ipv4_route_flush_table)
2843 kfree(tbl);
2844err_dup:
2845 return -ENOMEM;
2846}
2847
2848static __net_exit void sysctl_route_net_exit(struct net *net)
2849{
2850 struct ctl_table *tbl;
2851
2852 tbl = net->ipv4.route_hdr->ctl_table_arg;
2853 unregister_net_sysctl_table(net->ipv4.route_hdr);
2854 BUG_ON(tbl == ipv4_route_flush_table);
2855 kfree(tbl);
2856}
2857
/* Pernet hooks registering/unregistering the "flush" sysctl per netns. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002862#endif
2863
Neil Horman3ee94372010-05-08 01:57:52 -07002864static __net_init int rt_genid_init(struct net *net)
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002865{
fan.duca4c3fc2013-07-30 08:33:53 +08002866 atomic_set(&net->ipv4.rt_genid, 0);
Timo Teräs5aad1de2013-05-27 20:46:33 +00002867 atomic_set(&net->fnhe_genid, 0);
David S. Miller436c3b62011-03-24 17:42:21 -07002868 get_random_bytes(&net->ipv4.dev_addr_genid,
2869 sizeof(net->ipv4.dev_addr_genid));
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002870 return 0;
2871}
2872
/* Pernet init of generation counters; no exit hook is needed since the
 * counters live inside struct net itself.
 */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2876
David S. Millerc3426b42012-06-09 16:27:05 -07002877static int __net_init ipv4_inetpeer_init(struct net *net)
2878{
2879 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2880
2881 if (!bp)
2882 return -ENOMEM;
2883 inet_peer_base_init(bp);
2884 net->ipv4.peers = bp;
2885 return 0;
2886}
2887
2888static void __net_exit ipv4_inetpeer_exit(struct net *net)
2889{
2890 struct inet_peer_base *bp = net->ipv4.peers;
2891
2892 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002893 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002894 kfree(bp);
2895}
2896
/* Pernet lifetime management of the IPv4 inetpeer base. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	=	ipv4_inetpeer_init,
	.exit	=	ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002901
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-cpu route-classid accounting array; allocated in ip_rt_init(). */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002905
/* Boot-time initialization of the IPv4 routing subsystem.  Allocation
 * failures here are fatal (panic) since routing cannot work without
 * these structures.  Statement order matters: caches and counters must
 * exist before devinet/fib init and netlink registration run.
 * Always returns 0.
 */
int __init ip_rt_init(void)
{
	int rc = 0;
	int cpu;

	/* Arrays used for IP ID generation; seeded with random data. */
	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

	ip_tstamps = kcalloc(IP_IDENTS_SZ, sizeof(*ip_tstamps), GFP_KERNEL);
	if (!ip_tstamps)
		panic("IP: failed to allocate ip_tstamps\n");

	/* Per-cpu lists of routes not in the cache proper. */
	for_each_possible_cpu(cpu) {
		struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu);

		INIT_LIST_HEAD(&ul->head);
		spin_lock_init(&ul->lock);
	}
#ifdef CONFIG_IP_ROUTE_CLASSID
	/* 256 classid slots per cpu for traffic accounting. */
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts share the same slab cache. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable gc thresholds/limits by default. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	/* proc failure is non-fatal: routing works without the files. */
	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2966
Al Viroa1bc6eb2008-07-30 06:32:52 -04002967#ifdef CONFIG_SYSCTL
Al Viroeeb61f72008-07-27 08:59:33 +01002968/*
2969 * We really need to sanitize the damn ipv4 init order, then all
2970 * this nonsense will go away.
2971 */
void __init ip_static_sysctl_init(void)
{
	/* Register the global route tunables table for init_net early,
	 * independent of the pernet machinery above.
	 */
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
Al Viroa1bc6eb2008-07-30 06:32:52 -04002976#endif