blob: d1a9907901cba65518bbd9b07bd34d72366153fc [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
Lucas De Marchi25985ed2011-03-30 22:57:33 -030015 * This work is based on the LPC-trie which is originally described in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
Justin P. Mattock631dd1a2010-10-18 11:03:14 +020019 * http://www.csc.kth.se/~snilsson/software/dyntrie2/
Robert Olsson19baf832005-06-21 12:43:18 -070020 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Jens Låås80b71b82009-08-28 23:57:15 -070051#define VERSION "0.409"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070054#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <linux/types.h>
56#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070057#include <linux/mm.h>
58#include <linux/string.h>
59#include <linux/socket.h>
60#include <linux/sockios.h>
61#include <linux/errno.h>
62#include <linux/in.h>
63#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080064#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070065#include <linux/netdevice.h>
66#include <linux/if_arp.h>
67#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070068#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070069#include <linux/skbuff.h>
70#include <linux/netlink.h>
71#include <linux/init.h>
72#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090073#include <linux/slab.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040074#include <linux/export.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020075#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070076#include <net/ip.h>
77#include <net/protocol.h>
78#include <net/route.h>
79#include <net/tcp.h>
80#include <net/sock.h>
81#include <net/ip_fib.h>
82#include "fib_lookup.h"
83
Robert Olsson06ef9212006-03-20 21:35:01 -080084#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070085
Robert Olsson19baf832005-06-21 12:43:18 -070086#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070087
Robert Olsson19baf832005-06-21 12:43:18 -070088typedef unsigned int t_key;
89
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080090#define IS_TNODE(n) ((n)->bits)
91#define IS_LEAF(n) (!(n)->bits)
Robert Olsson2373ce12005-08-25 13:01:29 -070092
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080093struct tnode {
94 t_key key;
95 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
96 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
97 struct tnode __rcu *parent;
Alexander Duyck37fd30f2014-12-31 10:55:41 -080098 struct rcu_head rcu;
99 /* everything above this comment must be the same as rt_trie_node */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800100 unsigned int full_children; /* KEYLENGTH bits needed */
101 unsigned int empty_children; /* KEYLENGTH bits needed */
102 struct rt_trie_node __rcu *child[0];
103};
Robert Olsson19baf832005-06-21 12:43:18 -0700104
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800105/* This struct represents the shared bits between tnode and leaf. If any
106 * ordering is changed here is must also be updated in tnode and leaf as
107 * well.
108 */
David S. Millerb299e4f2011-02-02 20:48:10 -0800109struct rt_trie_node {
Eric Dumazet8d965442008-01-13 22:31:44 -0800110 t_key key;
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800111 unsigned char bits;
112 unsigned char pos;
113 struct tnode __rcu *parent;
114 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700115};
116
117struct leaf {
Eric Dumazet8d965442008-01-13 22:31:44 -0800118 t_key key;
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800119 unsigned char bits;
120 unsigned char pos;
121 struct tnode __rcu *parent;
Robert Olsson2373ce12005-08-25 13:01:29 -0700122 struct rcu_head rcu;
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800123 /* everything above this comment must be the same as rt_trie_node */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800124 struct hlist_head list;
Robert Olsson19baf832005-06-21 12:43:18 -0700125};
126
127struct leaf_info {
128 struct hlist_node hlist;
129 int plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000130 u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
Robert Olsson19baf832005-06-21 12:43:18 -0700131 struct list_head falh;
Eric Dumazet5c745012011-07-18 03:16:33 +0000132 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700133};
134
Robert Olsson19baf832005-06-21 12:43:18 -0700135#ifdef CONFIG_IP_FIB_TRIE_STATS
136struct trie_use_stats {
137 unsigned int gets;
138 unsigned int backtrack;
139 unsigned int semantic_match_passed;
140 unsigned int semantic_match_miss;
141 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700142 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700143};
144#endif
145
146struct trie_stat {
147 unsigned int totdepth;
148 unsigned int maxdepth;
149 unsigned int tnodes;
150 unsigned int leaves;
151 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800152 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800153 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700154};
Robert Olsson19baf832005-06-21 12:43:18 -0700155
156struct trie {
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700157 struct rt_trie_node __rcu *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700158#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800159 struct trie_use_stats __percpu *stats;
Robert Olsson19baf832005-06-21 12:43:18 -0700160#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700161};
162
David S. Millerb299e4f2011-02-02 20:48:10 -0800163static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800164 int wasfull);
David S. Millerb299e4f2011-02-02 20:48:10 -0800165static struct rt_trie_node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700166static struct tnode *inflate(struct trie *t, struct tnode *tn);
167static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700168/* tnodes to free after resize(); protected by RTNL */
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800169static struct callback_head *tnode_free_head;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000170static size_t tnode_free_size;
171
172/*
173 * synchronize_rcu after call_rcu for that many pages; it should be especially
174 * useful before resizing the root node with PREEMPT_NONE configs; the value was
175 * obtained experimentally, aiming to avoid visible slowdown.
176 */
177static const int sync_pages = 128;
Robert Olsson19baf832005-06-21 12:43:18 -0700178
Christoph Lametere18b8902006-12-06 20:33:20 -0800179static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800180static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700181
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800182/* caller must hold RTNL */
183#define node_parent(n) rtnl_dereference((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700184
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800185/* caller must hold RCU read lock or RTNL */
186#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700187
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800188/* wrapper for rcu_assign_pointer */
David S. Millerb299e4f2011-02-02 20:48:10 -0800189static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
Stephen Hemminger06801912007-08-10 15:22:13 -0700190{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800191 if (node)
192 rcu_assign_pointer(node->parent, ptr);
193}
194
195#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
196
197/* This provides us with the number of children in this node, in the case of a
198 * leaf this will return 0 meaning none of the children are accessible.
199 */
200static inline int tnode_child_length(const struct tnode *tn)
201{
202 return (1ul << tn->bits) & ~(1ul);
Stephen Hemminger06801912007-08-10 15:22:13 -0700203}
Robert Olsson2373ce12005-08-25 13:01:29 -0700204
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700205/*
206 * caller must hold RTNL
207 */
208static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700209{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800210 BUG_ON(i >= tnode_child_length(tn));
Robert Olsson19baf832005-06-21 12:43:18 -0700211
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700212 return rtnl_dereference(tn->child[i]);
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800213}
214
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700215/*
216 * caller must hold RCU read lock or RTNL
217 */
218static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800219{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800220 BUG_ON(i >= tnode_child_length(tn));
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800221
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700222 return rcu_dereference_rtnl(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700223}
224
David S. Miller3b004562011-02-16 14:56:22 -0800225static inline t_key mask_pfx(t_key k, unsigned int l)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700226{
227 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
228}
229
David S. Miller3b004562011-02-16 14:56:22 -0800230static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700231{
Olof Johansson91b9a272005-08-09 20:24:39 -0700232 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700233 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700234 else
Robert Olsson19baf832005-06-21 12:43:18 -0700235 return 0;
236}
237
238static inline int tkey_equals(t_key a, t_key b)
239{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700240 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700241}
242
243static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
244{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700245 if (bits == 0 || offset >= KEYLENGTH)
246 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700247 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
248 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700249}
Robert Olsson19baf832005-06-21 12:43:18 -0700250
251static inline int tkey_mismatch(t_key a, int offset, t_key b)
252{
253 t_key diff = a ^ b;
254 int i = offset;
255
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700256 if (!diff)
257 return 0;
258 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700259 i++;
260 return i;
261}
262
Robert Olsson19baf832005-06-21 12:43:18 -0700263/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900264 To understand this stuff, an understanding of keys and all their bits is
265 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700266 all of the bits in that key are significant.
267
268 Consider a node 'n' and its parent 'tp'.
269
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900270 If n is a leaf, every bit in its key is significant. Its presence is
271 necessitated by path compression, since during a tree traversal (when
272 searching for a leaf - unless we are doing an insertion) we will completely
273 ignore all skipped bits we encounter. Thus we need to verify, at the end of
274 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700275 correct key path.
276
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900277 Note that we can never "miss" the correct key in the tree if present by
278 following the wrong path. Path compression ensures that segments of the key
279 that are the same for all keys with a given prefix are skipped, but the
280 skipped part *is* identical for each node in the subtrie below the skipped
281 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700282 call to tkey_sub_equals() in trie_insert().
283
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900284 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700285 have many different meanings.
286
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900287 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700288 _________________________________________________________________
289 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
290 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900291 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700292
293 _________________________________________________________________
294 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
295 -----------------------------------------------------------------
296 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
297
298 tp->pos = 7
299 tp->bits = 3
300 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700301 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700302
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900303 First, let's just ignore the bits that come before the parent tp, that is
304 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700305 not use them for anything.
306
307 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900308 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700309 'n' among tp's children.
310
311 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
312 for the node n.
313
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900314 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700315 of the bits are really not needed or indeed known in n->key.
316
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900317 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700318 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900319
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700320
Robert Olsson19baf832005-06-21 12:43:18 -0700321 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
322 at this point.
323
324*/
325
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800326static const int halve_threshold = 25;
327static const int inflate_threshold = 50;
Jarek Poplawski345aa032009-07-07 19:39:16 -0700328static const int halve_threshold_root = 15;
Jens Låås80b71b82009-08-28 23:57:15 -0700329static const int inflate_threshold_root = 30;
Robert Olsson2373ce12005-08-25 13:01:29 -0700330
331static void __alias_free_mem(struct rcu_head *head)
332{
333 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
334 kmem_cache_free(fn_alias_kmem, fa);
335}
336
337static inline void alias_free_mem_rcu(struct fib_alias *fa)
338{
339 call_rcu(&fa->rcu, __alias_free_mem);
340}
341
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800342#define TNODE_KMALLOC_MAX \
343 ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct rt_trie_node *))
344
345static void __node_free_rcu(struct rcu_head *head)
Robert Olsson2373ce12005-08-25 13:01:29 -0700346{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800347 struct rt_trie_node *n = container_of(head, struct rt_trie_node, rcu);
348
349 if (IS_LEAF(n))
350 kmem_cache_free(trie_leaf_kmem, n);
351 else if (n->bits <= TNODE_KMALLOC_MAX)
352 kfree(n);
353 else
354 vfree(n);
Robert Olsson2373ce12005-08-25 13:01:29 -0700355}
356
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800357#define node_free(n) call_rcu(&n->rcu, __node_free_rcu)
Stephen Hemminger387a5482008-04-10 03:47:34 -0700358
Robert Olsson2373ce12005-08-25 13:01:29 -0700359static inline void free_leaf_info(struct leaf_info *leaf)
360{
Lai Jiangshanbceb0f42011-03-18 11:42:34 +0800361 kfree_rcu(leaf, rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700362}
363
Eric Dumazet8d965442008-01-13 22:31:44 -0800364static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700365{
Robert Olsson2373ce12005-08-25 13:01:29 -0700366 if (size <= PAGE_SIZE)
Eric Dumazet8d965442008-01-13 22:31:44 -0800367 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700368 else
Eric Dumazet7a1c8e52010-11-20 07:46:35 +0000369 return vzalloc(size);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700370}
Robert Olsson2373ce12005-08-25 13:01:29 -0700371
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700372static void tnode_free_safe(struct tnode *tn)
373{
374 BUG_ON(IS_LEAF(tn));
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800375 tn->rcu.next = tnode_free_head;
376 tnode_free_head = &tn->rcu;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700377}
378
379static void tnode_free_flush(void)
380{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800381 struct callback_head *head;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700382
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800383 while ((head = tnode_free_head)) {
384 struct tnode *tn = container_of(head, struct tnode, rcu);
385
386 tnode_free_head = head->next;
387 tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]);
388
389 node_free(tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700390 }
Jarek Poplawskic3059472009-07-14 08:33:08 +0000391
392 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
393 tnode_free_size = 0;
394 synchronize_rcu();
395 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700396}
397
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800398static struct leaf *leaf_new(t_key key)
Robert Olsson19baf832005-06-21 12:43:18 -0700399{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800400 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700401 if (l) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800402 l->parent = NULL;
403 /* set key and pos to reflect full key value
404 * any trailing zeros in the key should be ignored
405 * as the nodes are searched
406 */
407 l->key = key;
408 l->pos = KEYLENGTH;
409 /* set bits to 0 indicating we are not a tnode */
410 l->bits = 0;
411
Robert Olsson19baf832005-06-21 12:43:18 -0700412 INIT_HLIST_HEAD(&l->list);
413 }
414 return l;
415}
416
417static struct leaf_info *leaf_info_new(int plen)
418{
419 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700420 if (li) {
421 li->plen = plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000422 li->mask_plen = ntohl(inet_make_mask(plen));
Robert Olsson2373ce12005-08-25 13:01:29 -0700423 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700424 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700425 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700426}
427
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800428static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700429{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800430 size_t sz = offsetof(struct tnode, child[1 << bits]);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700431 struct tnode *tn = tnode_alloc(sz);
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800432 unsigned int shift = pos + bits;
433
434 /* verify bits and pos their msb bits clear and values are valid */
435 BUG_ON(!bits || (shift > KEYLENGTH));
Robert Olsson19baf832005-06-21 12:43:18 -0700436
Olof Johansson91b9a272005-08-09 20:24:39 -0700437 if (tn) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800438 tn->parent = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700439 tn->pos = pos;
440 tn->bits = bits;
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800441 tn->key = mask_pfx(key, pos);
Robert Olsson19baf832005-06-21 12:43:18 -0700442 tn->full_children = 0;
443 tn->empty_children = 1<<bits;
444 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700445
Eric Dumazeta034ee32010-09-09 23:32:28 +0000446 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
Lin Ming4ea4bf72012-07-29 01:19:55 +0000447 sizeof(struct rt_trie_node *) << bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700448 return tn;
449}
450
Robert Olsson19baf832005-06-21 12:43:18 -0700451/*
452 * Check whether a tnode 'n' is "full", i.e. it is an internal node
453 * and no bits are skipped. See discussion in dyntree paper p. 6
454 */
455
David S. Millerb299e4f2011-02-02 20:48:10 -0800456static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700457{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800458 return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700459}
460
Lin Ming61648d92012-07-29 02:00:03 +0000461static inline void put_child(struct tnode *tn, int i,
David S. Millerb299e4f2011-02-02 20:48:10 -0800462 struct rt_trie_node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700463{
464 tnode_put_child_reorg(tn, i, n, -1);
465}
466
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700467 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700468 * Add a child at position i overwriting the old value.
469 * Update the value of full_children and empty_children.
470 */
471
David S. Millerb299e4f2011-02-02 20:48:10 -0800472static void tnode_put_child_reorg(struct tnode *tn, int i, struct rt_trie_node *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800473 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700474{
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700475 struct rt_trie_node *chi = rtnl_dereference(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700476 int isfull;
477
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700478 BUG_ON(i >= 1<<tn->bits);
479
Robert Olsson19baf832005-06-21 12:43:18 -0700480 /* update emptyChildren */
481 if (n == NULL && chi != NULL)
482 tn->empty_children++;
483 else if (n != NULL && chi == NULL)
484 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700485
Robert Olsson19baf832005-06-21 12:43:18 -0700486 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700487 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700488 wasfull = tnode_full(tn, chi);
489
490 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700491 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700492 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700493 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700494 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700495
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800496 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700497
Eric Dumazetcf778b02012-01-12 04:41:32 +0000498 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700499}
500
Jens Låås80b71b82009-08-28 23:57:15 -0700501#define MAX_WORK 10
David S. Millerb299e4f2011-02-02 20:48:10 -0800502static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700503{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800504 struct rt_trie_node *n = NULL;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700505 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700506 int inflate_threshold_use;
507 int halve_threshold_use;
Jens Låås80b71b82009-08-28 23:57:15 -0700508 int max_work;
Robert Olsson19baf832005-06-21 12:43:18 -0700509
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900510 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700511 return NULL;
512
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700513 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
514 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700515
516 /* No children */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800517 if (tn->empty_children > (tnode_child_length(tn) - 1))
518 goto no_children;
519
Robert Olsson19baf832005-06-21 12:43:18 -0700520 /* One child */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800521 if (tn->empty_children == (tnode_child_length(tn) - 1))
Jens Låås80b71b82009-08-28 23:57:15 -0700522 goto one_child;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700523 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700524 * Double as long as the resulting node has a number of
525 * nonempty nodes that are above the threshold.
526 */
527
528 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700529 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
530 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700531 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700532 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700533 * children in the *doubled* node is at least 'high'."
534 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700535 * 'high' in this instance is the variable 'inflate_threshold'. It
536 * is expressed as a percentage, so we multiply it with
537 * tnode_child_length() and instead of multiplying by 2 (since the
538 * child array will be doubled by inflate()) and multiplying
539 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700540 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700541 *
542 * The left-hand side may look a bit weird: tnode_child_length(tn)
543 * - tn->empty_children is of course the number of non-null children
544 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700545 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700546 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700547 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700548 *
Robert Olsson19baf832005-06-21 12:43:18 -0700549 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700550 *
Robert Olsson19baf832005-06-21 12:43:18 -0700551 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700552 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700553 * tn->full_children;
554 *
555 * new_child_length = tnode_child_length(tn) * 2;
556 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700557 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700558 * new_child_length;
559 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700560 *
561 * ...and so on, tho it would mess up the while () loop.
562 *
Robert Olsson19baf832005-06-21 12:43:18 -0700563 * anyway,
564 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
565 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700566 *
Robert Olsson19baf832005-06-21 12:43:18 -0700567 * avoid a division:
568 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
569 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700570 *
Robert Olsson19baf832005-06-21 12:43:18 -0700571 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700572 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700573 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700574 *
Robert Olsson19baf832005-06-21 12:43:18 -0700575 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700576 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700577 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700578 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700579 *
Robert Olsson19baf832005-06-21 12:43:18 -0700580 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700581 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700582 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700583 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700584 *
Robert Olsson19baf832005-06-21 12:43:18 -0700585 */
586
Robert Olssone6308be2005-10-04 13:01:58 -0700587 /* Keep root node larger */
588
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800589 if (!node_parent(tn)) {
Jens Låås80b71b82009-08-28 23:57:15 -0700590 inflate_threshold_use = inflate_threshold_root;
591 halve_threshold_use = halve_threshold_root;
Eric Dumazeta034ee32010-09-09 23:32:28 +0000592 } else {
Robert Olssone6308be2005-10-04 13:01:58 -0700593 inflate_threshold_use = inflate_threshold;
Jens Låås80b71b82009-08-28 23:57:15 -0700594 halve_threshold_use = halve_threshold;
595 }
Robert Olssone6308be2005-10-04 13:01:58 -0700596
Jens Låås80b71b82009-08-28 23:57:15 -0700597 max_work = MAX_WORK;
598 while ((tn->full_children > 0 && max_work-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800599 50 * (tn->full_children + tnode_child_length(tn)
600 - tn->empty_children)
601 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700602
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700603 old_tn = tn;
604 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800605
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700606 if (IS_ERR(tn)) {
607 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700608#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800609 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700610#endif
611 break;
612 }
Robert Olsson19baf832005-06-21 12:43:18 -0700613 }
614
Jens Låås80b71b82009-08-28 23:57:15 -0700615 /* Return if at least one inflate is run */
Eric Dumazeta034ee32010-09-09 23:32:28 +0000616 if (max_work != MAX_WORK)
David S. Millerb299e4f2011-02-02 20:48:10 -0800617 return (struct rt_trie_node *) tn;
Jens Låås80b71b82009-08-28 23:57:15 -0700618
Robert Olsson19baf832005-06-21 12:43:18 -0700619 /*
620 * Halve as long as the number of empty children in this
621 * node is above threshold.
622 */
Robert Olsson2f368952005-07-05 15:02:40 -0700623
Jens Låås80b71b82009-08-28 23:57:15 -0700624 max_work = MAX_WORK;
625 while (tn->bits > 1 && max_work-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700626 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700627 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700628
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700629 old_tn = tn;
630 tn = halve(t, tn);
631 if (IS_ERR(tn)) {
632 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700633#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800634 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700635#endif
636 break;
637 }
638 }
639
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700640
Robert Olsson19baf832005-06-21 12:43:18 -0700641 /* Only one child remains */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800642 if (tn->empty_children == (tnode_child_length(tn) - 1)) {
643 unsigned long i;
Jens Låås80b71b82009-08-28 23:57:15 -0700644one_child:
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800645 for (i = tnode_child_length(tn); !n && i;)
646 n = tnode_get_child(tn, --i);
647no_children:
648 /* compress one level */
649 node_set_parent(n, NULL);
650 tnode_free_safe(tn);
651 return n;
Jens Låås80b71b82009-08-28 23:57:15 -0700652 }
David S. Millerb299e4f2011-02-02 20:48:10 -0800653 return (struct rt_trie_node *) tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700654}
655
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700656
657static void tnode_clean_free(struct tnode *tn)
658{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800659 struct rt_trie_node *tofree;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700660 int i;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700661
662 for (i = 0; i < tnode_child_length(tn); i++) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800663 tofree = rtnl_dereference(tn->child[i]);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700664 if (tofree)
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800665 node_free(tofree);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700666 }
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800667 node_free(tn);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700668}
669
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700670static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700671{
Robert Olsson19baf832005-06-21 12:43:18 -0700672 struct tnode *oldtnode = tn;
673 int olen = tnode_child_length(tn);
674 int i;
675
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700676 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700677
678 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
679
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700680 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700681 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700682
683 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700684 * Preallocate and store tnodes before the actual work so we
685 * don't get into an inconsistent state if memory allocation
686 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700687 * of tnode is ignored.
688 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700689
690 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800691 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700692
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800693 inode = (struct tnode *) tnode_get_child(oldtnode, i);
Robert Olsson2f368952005-07-05 15:02:40 -0700694 if (inode &&
695 IS_TNODE(inode) &&
696 inode->pos == oldtnode->pos + oldtnode->bits &&
697 inode->bits > 1) {
698 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700699 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700700
Robert Olsson2f368952005-07-05 15:02:40 -0700701 left = tnode_new(inode->key&(~m), inode->pos + 1,
702 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700703 if (!left)
704 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700705
Robert Olsson2f368952005-07-05 15:02:40 -0700706 right = tnode_new(inode->key|m, inode->pos + 1,
707 inode->bits - 1);
708
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900709 if (!right) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800710 node_free(left);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700711 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900712 }
Robert Olsson2f368952005-07-05 15:02:40 -0700713
Lin Ming61648d92012-07-29 02:00:03 +0000714 put_child(tn, 2*i, (struct rt_trie_node *) left);
715 put_child(tn, 2*i+1, (struct rt_trie_node *) right);
Robert Olsson2f368952005-07-05 15:02:40 -0700716 }
717 }
718
Olof Johansson91b9a272005-08-09 20:24:39 -0700719 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800720 struct tnode *inode;
David S. Millerb299e4f2011-02-02 20:48:10 -0800721 struct rt_trie_node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700722 struct tnode *left, *right;
723 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700724
Robert Olsson19baf832005-06-21 12:43:18 -0700725 /* An empty child */
726 if (node == NULL)
727 continue;
728
729 /* A leaf or an internal node with skipped bits */
730
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800731 if (IS_LEAF(node) || (node->pos > (tn->pos + tn->bits - 1))) {
baker.zhangbbe34cf2013-10-01 07:45:09 +0800732 put_child(tn,
733 tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1),
734 node);
Robert Olsson19baf832005-06-21 12:43:18 -0700735 continue;
736 }
737
738 /* An internal node with two children */
739 inode = (struct tnode *) node;
740
741 if (inode->bits == 1) {
Lin Ming61648d92012-07-29 02:00:03 +0000742 put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
743 put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
Robert Olsson19baf832005-06-21 12:43:18 -0700744
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700745 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700746 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700747 }
748
Olof Johansson91b9a272005-08-09 20:24:39 -0700749 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700750
Olof Johansson91b9a272005-08-09 20:24:39 -0700751 /* We will replace this node 'inode' with two new
752 * ones, 'left' and 'right', each with half of the
753 * original children. The two new nodes will have
754 * a position one bit further down the key and this
755 * means that the "significant" part of their keys
756 * (see the discussion near the top of this file)
757 * will differ by one bit, which will be "0" in
758 * left's key and "1" in right's key. Since we are
759 * moving the key position by one step, the bit that
760 * we are moving away from - the bit at position
761 * (inode->pos) - is the one that will differ between
762 * left and right. So... we synthesize that bit in the
763 * two new keys.
764 * The mask 'm' below will be a single "one" bit at
765 * the position (inode->pos)
766 */
Robert Olsson19baf832005-06-21 12:43:18 -0700767
Olof Johansson91b9a272005-08-09 20:24:39 -0700768 /* Use the old key, but set the new significant
769 * bit to zero.
770 */
Robert Olsson19baf832005-06-21 12:43:18 -0700771
Olof Johansson91b9a272005-08-09 20:24:39 -0700772 left = (struct tnode *) tnode_get_child(tn, 2*i);
Lin Ming61648d92012-07-29 02:00:03 +0000773 put_child(tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700774
Olof Johansson91b9a272005-08-09 20:24:39 -0700775 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700776
Olof Johansson91b9a272005-08-09 20:24:39 -0700777 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
Lin Ming61648d92012-07-29 02:00:03 +0000778 put_child(tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700779
Olof Johansson91b9a272005-08-09 20:24:39 -0700780 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700781
Olof Johansson91b9a272005-08-09 20:24:39 -0700782 size = tnode_child_length(left);
783 for (j = 0; j < size; j++) {
Lin Ming61648d92012-07-29 02:00:03 +0000784 put_child(left, j, rtnl_dereference(inode->child[j]));
785 put_child(right, j, rtnl_dereference(inode->child[j + size]));
Robert Olsson19baf832005-06-21 12:43:18 -0700786 }
Lin Ming61648d92012-07-29 02:00:03 +0000787 put_child(tn, 2*i, resize(t, left));
788 put_child(tn, 2*i+1, resize(t, right));
Olof Johansson91b9a272005-08-09 20:24:39 -0700789
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700790 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700791 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700792 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700793 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700794nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700795 tnode_clean_free(tn);
796 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700797}
798
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700799static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700800{
801 struct tnode *oldtnode = tn;
David S. Millerb299e4f2011-02-02 20:48:10 -0800802 struct rt_trie_node *left, *right;
Robert Olsson19baf832005-06-21 12:43:18 -0700803 int i;
804 int olen = tnode_child_length(tn);
805
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700806 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700807
808 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700809
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700810 if (!tn)
811 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700812
813 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700814 * Preallocate and store tnodes before the actual work so we
815 * don't get into an inconsistent state if memory allocation
816 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700817 * of tnode is ignored.
818 */
819
Olof Johansson91b9a272005-08-09 20:24:39 -0700820 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700821 left = tnode_get_child(oldtnode, i);
822 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700823
Robert Olsson2f368952005-07-05 15:02:40 -0700824 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700825 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700826 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700827
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700828 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700829
830 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700831 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700832
Lin Ming61648d92012-07-29 02:00:03 +0000833 put_child(tn, i/2, (struct rt_trie_node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700834 }
Robert Olsson2f368952005-07-05 15:02:40 -0700835
Robert Olsson2f368952005-07-05 15:02:40 -0700836 }
Robert Olsson19baf832005-06-21 12:43:18 -0700837
Olof Johansson91b9a272005-08-09 20:24:39 -0700838 for (i = 0; i < olen; i += 2) {
839 struct tnode *newBinNode;
840
Robert Olsson19baf832005-06-21 12:43:18 -0700841 left = tnode_get_child(oldtnode, i);
842 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700843
Robert Olsson19baf832005-06-21 12:43:18 -0700844 /* At least one of the children is empty */
845 if (left == NULL) {
846 if (right == NULL) /* Both are empty */
847 continue;
Lin Ming61648d92012-07-29 02:00:03 +0000848 put_child(tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700849 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700850 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700851
852 if (right == NULL) {
Lin Ming61648d92012-07-29 02:00:03 +0000853 put_child(tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700854 continue;
855 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700856
Robert Olsson19baf832005-06-21 12:43:18 -0700857 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700858 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
Lin Ming61648d92012-07-29 02:00:03 +0000859 put_child(tn, i/2, NULL);
860 put_child(newBinNode, 0, left);
861 put_child(newBinNode, 1, right);
862 put_child(tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700863 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700864 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700865 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700866nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700867 tnode_clean_free(tn);
868 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700869}
870
Robert Olsson772cb712005-09-19 15:31:18 -0700871/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700872 via get_fa_head and dump */
873
Robert Olsson772cb712005-09-19 15:31:18 -0700874static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700875{
Robert Olsson772cb712005-09-19 15:31:18 -0700876 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700877 struct leaf_info *li;
878
Sasha Levinb67bfe02013-02-27 17:06:00 -0800879 hlist_for_each_entry_rcu(li, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700880 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700881 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700882
Robert Olsson19baf832005-06-21 12:43:18 -0700883 return NULL;
884}
885
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800886static inline struct list_head *get_fa_head(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700887{
Robert Olsson772cb712005-09-19 15:31:18 -0700888 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700889
Olof Johansson91b9a272005-08-09 20:24:39 -0700890 if (!li)
891 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700892
Olof Johansson91b9a272005-08-09 20:24:39 -0700893 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700894}
895
896static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
897{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900898 struct leaf_info *li = NULL, *last = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700899
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900900 if (hlist_empty(head)) {
901 hlist_add_head_rcu(&new->hlist, head);
902 } else {
Sasha Levinb67bfe02013-02-27 17:06:00 -0800903 hlist_for_each_entry(li, head, hlist) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900904 if (new->plen > li->plen)
905 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700906
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900907 last = li;
908 }
909 if (last)
Ken Helias1d023282014-08-06 16:09:16 -0700910 hlist_add_behind_rcu(&new->hlist, &last->hlist);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900911 else
912 hlist_add_before_rcu(&new->hlist, &li->hlist);
913 }
Robert Olsson19baf832005-06-21 12:43:18 -0700914}
915
Robert Olsson2373ce12005-08-25 13:01:29 -0700916/* rcu_read_lock needs to be hold by caller from readside */
917
Robert Olsson19baf832005-06-21 12:43:18 -0700918static struct leaf *
919fib_find_node(struct trie *t, u32 key)
920{
921 int pos;
922 struct tnode *tn;
David S. Millerb299e4f2011-02-02 20:48:10 -0800923 struct rt_trie_node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700924
925 pos = 0;
Eric Dumazeta034ee32010-09-09 23:32:28 +0000926 n = rcu_dereference_rtnl(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700927
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800928 while (n && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700929 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700930
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700931 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700932 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800933 n = tnode_get_child_rcu(tn,
934 tkey_extract_bits(key,
935 tn->pos,
936 tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700937 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700938 break;
939 }
940 /* Case we have found a leaf. Compare prefixes */
941
Olof Johansson91b9a272005-08-09 20:24:39 -0700942 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
943 return (struct leaf *)n;
944
Robert Olsson19baf832005-06-21 12:43:18 -0700945 return NULL;
946}
947
Jarek Poplawski7b855762009-06-18 00:28:51 -0700948static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700949{
Robert Olsson19baf832005-06-21 12:43:18 -0700950 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -0700951 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -0700952 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700953
Robert Olsson3ed18d72009-05-21 15:20:59 -0700954 key = tn->key;
955
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800956 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700957 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
958 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Joe Perchese3192692012-06-03 17:41:40 +0000959 tn = (struct tnode *)resize(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800960
Joe Perchese3192692012-06-03 17:41:40 +0000961 tnode_put_child_reorg(tp, cindex,
David S. Millerb299e4f2011-02-02 20:48:10 -0800962 (struct rt_trie_node *)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700963
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800964 tp = node_parent(tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700965 if (!tp)
Eric Dumazetcf778b02012-01-12 04:41:32 +0000966 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700967
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700968 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -0700969 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -0700970 break;
Stephen Hemminger06801912007-08-10 15:22:13 -0700971 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700972 }
Stephen Hemminger06801912007-08-10 15:22:13 -0700973
Robert Olsson19baf832005-06-21 12:43:18 -0700974 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -0700975 if (IS_TNODE(tn))
Joe Perchese3192692012-06-03 17:41:40 +0000976 tn = (struct tnode *)resize(t, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700977
Eric Dumazetcf778b02012-01-12 04:41:32 +0000978 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
Jarek Poplawski7b855762009-06-18 00:28:51 -0700979 tnode_free_flush();
Robert Olsson19baf832005-06-21 12:43:18 -0700980}
981
Robert Olsson2373ce12005-08-25 13:01:29 -0700982/* only used from updater-side */
983
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -0800984static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700985{
986 int pos, newpos;
987 struct tnode *tp = NULL, *tn = NULL;
David S. Millerb299e4f2011-02-02 20:48:10 -0800988 struct rt_trie_node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700989 struct leaf *l;
990 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700991 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700992 struct leaf_info *li;
993 t_key cindex;
994
995 pos = 0;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700996 n = rtnl_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700997
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700998 /* If we point to NULL, stop. Either the tree is empty and we should
999 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001000 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001001 * If we point to a T_TNODE, check if it matches our key. Note that
1002 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001003 * not be the parent's 'pos'+'bits'!
1004 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001005 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001006 * the index from our key, push the T_TNODE and walk the tree.
1007 *
1008 * If it doesn't, we have to replace it with a new T_TNODE.
1009 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001010 * If we point to a T_LEAF, it might or might not have the same key
1011 * as we do. If it does, just change the value, update the T_LEAF's
1012 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001013 * If it doesn't, we need to replace it with a T_TNODE.
1014 */
1015
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001016 while (n && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001017 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001018
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001019 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001020 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001021 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001022 n = tnode_get_child(tn,
1023 tkey_extract_bits(key,
1024 tn->pos,
1025 tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -07001026
Stephen Hemminger06801912007-08-10 15:22:13 -07001027 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001028 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001029 break;
1030 }
1031
1032 /*
1033 * n ----> NULL, LEAF or TNODE
1034 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001035 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001036 */
1037
Olof Johansson91b9a272005-08-09 20:24:39 -07001038 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001039
1040 /* Case 1: n is a leaf. Compare prefixes */
1041
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001042 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001043 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001044 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001045
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001046 if (!li)
1047 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001048
1049 fa_head = &li->falh;
1050 insert_leaf_info(&l->list, li);
1051 goto done;
1052 }
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001053 l = leaf_new(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001054
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001055 if (!l)
1056 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001057
Robert Olsson19baf832005-06-21 12:43:18 -07001058 li = leaf_info_new(plen);
1059
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001060 if (!li) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001061 node_free(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001062 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001063 }
Robert Olsson19baf832005-06-21 12:43:18 -07001064
1065 fa_head = &li->falh;
1066 insert_leaf_info(&l->list, li);
1067
Robert Olsson19baf832005-06-21 12:43:18 -07001068 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001069 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001070
David S. Millerb299e4f2011-02-02 20:48:10 -08001071 node_set_parent((struct rt_trie_node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001072
Olof Johansson91b9a272005-08-09 20:24:39 -07001073 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Lin Ming61648d92012-07-29 02:00:03 +00001074 put_child(tp, cindex, (struct rt_trie_node *)l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001075 } else {
1076 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001077 /*
1078 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001079 * first tnode need some special handling
1080 */
1081
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001082 if (n) {
baker.zhang4c60f1d2013-10-08 11:36:51 +08001083 pos = tp ? tp->pos+tp->bits : 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001084 newpos = tkey_mismatch(key, pos, n->key);
1085 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001086 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001087 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001088 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001089 }
Robert Olsson19baf832005-06-21 12:43:18 -07001090
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001091 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001092 free_leaf_info(li);
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001093 node_free(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001094 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001095 }
1096
David S. Millerb299e4f2011-02-02 20:48:10 -08001097 node_set_parent((struct rt_trie_node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001098
Olof Johansson91b9a272005-08-09 20:24:39 -07001099 missbit = tkey_extract_bits(key, newpos, 1);
Lin Ming61648d92012-07-29 02:00:03 +00001100 put_child(tn, missbit, (struct rt_trie_node *)l);
1101 put_child(tn, 1-missbit, n);
Robert Olsson19baf832005-06-21 12:43:18 -07001102
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001103 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001104 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Lin Ming61648d92012-07-29 02:00:03 +00001105 put_child(tp, cindex, (struct rt_trie_node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001106 } else {
Eric Dumazetcf778b02012-01-12 04:41:32 +00001107 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001108 }
Alexander Duycke962f302014-12-10 21:49:22 -08001109
1110 tp = tn;
Robert Olsson19baf832005-06-21 12:43:18 -07001111 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001112
1113 if (tp && tp->pos + tp->bits > 32)
Joe Perches058bd4d2012-03-11 18:36:11 +00001114 pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1115 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001116
Robert Olsson19baf832005-06-21 12:43:18 -07001117 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001118
Jarek Poplawski7b855762009-06-18 00:28:51 -07001119 trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001120done:
Robert Olsson19baf832005-06-21 12:43:18 -07001121 return fa_head;
1122}
1123
Robert Olssond562f1f2007-03-26 14:22:22 -07001124/*
1125 * Caller must hold RTNL.
1126 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001127int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001128{
1129 struct trie *t = (struct trie *) tb->tb_data;
1130 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001131 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001132 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001133 int plen = cfg->fc_dst_len;
1134 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001135 u32 key, mask;
1136 int err;
1137 struct leaf *l;
1138
1139 if (plen > 32)
1140 return -EINVAL;
1141
Thomas Graf4e902c52006-08-17 18:14:52 -07001142 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001143
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001144 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001145
Olof Johansson91b9a272005-08-09 20:24:39 -07001146 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001147
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001148 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001149 return -EINVAL;
1150
1151 key = key & mask;
1152
Thomas Graf4e902c52006-08-17 18:14:52 -07001153 fi = fib_create_info(cfg);
1154 if (IS_ERR(fi)) {
1155 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001156 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001157 }
Robert Olsson19baf832005-06-21 12:43:18 -07001158
1159 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001160 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001161
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001162 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001163 fa_head = get_fa_head(l, plen);
1164 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1165 }
1166
1167 /* Now fa, if non-NULL, points to the first fib alias
1168 * with the same keys [prefix,tos,priority], if such key already
1169 * exists or to the node before which we will insert new one.
1170 *
1171 * If fa is NULL, we will need to allocate a new one and
1172 * insert to the head of f.
1173 *
1174 * If f is NULL, no fib node matched the destination key
1175 * and we need to allocate a new one of those as well.
1176 */
1177
Julian Anastasov936f6f82008-01-28 21:18:06 -08001178 if (fa && fa->fa_tos == tos &&
1179 fa->fa_info->fib_priority == fi->fib_priority) {
1180 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001181
1182 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001183 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001184 goto out;
1185
Julian Anastasov936f6f82008-01-28 21:18:06 -08001186 /* We have 2 goals:
1187 * 1. Find exact match for type, scope, fib_info to avoid
1188 * duplicate routes
1189 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1190 */
1191 fa_match = NULL;
1192 fa_first = fa;
1193 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1194 list_for_each_entry_continue(fa, fa_head, fa_list) {
1195 if (fa->fa_tos != tos)
1196 break;
1197 if (fa->fa_info->fib_priority != fi->fib_priority)
1198 break;
1199 if (fa->fa_type == cfg->fc_type &&
Julian Anastasov936f6f82008-01-28 21:18:06 -08001200 fa->fa_info == fi) {
1201 fa_match = fa;
1202 break;
1203 }
1204 }
1205
Thomas Graf4e902c52006-08-17 18:14:52 -07001206 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001207 struct fib_info *fi_drop;
1208 u8 state;
1209
Julian Anastasov936f6f82008-01-28 21:18:06 -08001210 fa = fa_first;
1211 if (fa_match) {
1212 if (fa == fa_match)
1213 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001214 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001215 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001216 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001217 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001218 if (new_fa == NULL)
1219 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001220
1221 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001222 new_fa->fa_tos = fa->fa_tos;
1223 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001224 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001225 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001226 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001227
Robert Olsson2373ce12005-08-25 13:01:29 -07001228 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1229 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001230
1231 fib_release_info(fi_drop);
1232 if (state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001233 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Milan Kocianb8f55832007-05-23 14:55:06 -07001234 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1235 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001236
Olof Johansson91b9a272005-08-09 20:24:39 -07001237 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001238 }
1239 /* Error if we find a perfect match which
1240 * uses the same scope, type, and nexthop
1241 * information.
1242 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001243 if (fa_match)
1244 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001245
Thomas Graf4e902c52006-08-17 18:14:52 -07001246 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001247 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001248 }
1249 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001250 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001251 goto out;
1252
1253 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001254 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001255 if (new_fa == NULL)
1256 goto out;
1257
1258 new_fa->fa_info = fi;
1259 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001260 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001261 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001262 /*
1263 * Insert new entry to the list.
1264 */
1265
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001266 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001267 fa_head = fib_insert_node(t, key, plen);
1268 if (unlikely(!fa_head)) {
1269 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001270 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001271 }
Robert Olssonf835e472005-06-28 15:00:39 -07001272 }
Robert Olsson19baf832005-06-21 12:43:18 -07001273
David S. Miller21d8c492011-04-14 14:49:37 -07001274 if (!plen)
1275 tb->tb_num_default++;
1276
Robert Olsson2373ce12005-08-25 13:01:29 -07001277 list_add_tail_rcu(&new_fa->fa_list,
1278 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001279
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001280 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Thomas Graf4e902c52006-08-17 18:14:52 -07001281 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001282 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001283succeeded:
1284 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001285
1286out_free_new_fa:
1287 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001288out:
1289 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001290err:
Robert Olsson19baf832005-06-21 12:43:18 -07001291 return err;
1292}
1293
Robert Olsson772cb712005-09-19 15:31:18 -07001294/* should be called with rcu_read_lock */
David S. Miller5b470442011-01-31 16:10:03 -08001295static int check_leaf(struct fib_table *tb, struct trie *t, struct leaf *l,
David S. Miller22bd5b92011-03-11 19:54:08 -05001296 t_key key, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001297 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001298{
Robert Olsson19baf832005-06-21 12:43:18 -07001299 struct leaf_info *li;
1300 struct hlist_head *hhead = &l->list;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001301
Sasha Levinb67bfe02013-02-27 17:06:00 -08001302 hlist_for_each_entry_rcu(li, hhead, hlist) {
David S. Miller3be06862011-03-07 15:01:10 -08001303 struct fib_alias *fa;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001304
Eric Dumazet5c745012011-07-18 03:16:33 +00001305 if (l->key != (key & li->mask_plen))
Robert Olsson19baf832005-06-21 12:43:18 -07001306 continue;
1307
David S. Miller3be06862011-03-07 15:01:10 -08001308 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1309 struct fib_info *fi = fa->fa_info;
1310 int nhsel, err;
1311
David S. Miller22bd5b92011-03-11 19:54:08 -05001312 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
David S. Miller3be06862011-03-07 15:01:10 -08001313 continue;
David S. Millerdccd9ecc2012-05-10 22:16:32 -04001314 if (fi->fib_dead)
1315 continue;
David S. Miller37e826c2011-03-24 18:06:47 -07001316 if (fa->fa_info->fib_scope < flp->flowi4_scope)
David S. Miller3be06862011-03-07 15:01:10 -08001317 continue;
1318 fib_alias_accessed(fa);
1319 err = fib_props[fa->fa_type].error;
1320 if (err) {
1321#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001322 this_cpu_inc(t->stats->semantic_match_passed);
David S. Miller3be06862011-03-07 15:01:10 -08001323#endif
Julian Anastasov1fbc7842011-03-25 20:33:23 -07001324 return err;
David S. Miller3be06862011-03-07 15:01:10 -08001325 }
1326 if (fi->fib_flags & RTNH_F_DEAD)
1327 continue;
1328 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1329 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1330
1331 if (nh->nh_flags & RTNH_F_DEAD)
1332 continue;
David S. Miller22bd5b92011-03-11 19:54:08 -05001333 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
David S. Miller3be06862011-03-07 15:01:10 -08001334 continue;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001335
Robert Olsson19baf832005-06-21 12:43:18 -07001336#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001337 this_cpu_inc(t->stats->semantic_match_passed);
Robert Olsson19baf832005-06-21 12:43:18 -07001338#endif
Eric Dumazet5c745012011-07-18 03:16:33 +00001339 res->prefixlen = li->plen;
David S. Miller3be06862011-03-07 15:01:10 -08001340 res->nh_sel = nhsel;
1341 res->type = fa->fa_type;
David S. Miller37e826c2011-03-24 18:06:47 -07001342 res->scope = fa->fa_info->fib_scope;
David S. Miller3be06862011-03-07 15:01:10 -08001343 res->fi = fi;
1344 res->table = tb;
1345 res->fa_head = &li->falh;
1346 if (!(fib_flags & FIB_LOOKUP_NOREF))
Eric Dumazet5c745012011-07-18 03:16:33 +00001347 atomic_inc(&fi->fib_clntref);
David S. Miller3be06862011-03-07 15:01:10 -08001348 return 0;
1349 }
1350 }
1351
1352#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001353 this_cpu_inc(t->stats->semantic_match_miss);
David S. Miller3be06862011-03-07 15:01:10 -08001354#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001355 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001356
Ben Hutchings2e655572008-07-10 16:52:52 -07001357 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001358}
1359
David S. Miller22bd5b92011-03-11 19:54:08 -05001360int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001361 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001362{
1363 struct trie *t = (struct trie *) tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001364#ifdef CONFIG_IP_FIB_TRIE_STATS
1365 struct trie_use_stats __percpu *stats = t->stats;
1366#endif
Ben Hutchings2e655572008-07-10 16:52:52 -07001367 int ret;
David S. Millerb299e4f2011-02-02 20:48:10 -08001368 struct rt_trie_node *n;
Robert Olsson19baf832005-06-21 12:43:18 -07001369 struct tnode *pn;
David S. Miller3b004562011-02-16 14:56:22 -08001370 unsigned int pos, bits;
David S. Miller22bd5b92011-03-11 19:54:08 -05001371 t_key key = ntohl(flp->daddr);
David S. Miller3b004562011-02-16 14:56:22 -08001372 unsigned int chopped_off;
Robert Olsson19baf832005-06-21 12:43:18 -07001373 t_key cindex = 0;
David S. Miller3b004562011-02-16 14:56:22 -08001374 unsigned int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001375 struct tnode *cn;
Eric Dumazet874ffa82010-10-13 06:56:11 +00001376 t_key pref_mismatch;
Olof Johansson91b9a272005-08-09 20:24:39 -07001377
Robert Olsson2373ce12005-08-25 13:01:29 -07001378 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001379
Robert Olsson2373ce12005-08-25 13:01:29 -07001380 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001381 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001382 goto failed;
1383
1384#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001385 this_cpu_inc(stats->gets);
Robert Olsson19baf832005-06-21 12:43:18 -07001386#endif
1387
1388 /* Just a leaf? */
1389 if (IS_LEAF(n)) {
David S. Miller5b470442011-01-31 16:10:03 -08001390 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001391 goto found;
Robert Olsson19baf832005-06-21 12:43:18 -07001392 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001393
Robert Olsson19baf832005-06-21 12:43:18 -07001394 pn = (struct tnode *) n;
1395 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001396
Olof Johansson91b9a272005-08-09 20:24:39 -07001397 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001398 pos = pn->pos;
1399 bits = pn->bits;
1400
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001401 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001402 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1403 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001404
Jarek Poplawskib902e572009-07-14 11:20:32 +00001405 n = tnode_get_child_rcu(pn, cindex);
Robert Olsson19baf832005-06-21 12:43:18 -07001406
1407 if (n == NULL) {
1408#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001409 this_cpu_inc(stats->null_node_hit);
Robert Olsson19baf832005-06-21 12:43:18 -07001410#endif
1411 goto backtrace;
1412 }
1413
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001414 if (IS_LEAF(n)) {
David S. Miller5b470442011-01-31 16:10:03 -08001415 ret = check_leaf(tb, t, (struct leaf *)n, key, flp, res, fib_flags);
Ben Hutchings2e655572008-07-10 16:52:52 -07001416 if (ret > 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07001417 goto backtrace;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001418 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001419 }
1420
Olof Johansson91b9a272005-08-09 20:24:39 -07001421 cn = (struct tnode *)n;
1422
1423 /*
1424 * It's a tnode, and we can do some extra checks here if we
1425 * like, to avoid descending into a dead-end branch.
1426 * This tnode is in the parent's child array at index
1427 * key[p_pos..p_pos+p_bits] but potentially with some bits
1428 * chopped off, so in reality the index may be just a
1429 * subprefix, padded with zero at the end.
1430 * We can also take a look at any skipped bits in this
1431 * tnode - everything up to p_pos is supposed to be ok,
1432 * and the non-chopped bits of the index (se previous
1433 * paragraph) are also guaranteed ok, but the rest is
1434 * considered unknown.
1435 *
1436 * The skipped bits are key[pos+bits..cn->pos].
1437 */
1438
1439 /* If current_prefix_length < pos+bits, we are already doing
1440 * actual prefix matching, which means everything from
1441 * pos+(bits-chopped_off) onward must be zero along some
1442 * branch of this subtree - otherwise there is *no* valid
1443 * prefix present. Here we can only check the skipped
1444 * bits. Remember, since we have already indexed into the
1445 * parent's child array, we know that the bits we chopped of
1446 * *are* zero.
1447 */
1448
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001449 /* NOTA BENE: Checking only skipped bits
1450 for the new node here */
Olof Johansson91b9a272005-08-09 20:24:39 -07001451
1452 if (current_prefix_length < pos+bits) {
1453 if (tkey_extract_bits(cn->key, current_prefix_length,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001454 cn->pos - current_prefix_length)
1455 || !(cn->child[0]))
Olof Johansson91b9a272005-08-09 20:24:39 -07001456 goto backtrace;
1457 }
1458
1459 /*
1460 * If chopped_off=0, the index is fully validated and we
1461 * only need to look at the skipped bits for this, the new,
1462 * tnode. What we actually want to do is to find out if
1463 * these skipped bits match our key perfectly, or if we will
1464 * have to count on finding a matching prefix further down,
1465 * because if we do, we would like to have some way of
1466 * verifying the existence of such a prefix at this point.
1467 */
1468
1469 /* The only thing we can do at this point is to verify that
1470 * any such matching prefix can indeed be a prefix to our
1471 * key, and if the bits in the node we are inspecting that
1472 * do not match our key are not ZERO, this cannot be true.
1473 * Thus, find out where there is a mismatch (before cn->pos)
1474 * and verify that all the mismatching bits are zero in the
1475 * new tnode's key.
1476 */
1477
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001478 /*
1479 * Note: We aren't very concerned about the piece of
1480 * the key that precede pn->pos+pn->bits, since these
1481 * have already been checked. The bits after cn->pos
1482 * aren't checked since these are by definition
1483 * "unknown" at this point. Thus, what we want to see
1484 * is if we are about to enter the "prefix matching"
1485 * state, and in that case verify that the skipped
1486 * bits that will prevail throughout this subtree are
1487 * zero, as they have to be if we are to find a
1488 * matching prefix.
Olof Johansson91b9a272005-08-09 20:24:39 -07001489 */
1490
Eric Dumazet874ffa82010-10-13 06:56:11 +00001491 pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001492
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001493 /*
1494 * In short: If skipped bits in this node do not match
1495 * the search key, enter the "prefix matching"
1496 * state.directly.
Olof Johansson91b9a272005-08-09 20:24:39 -07001497 */
1498 if (pref_mismatch) {
Eric Dumazet79cda752012-08-07 10:45:47 +00001499 /* fls(x) = __fls(x) + 1 */
1500 int mp = KEYLENGTH - __fls(pref_mismatch) - 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001501
Eric Dumazet874ffa82010-10-13 06:56:11 +00001502 if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07001503 goto backtrace;
1504
1505 if (current_prefix_length >= cn->pos)
1506 current_prefix_length = mp;
1507 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001508
Olof Johansson91b9a272005-08-09 20:24:39 -07001509 pn = (struct tnode *)n; /* Descend */
1510 chopped_off = 0;
1511 continue;
1512
Robert Olsson19baf832005-06-21 12:43:18 -07001513backtrace:
1514 chopped_off++;
1515
1516 /* As zero don't change the child key (cindex) */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001517 while ((chopped_off <= pn->bits)
1518 && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001519 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001520
1521 /* Decrease current_... with bits chopped off */
1522 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001523 current_prefix_length = pn->pos + pn->bits
1524 - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001525
Robert Olsson19baf832005-06-21 12:43:18 -07001526 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001527 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001528 * chopped off all bits in this tnode walk up to our parent.
1529 */
1530
Olof Johansson91b9a272005-08-09 20:24:39 -07001531 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001532 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001533 } else {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001534 struct tnode *parent = node_parent_rcu(pn);
Stephen Hemminger06801912007-08-10 15:22:13 -07001535 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001536 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001537
Robert Olsson19baf832005-06-21 12:43:18 -07001538 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001539 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1540 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001541 chopped_off = 0;
1542
1543#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001544 this_cpu_inc(stats->backtrack);
Robert Olsson19baf832005-06-21 12:43:18 -07001545#endif
1546 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001547 }
Robert Olsson19baf832005-06-21 12:43:18 -07001548 }
1549failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001550 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001551found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001552 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001553 return ret;
1554}
Florian Westphal6fc01432011-08-25 13:46:12 +02001555EXPORT_SYMBOL_GPL(fib_table_lookup);
Robert Olsson19baf832005-06-21 12:43:18 -07001556
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001557/*
1558 * Remove the leaf and return parent.
1559 */
1560static void trie_leaf_remove(struct trie *t, struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001561{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001562 struct tnode *tp = node_parent(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001563
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001564 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001565
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001566 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001567 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Lin Ming61648d92012-07-29 02:00:03 +00001568 put_child(tp, cindex, NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001569 trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001570 } else
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00001571 RCU_INIT_POINTER(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001572
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001573 node_free(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001574}
1575
Robert Olssond562f1f2007-03-26 14:22:22 -07001576/*
1577 * Caller must hold RTNL.
1578 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001579int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001580{
1581 struct trie *t = (struct trie *) tb->tb_data;
1582 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001583 int plen = cfg->fc_dst_len;
1584 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001585 struct fib_alias *fa, *fa_to_delete;
1586 struct list_head *fa_head;
1587 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001588 struct leaf_info *li;
1589
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001590 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001591 return -EINVAL;
1592
Thomas Graf4e902c52006-08-17 18:14:52 -07001593 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001594 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001595
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001596 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001597 return -EINVAL;
1598
1599 key = key & mask;
1600 l = fib_find_node(t, key);
1601
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001602 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001603 return -ESRCH;
1604
Igor Maravicad5b3102012-08-13 10:26:08 +02001605 li = find_leaf_info(l, plen);
1606
1607 if (!li)
1608 return -ESRCH;
1609
1610 fa_head = &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -07001611 fa = fib_find_alias(fa_head, tos, 0);
1612
1613 if (!fa)
1614 return -ESRCH;
1615
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001616 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001617
1618 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001619 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1620 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001621 struct fib_info *fi = fa->fa_info;
1622
1623 if (fa->fa_tos != tos)
1624 break;
1625
Thomas Graf4e902c52006-08-17 18:14:52 -07001626 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1627 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
David S. Miller37e826c2011-03-24 18:06:47 -07001628 fa->fa_info->fib_scope == cfg->fc_scope) &&
Julian Anastasov74cb3c12011-03-19 12:13:46 +00001629 (!cfg->fc_prefsrc ||
1630 fi->fib_prefsrc == cfg->fc_prefsrc) &&
Thomas Graf4e902c52006-08-17 18:14:52 -07001631 (!cfg->fc_protocol ||
1632 fi->fib_protocol == cfg->fc_protocol) &&
1633 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001634 fa_to_delete = fa;
1635 break;
1636 }
1637 }
1638
Olof Johansson91b9a272005-08-09 20:24:39 -07001639 if (!fa_to_delete)
1640 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001641
Olof Johansson91b9a272005-08-09 20:24:39 -07001642 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001643 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001644 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001645
Robert Olsson2373ce12005-08-25 13:01:29 -07001646 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001647
David S. Miller21d8c492011-04-14 14:49:37 -07001648 if (!plen)
1649 tb->tb_num_default--;
1650
Olof Johansson91b9a272005-08-09 20:24:39 -07001651 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001652 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001653 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001654 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001655
1656 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001657 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001658
1659 if (fa->fa_state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001660 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Olof Johansson91b9a272005-08-09 20:24:39 -07001661
Robert Olsson2373ce12005-08-25 13:01:29 -07001662 fib_release_info(fa->fa_info);
1663 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001664 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001665}
1666
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001667static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001668{
1669 struct fib_alias *fa, *fa_node;
1670 int found = 0;
1671
1672 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1673 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001674
Robert Olsson2373ce12005-08-25 13:01:29 -07001675 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1676 list_del_rcu(&fa->fa_list);
1677 fib_release_info(fa->fa_info);
1678 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001679 found++;
1680 }
1681 }
1682 return found;
1683}
1684
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001685static int trie_flush_leaf(struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001686{
1687 int found = 0;
1688 struct hlist_head *lih = &l->list;
Sasha Levinb67bfe02013-02-27 17:06:00 -08001689 struct hlist_node *tmp;
Robert Olsson19baf832005-06-21 12:43:18 -07001690 struct leaf_info *li = NULL;
1691
Sasha Levinb67bfe02013-02-27 17:06:00 -08001692 hlist_for_each_entry_safe(li, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001693 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001694
1695 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001696 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001697 free_leaf_info(li);
1698 }
1699 }
1700 return found;
1701}
1702
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001703/*
1704 * Scan for the next right leaf starting at node p->child[idx]
1705 * Since we have back pointer, no recursion necessary.
1706 */
David S. Millerb299e4f2011-02-02 20:48:10 -08001707static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001708{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001709 do {
1710 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001711
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001712 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001713 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001714 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001715 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001716
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001717 while (idx < 1u << p->bits) {
1718 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001719 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001720 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001721
Eric Dumazetaab515d2013-08-05 11:18:49 -07001722 if (IS_LEAF(c))
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001723 return (struct leaf *) c;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001724
1725 /* Rescan start scanning in new node */
1726 p = (struct tnode *) c;
1727 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001728 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001729
1730 /* Node empty, walk back up to parent */
David S. Millerb299e4f2011-02-02 20:48:10 -08001731 c = (struct rt_trie_node *) p;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001732 } while ((p = node_parent_rcu(c)) != NULL);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001733
1734 return NULL; /* Root of trie */
1735}
1736
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001737static struct leaf *trie_firstleaf(struct trie *t)
1738{
Eric Dumazeta034ee32010-09-09 23:32:28 +00001739 struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001740
1741 if (!n)
1742 return NULL;
1743
1744 if (IS_LEAF(n)) /* trie is just a leaf */
1745 return (struct leaf *) n;
1746
1747 return leaf_walk_rcu(n, NULL);
1748}
1749
1750static struct leaf *trie_nextleaf(struct leaf *l)
1751{
David S. Millerb299e4f2011-02-02 20:48:10 -08001752 struct rt_trie_node *c = (struct rt_trie_node *) l;
Jarek Poplawskib902e572009-07-14 11:20:32 +00001753 struct tnode *p = node_parent_rcu(c);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001754
1755 if (!p)
1756 return NULL; /* trie with just one leaf */
1757
1758 return leaf_walk_rcu(p, c);
Robert Olsson19baf832005-06-21 12:43:18 -07001759}
1760
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001761static struct leaf *trie_leafindex(struct trie *t, int index)
1762{
1763 struct leaf *l = trie_firstleaf(t);
1764
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001765 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001766 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001767
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001768 return l;
1769}
1770
1771
Robert Olssond562f1f2007-03-26 14:22:22 -07001772/*
1773 * Caller must hold RTNL.
1774 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001775int fib_table_flush(struct fib_table *tb)
Robert Olsson19baf832005-06-21 12:43:18 -07001776{
1777 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001778 struct leaf *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001779 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001780
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001781 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001782 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001783
1784 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001785 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001786 ll = l;
1787 }
1788
1789 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001790 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001791
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001792 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001793 return found;
1794}
1795
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001796void fib_free_table(struct fib_table *tb)
1797{
Alexander Duyck8274a972014-12-31 10:55:29 -08001798#ifdef CONFIG_IP_FIB_TRIE_STATS
1799 struct trie *t = (struct trie *)tb->tb_data;
1800
1801 free_percpu(t->stats);
1802#endif /* CONFIG_IP_FIB_TRIE_STATS */
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001803 kfree(tb);
1804}
1805
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001806static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1807 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001808 struct sk_buff *skb, struct netlink_callback *cb)
1809{
1810 int i, s_i;
1811 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001812 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001813
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001814 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001815 i = 0;
1816
Robert Olsson2373ce12005-08-25 13:01:29 -07001817 /* rcu_read_lock is hold by caller */
1818
1819 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001820 if (i < s_i) {
1821 i++;
1822 continue;
1823 }
Robert Olsson19baf832005-06-21 12:43:18 -07001824
Eric W. Biederman15e47302012-09-07 20:12:54 +00001825 if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
Robert Olsson19baf832005-06-21 12:43:18 -07001826 cb->nlh->nlmsg_seq,
1827 RTM_NEWROUTE,
1828 tb->tb_id,
1829 fa->fa_type,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001830 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001831 plen,
1832 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001833 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001834 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001835 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001836 }
Robert Olsson19baf832005-06-21 12:43:18 -07001837 i++;
1838 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001839 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001840 return skb->len;
1841}
1842
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001843static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1844 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001845{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001846 struct leaf_info *li;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001847 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001848
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001849 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001850 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001851
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001852 /* rcu_read_lock is hold by caller */
Sasha Levinb67bfe02013-02-27 17:06:00 -08001853 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001854 if (i < s_i) {
1855 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001856 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001857 }
Robert Olsson19baf832005-06-21 12:43:18 -07001858
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001859 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001860 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001861
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001862 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001863 continue;
1864
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001865 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001866 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001867 return -1;
1868 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001869 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001870 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001871
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001872 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001873 return skb->len;
1874}
1875
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001876int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1877 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001878{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001879 struct leaf *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001880 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001881 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001882 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001883
Robert Olsson2373ce12005-08-25 13:01:29 -07001884 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001885 /* Dump starting at last key.
1886 * Note: 0.0.0.0/0 (ie default) is first key.
1887 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001888 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001889 l = trie_firstleaf(t);
1890 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001891 /* Normally, continue from last key, but if that is missing
1892 * fallback to using slow rescan
1893 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001894 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001895 if (!l)
1896 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001897 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001898
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001899 while (l) {
1900 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001901 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001902 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001903 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001904 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001905 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001906
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001907 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001908 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001909 memset(&cb->args[4], 0,
1910 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001911 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001912 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07001913 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001914
Robert Olsson19baf832005-06-21 12:43:18 -07001915 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07001916}
1917
David S. Miller5348ba82011-02-01 15:30:56 -08001918void __init fib_trie_init(void)
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001919{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001920 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1921 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001922 0, SLAB_PANIC, NULL);
1923
1924 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
1925 max(sizeof(struct leaf),
1926 sizeof(struct leaf_info)),
1927 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001928}
Robert Olsson19baf832005-06-21 12:43:18 -07001929
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001930
David S. Miller5348ba82011-02-01 15:30:56 -08001931struct fib_table *fib_trie_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001932{
1933 struct fib_table *tb;
1934 struct trie *t;
1935
Robert Olsson19baf832005-06-21 12:43:18 -07001936 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1937 GFP_KERNEL);
1938 if (tb == NULL)
1939 return NULL;
1940
1941 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08001942 tb->tb_default = -1;
David S. Miller21d8c492011-04-14 14:49:37 -07001943 tb->tb_num_default = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001944
1945 t = (struct trie *) tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001946 RCU_INIT_POINTER(t->trie, NULL);
1947#ifdef CONFIG_IP_FIB_TRIE_STATS
1948 t->stats = alloc_percpu(struct trie_use_stats);
1949 if (!t->stats) {
1950 kfree(tb);
1951 tb = NULL;
1952 }
1953#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001954
Robert Olsson19baf832005-06-21 12:43:18 -07001955 return tb;
1956}
1957
Robert Olsson19baf832005-06-21 12:43:18 -07001958#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001959/* Depth first Trie walk iterator */
1960struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08001961 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001962 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001963 struct tnode *tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001964 unsigned int index;
1965 unsigned int depth;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001966};
Robert Olsson19baf832005-06-21 12:43:18 -07001967
David S. Millerb299e4f2011-02-02 20:48:10 -08001968static struct rt_trie_node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001969{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001970 struct tnode *tn = iter->tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001971 unsigned int cindex = iter->index;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001972 struct tnode *p;
1973
Eric W. Biederman6640e692007-01-24 14:42:04 -08001974 /* A single entry routing table */
1975 if (!tn)
1976 return NULL;
1977
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001978 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1979 iter->tnode, iter->index, iter->depth);
1980rescan:
1981 while (cindex < (1<<tn->bits)) {
David S. Millerb299e4f2011-02-02 20:48:10 -08001982 struct rt_trie_node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001983
1984 if (n) {
1985 if (IS_LEAF(n)) {
1986 iter->tnode = tn;
1987 iter->index = cindex + 1;
1988 } else {
1989 /* push down one level */
1990 iter->tnode = (struct tnode *) n;
1991 iter->index = 0;
1992 ++iter->depth;
1993 }
1994 return n;
1995 }
1996
1997 ++cindex;
1998 }
1999
2000 /* Current node exhausted, pop back up */
David S. Millerb299e4f2011-02-02 20:48:10 -08002001 p = node_parent_rcu((struct rt_trie_node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002002 if (p) {
2003 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2004 tn = p;
2005 --iter->depth;
2006 goto rescan;
2007 }
2008
2009 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002010 return NULL;
2011}
2012
David S. Millerb299e4f2011-02-02 20:48:10 -08002013static struct rt_trie_node *fib_trie_get_first(struct fib_trie_iter *iter,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002014 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002015{
David S. Millerb299e4f2011-02-02 20:48:10 -08002016 struct rt_trie_node *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002017
Stephen Hemminger132adf52007-03-08 20:44:43 -08002018 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002019 return NULL;
2020
2021 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002022 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002023 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002024
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002025 if (IS_TNODE(n)) {
2026 iter->tnode = (struct tnode *) n;
2027 iter->index = 0;
2028 iter->depth = 1;
2029 } else {
2030 iter->tnode = NULL;
2031 iter->index = 0;
2032 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002033 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002034
2035 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002036}
2037
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002038static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002039{
David S. Millerb299e4f2011-02-02 20:48:10 -08002040 struct rt_trie_node *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002041 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002042
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002043 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002044
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002045 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002046 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002047 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08002048 struct leaf *l = (struct leaf *)n;
2049 struct leaf_info *li;
Stephen Hemminger93672292008-01-22 21:54:05 -08002050
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002051 s->leaves++;
2052 s->totdepth += iter.depth;
2053 if (iter.depth > s->maxdepth)
2054 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08002055
Sasha Levinb67bfe02013-02-27 17:06:00 -08002056 hlist_for_each_entry_rcu(li, &l->list, hlist)
Stephen Hemminger93672292008-01-22 21:54:05 -08002057 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002058 } else {
2059 const struct tnode *tn = (const struct tnode *) n;
2060 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002061
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002062 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002063 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002064 s->nodesizes[tn->bits]++;
2065
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002066 for (i = 0; i < (1<<tn->bits); i++)
2067 if (!tn->child[i])
2068 s->nullpointers++;
2069 }
2070 }
2071 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002072}
2073
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002074/*
Robert Olsson19baf832005-06-21 12:43:18 -07002075 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002076 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002077static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002078{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002079 unsigned int i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002080
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002081 if (stat->leaves)
2082 avdepth = stat->totdepth*100 / stat->leaves;
2083 else
2084 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002085
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002086 seq_printf(seq, "\tAver depth: %u.%02d\n",
2087 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002088 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002089
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002090 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002091 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08002092
2093 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2094 bytes += sizeof(struct leaf_info) * stat->prefixes;
2095
Stephen Hemminger187b5182008-01-12 20:55:55 -08002096 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002097 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002098
Robert Olsson06ef9212006-03-20 21:35:01 -08002099 max = MAX_STAT_DEPTH;
2100 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002101 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002102
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002103 pointers = 0;
Jerry Snitselaarf585a992013-07-22 12:01:58 -07002104 for (i = 1; i < max; i++)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002105 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002106 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002107 pointers += (1<<i) * stat->nodesizes[i];
2108 }
2109 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002110 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002111
David S. Millerb299e4f2011-02-02 20:48:10 -08002112 bytes += sizeof(struct rt_trie_node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002113 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2114 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002115}
Robert Olsson19baf832005-06-21 12:43:18 -07002116
2117#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002118static void trie_show_usage(struct seq_file *seq,
Alexander Duyck8274a972014-12-31 10:55:29 -08002119 const struct trie_use_stats __percpu *stats)
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002120{
Alexander Duyck8274a972014-12-31 10:55:29 -08002121 struct trie_use_stats s = { 0 };
2122 int cpu;
2123
2124 /* loop through all of the CPUs and gather up the stats */
2125 for_each_possible_cpu(cpu) {
2126 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2127
2128 s.gets += pcpu->gets;
2129 s.backtrack += pcpu->backtrack;
2130 s.semantic_match_passed += pcpu->semantic_match_passed;
2131 s.semantic_match_miss += pcpu->semantic_match_miss;
2132 s.null_node_hit += pcpu->null_node_hit;
2133 s.resize_node_skipped += pcpu->resize_node_skipped;
2134 }
2135
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002136 seq_printf(seq, "\nCounters:\n---------\n");
Alexander Duyck8274a972014-12-31 10:55:29 -08002137 seq_printf(seq, "gets = %u\n", s.gets);
2138 seq_printf(seq, "backtracks = %u\n", s.backtrack);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002139 seq_printf(seq, "semantic match passed = %u\n",
Alexander Duyck8274a972014-12-31 10:55:29 -08002140 s.semantic_match_passed);
2141 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2142 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2143 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002144}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002145#endif /* CONFIG_IP_FIB_TRIE_STATS */
2146
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002147static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002148{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002149 if (tb->tb_id == RT_TABLE_LOCAL)
2150 seq_puts(seq, "Local:\n");
2151 else if (tb->tb_id == RT_TABLE_MAIN)
2152 seq_puts(seq, "Main:\n");
2153 else
2154 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002155}
Robert Olsson19baf832005-06-21 12:43:18 -07002156
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002157
Robert Olsson19baf832005-06-21 12:43:18 -07002158static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2159{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002160 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002161 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002162
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002163 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002164 "Basic info: size of leaf:"
2165 " %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002166 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002167
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002168 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2169 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002170 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002171
Sasha Levinb67bfe02013-02-27 17:06:00 -08002172 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002173 struct trie *t = (struct trie *) tb->tb_data;
2174 struct trie_stat stat;
2175
2176 if (!t)
2177 continue;
2178
2179 fib_table_print(seq, tb);
2180
2181 trie_collect_stats(t, &stat);
2182 trie_show_stats(seq, &stat);
2183#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08002184 trie_show_usage(seq, t->stats);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002185#endif
2186 }
2187 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002188
Robert Olsson19baf832005-06-21 12:43:18 -07002189 return 0;
2190}
2191
Robert Olsson19baf832005-06-21 12:43:18 -07002192static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2193{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002194 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002195}
2196
Arjan van de Ven9a321442007-02-12 00:55:35 -08002197static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002198 .owner = THIS_MODULE,
2199 .open = fib_triestat_seq_open,
2200 .read = seq_read,
2201 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002202 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002203};
2204
David S. Millerb299e4f2011-02-02 20:48:10 -08002205static struct rt_trie_node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002206{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002207 struct fib_trie_iter *iter = seq->private;
2208 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002209 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002210 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002211
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002212 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2213 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002214 struct fib_table *tb;
2215
Sasha Levinb67bfe02013-02-27 17:06:00 -08002216 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
David S. Millerb299e4f2011-02-02 20:48:10 -08002217 struct rt_trie_node *n;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002218
2219 for (n = fib_trie_get_first(iter,
2220 (struct trie *) tb->tb_data);
2221 n; n = fib_trie_get_next(iter))
2222 if (pos == idx++) {
2223 iter->tb = tb;
2224 return n;
2225 }
2226 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002227 }
Robert Olsson19baf832005-06-21 12:43:18 -07002228
Robert Olsson19baf832005-06-21 12:43:18 -07002229 return NULL;
2230}
2231
2232static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002233 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002234{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002235 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002236 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002237}
2238
2239static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2240{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002241 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002242 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002243 struct fib_table *tb = iter->tb;
2244 struct hlist_node *tb_node;
2245 unsigned int h;
David S. Millerb299e4f2011-02-02 20:48:10 -08002246 struct rt_trie_node *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002247
Robert Olsson19baf832005-06-21 12:43:18 -07002248 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002249 /* next node in same table */
2250 n = fib_trie_get_next(iter);
2251 if (n)
2252 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002253
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002254 /* walk rest of this hash chain */
2255 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
Eric Dumazet0a5c0472011-03-31 01:51:35 -07002256 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002257 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2258 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2259 if (n)
2260 goto found;
2261 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002262
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002263 /* new hash chain */
2264 while (++h < FIB_TABLE_HASHSZ) {
2265 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Sasha Levinb67bfe02013-02-27 17:06:00 -08002266 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002267 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2268 if (n)
2269 goto found;
2270 }
2271 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002272 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002273
2274found:
2275 iter->tb = tb;
2276 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002277}
2278
2279static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002280 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002281{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002282 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002283}
2284
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002285static void seq_indent(struct seq_file *seq, int n)
2286{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002287 while (n-- > 0)
2288 seq_puts(seq, " ");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002289}
Robert Olsson19baf832005-06-21 12:43:18 -07002290
Eric Dumazet28d36e32008-01-14 23:09:56 -08002291static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002292{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002293 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002294 case RT_SCOPE_UNIVERSE: return "universe";
2295 case RT_SCOPE_SITE: return "site";
2296 case RT_SCOPE_LINK: return "link";
2297 case RT_SCOPE_HOST: return "host";
2298 case RT_SCOPE_NOWHERE: return "nowhere";
2299 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002300 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002301 return buf;
2302 }
2303}
2304
Jan Engelhardt36cbd3d2009-08-05 10:42:58 -07002305static const char *const rtn_type_names[__RTN_MAX] = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002306 [RTN_UNSPEC] = "UNSPEC",
2307 [RTN_UNICAST] = "UNICAST",
2308 [RTN_LOCAL] = "LOCAL",
2309 [RTN_BROADCAST] = "BROADCAST",
2310 [RTN_ANYCAST] = "ANYCAST",
2311 [RTN_MULTICAST] = "MULTICAST",
2312 [RTN_BLACKHOLE] = "BLACKHOLE",
2313 [RTN_UNREACHABLE] = "UNREACHABLE",
2314 [RTN_PROHIBIT] = "PROHIBIT",
2315 [RTN_THROW] = "THROW",
2316 [RTN_NAT] = "NAT",
2317 [RTN_XRESOLVE] = "XRESOLVE",
2318};
2319
Eric Dumazeta034ee32010-09-09 23:32:28 +00002320static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002321{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002322 if (t < __RTN_MAX && rtn_type_names[t])
2323 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002324 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002325 return buf;
2326}
2327
2328/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002329static int fib_trie_seq_show(struct seq_file *seq, void *v)
2330{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002331 const struct fib_trie_iter *iter = seq->private;
David S. Millerb299e4f2011-02-02 20:48:10 -08002332 struct rt_trie_node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002333
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002334 if (!node_parent_rcu(n))
2335 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002336
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002337 if (IS_TNODE(n)) {
2338 struct tnode *tn = (struct tnode *) n;
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08002339 __be32 prf = htonl(tn->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002340
Robert Olsson1d25cd62005-09-19 15:29:52 -07002341 seq_indent(seq, iter->depth-1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002342 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
2343 &prf, tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002344 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002345
Olof Johansson91b9a272005-08-09 20:24:39 -07002346 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002347 struct leaf *l = (struct leaf *) n;
Stephen Hemminger13280422008-01-22 21:54:37 -08002348 struct leaf_info *li;
Al Viro32ab5f82006-09-26 22:21:45 -07002349 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002350
2351 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002352 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002353
Sasha Levinb67bfe02013-02-27 17:06:00 -08002354 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemminger13280422008-01-22 21:54:37 -08002355 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002356
Stephen Hemminger13280422008-01-22 21:54:37 -08002357 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2358 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002359
Stephen Hemminger13280422008-01-22 21:54:37 -08002360 seq_indent(seq, iter->depth+1);
2361 seq_printf(seq, " /%d %s %s", li->plen,
2362 rtn_scope(buf1, sizeof(buf1),
David S. Miller37e826c2011-03-24 18:06:47 -07002363 fa->fa_info->fib_scope),
Stephen Hemminger13280422008-01-22 21:54:37 -08002364 rtn_type(buf2, sizeof(buf2),
2365 fa->fa_type));
2366 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002367 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002368 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002369 }
2370 }
Robert Olsson19baf832005-06-21 12:43:18 -07002371 }
2372
2373 return 0;
2374}
2375
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002376static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002377 .start = fib_trie_seq_start,
2378 .next = fib_trie_seq_next,
2379 .stop = fib_trie_seq_stop,
2380 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002381};
2382
2383static int fib_trie_seq_open(struct inode *inode, struct file *file)
2384{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002385 return seq_open_net(inode, file, &fib_trie_seq_ops,
2386 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002387}
2388
Arjan van de Ven9a321442007-02-12 00:55:35 -08002389static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002390 .owner = THIS_MODULE,
2391 .open = fib_trie_seq_open,
2392 .read = seq_read,
2393 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002394 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002395};
2396
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002397struct fib_route_iter {
2398 struct seq_net_private p;
2399 struct trie *main_trie;
2400 loff_t pos;
2401 t_key key;
2402};
2403
2404static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
2405{
2406 struct leaf *l = NULL;
2407 struct trie *t = iter->main_trie;
2408
2409 /* use cache location of last found key */
2410 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2411 pos -= iter->pos;
2412 else {
2413 iter->pos = 0;
2414 l = trie_firstleaf(t);
2415 }
2416
2417 while (l && pos-- > 0) {
2418 iter->pos++;
2419 l = trie_nextleaf(l);
2420 }
2421
2422 if (l)
2423 iter->key = pos; /* remember it */
2424 else
2425 iter->pos = 0; /* forget it */
2426
2427 return l;
2428}
2429
2430static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2431 __acquires(RCU)
2432{
2433 struct fib_route_iter *iter = seq->private;
2434 struct fib_table *tb;
2435
2436 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002437 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002438 if (!tb)
2439 return NULL;
2440
2441 iter->main_trie = (struct trie *) tb->tb_data;
2442 if (*pos == 0)
2443 return SEQ_START_TOKEN;
2444 else
2445 return fib_route_get_idx(iter, *pos - 1);
2446}
2447
2448static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2449{
2450 struct fib_route_iter *iter = seq->private;
2451 struct leaf *l = v;
2452
2453 ++*pos;
2454 if (v == SEQ_START_TOKEN) {
2455 iter->pos = 0;
2456 l = trie_firstleaf(iter->main_trie);
2457 } else {
2458 iter->pos++;
2459 l = trie_nextleaf(l);
2460 }
2461
2462 if (l)
2463 iter->key = l->key;
2464 else
2465 iter->pos = 0;
2466 return l;
2467}
2468
2469static void fib_route_seq_stop(struct seq_file *seq, void *v)
2470 __releases(RCU)
2471{
2472 rcu_read_unlock();
2473}
2474
Eric Dumazeta034ee32010-09-09 23:32:28 +00002475static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002476{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002477 unsigned int flags = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002478
Eric Dumazeta034ee32010-09-09 23:32:28 +00002479 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2480 flags = RTF_REJECT;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002481 if (fi && fi->fib_nh->nh_gw)
2482 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002483 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002484 flags |= RTF_HOST;
2485 flags |= RTF_UP;
2486 return flags;
2487}
2488
2489/*
2490 * This outputs /proc/net/route.
2491 * The format of the file is not supposed to be changed
Eric Dumazeta034ee32010-09-09 23:32:28 +00002492 * and needs to be same as fib_hash output to avoid breaking
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002493 * legacy utilities
2494 */
2495static int fib_route_seq_show(struct seq_file *seq, void *v)
2496{
2497 struct leaf *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002498 struct leaf_info *li;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002499
2500 if (v == SEQ_START_TOKEN) {
2501 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2502 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2503 "\tWindow\tIRTT");
2504 return 0;
2505 }
2506
Sasha Levinb67bfe02013-02-27 17:06:00 -08002507 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002508 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002509 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002510
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002511 mask = inet_make_mask(li->plen);
2512 prefix = htonl(l->key);
2513
2514 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002515 const struct fib_info *fi = fa->fa_info;
Eric Dumazeta034ee32010-09-09 23:32:28 +00002516 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002517
2518 if (fa->fa_type == RTN_BROADCAST
2519 || fa->fa_type == RTN_MULTICAST)
2520 continue;
2521
Tetsuo Handa652586d2013-11-14 14:31:57 -08002522 seq_setwidth(seq, 127);
2523
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002524 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002525 seq_printf(seq,
2526 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002527 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002528 fi->fib_dev ? fi->fib_dev->name : "*",
2529 prefix,
2530 fi->fib_nh->nh_gw, flags, 0, 0,
2531 fi->fib_priority,
2532 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002533 (fi->fib_advmss ?
2534 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002535 fi->fib_window,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002536 fi->fib_rtt >> 3);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002537 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002538 seq_printf(seq,
2539 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002540 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002541 prefix, 0, flags, 0, 0, 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002542 mask, 0, 0, 0);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002543
Tetsuo Handa652586d2013-11-14 14:31:57 -08002544 seq_pad(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002545 }
2546 }
2547
2548 return 0;
2549}
2550
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002551static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002552 .start = fib_route_seq_start,
2553 .next = fib_route_seq_next,
2554 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002555 .show = fib_route_seq_show,
2556};
2557
2558static int fib_route_seq_open(struct inode *inode, struct file *file)
2559{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002560 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002561 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002562}
2563
Arjan van de Ven9a321442007-02-12 00:55:35 -08002564static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002565 .owner = THIS_MODULE,
2566 .open = fib_route_seq_open,
2567 .read = seq_read,
2568 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002569 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002570};
2571
Denis V. Lunev61a02652008-01-10 03:21:09 -08002572int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002573{
Gao fengd4beaa62013-02-18 01:34:54 +00002574 if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002575 goto out1;
2576
Gao fengd4beaa62013-02-18 01:34:54 +00002577 if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
2578 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002579 goto out2;
2580
Gao fengd4beaa62013-02-18 01:34:54 +00002581 if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002582 goto out3;
2583
Robert Olsson19baf832005-06-21 12:43:18 -07002584 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002585
2586out3:
Gao fengece31ff2013-02-18 01:34:56 +00002587 remove_proc_entry("fib_triestat", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002588out2:
Gao fengece31ff2013-02-18 01:34:56 +00002589 remove_proc_entry("fib_trie", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002590out1:
2591 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002592}
2593
Denis V. Lunev61a02652008-01-10 03:21:09 -08002594void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002595{
Gao fengece31ff2013-02-18 01:34:56 +00002596 remove_proc_entry("fib_trie", net->proc_net);
2597 remove_proc_entry("fib_triestat", net->proc_net);
2598 remove_proc_entry("route", net->proc_net);
Robert Olsson19baf832005-06-21 12:43:18 -07002599}
2600
2601#endif /* CONFIG_PROC_FS */