blob: 63c2fa7b68c4b922840fe883bd9bdf0e9a6c21c7 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Robert Olsson05eee482007-03-19 16:27:37 -070051#define VERSION "0.408"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
54#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070055#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070056#include <linux/types.h>
57#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070058#include <linux/mm.h>
59#include <linux/string.h>
60#include <linux/socket.h>
61#include <linux/sockios.h>
62#include <linux/errno.h>
63#include <linux/in.h>
64#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080065#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070066#include <linux/netdevice.h>
67#include <linux/if_arp.h>
68#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070069#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070070#include <linux/skbuff.h>
71#include <linux/netlink.h>
72#include <linux/init.h>
73#include <linux/list.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020074#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070075#include <net/ip.h>
76#include <net/protocol.h>
77#include <net/route.h>
78#include <net/tcp.h>
79#include <net/sock.h>
80#include <net/ip_fib.h>
81#include "fib_lookup.h"
82
Robert Olsson06ef9212006-03-20 21:35:01 -080083#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070084
Robert Olsson19baf832005-06-21 12:43:18 -070085#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
Robert Olsson2373ce12005-08-25 13:01:29 -070092#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
93
Olof Johansson91b9a272005-08-09 20:24:39 -070094#define IS_TNODE(n) (!(n->parent & T_LEAF))
95#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -070096
97struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -070098 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -080099 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700100};
101
102struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700103 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -0800104 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700105 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700106 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700107};
108
109struct leaf_info {
110 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700111 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700112 int plen;
113 struct list_head falh;
114};
115
116struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700117 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -0800118 t_key key;
Eric Dumazet112d8cf2008-01-12 21:27:41 -0800119 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
120 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
Eric Dumazet8d965442008-01-13 22:31:44 -0800121 unsigned int full_children; /* KEYLENGTH bits needed */
122 unsigned int empty_children; /* KEYLENGTH bits needed */
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700123 union {
124 struct rcu_head rcu;
125 struct work_struct work;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700126 struct tnode *tnode_free;
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700127 };
Olof Johansson91b9a272005-08-09 20:24:39 -0700128 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700129};
130
131#ifdef CONFIG_IP_FIB_TRIE_STATS
132struct trie_use_stats {
133 unsigned int gets;
134 unsigned int backtrack;
135 unsigned int semantic_match_passed;
136 unsigned int semantic_match_miss;
137 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700138 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800148 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800149 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700150};
Robert Olsson19baf832005-06-21 12:43:18 -0700151
152struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700153 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700154#ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700157};
158
Robert Olsson19baf832005-06-21 12:43:18 -0700159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
161 int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700162static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700163static struct tnode *inflate(struct trie *t, struct tnode *tn);
164static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head;
Robert Olsson19baf832005-06-21 12:43:18 -0700167
Christoph Lametere18b8902006-12-06 20:33:20 -0800168static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800169static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700170
Stephen Hemminger06801912007-08-10 15:22:13 -0700171static inline struct tnode *node_parent(struct node *node)
172{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800173 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
174}
Stephen Hemminger06801912007-08-10 15:22:13 -0700175
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800176static inline struct tnode *node_parent_rcu(struct node *node)
177{
178 struct tnode *ret = node_parent(node);
179
Stephen Hemminger06801912007-08-10 15:22:13 -0700180 return rcu_dereference(ret);
181}
182
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700183/* Same as rcu_assign_pointer
184 * but that macro() assumes that value is a pointer.
185 */
Stephen Hemminger06801912007-08-10 15:22:13 -0700186static inline void node_set_parent(struct node *node, struct tnode *ptr)
187{
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700188 smp_wmb();
189 node->parent = (unsigned long)ptr | NODE_TYPE(node);
Stephen Hemminger06801912007-08-10 15:22:13 -0700190}
Robert Olsson2373ce12005-08-25 13:01:29 -0700191
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800192static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700193{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800194 BUG_ON(i >= 1U << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700195
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800196 return tn->child[i];
197}
198
199static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
200{
201 struct node *ret = tnode_get_child(tn, i);
202
203 return rcu_dereference(ret);
Robert Olsson19baf832005-06-21 12:43:18 -0700204}
205
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700206static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700207{
Olof Johansson91b9a272005-08-09 20:24:39 -0700208 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700209}
210
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700211static inline t_key mask_pfx(t_key k, unsigned short l)
212{
213 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
214}
215
Robert Olsson19baf832005-06-21 12:43:18 -0700216static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
217{
Olof Johansson91b9a272005-08-09 20:24:39 -0700218 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700219 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700220 else
Robert Olsson19baf832005-06-21 12:43:18 -0700221 return 0;
222}
223
224static inline int tkey_equals(t_key a, t_key b)
225{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700226 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700227}
228
229static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
230{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700231 if (bits == 0 || offset >= KEYLENGTH)
232 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700233 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
234 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700235}
Robert Olsson19baf832005-06-21 12:43:18 -0700236
237static inline int tkey_mismatch(t_key a, int offset, t_key b)
238{
239 t_key diff = a ^ b;
240 int i = offset;
241
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700242 if (!diff)
243 return 0;
244 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700245 i++;
246 return i;
247}
248
Robert Olsson19baf832005-06-21 12:43:18 -0700249/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900250 To understand this stuff, an understanding of keys and all their bits is
251 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700252 all of the bits in that key are significant.
253
254 Consider a node 'n' and its parent 'tp'.
255
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900256 If n is a leaf, every bit in its key is significant. Its presence is
257 necessitated by path compression, since during a tree traversal (when
258 searching for a leaf - unless we are doing an insertion) we will completely
259 ignore all skipped bits we encounter. Thus we need to verify, at the end of
260 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700261 correct key path.
262
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900263 Note that we can never "miss" the correct key in the tree if present by
264 following the wrong path. Path compression ensures that segments of the key
265 that are the same for all keys with a given prefix are skipped, but the
266 skipped part *is* identical for each node in the subtrie below the skipped
267 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700268 call to tkey_sub_equals() in trie_insert().
269
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900270 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700271 have many different meanings.
272
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900273 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700274 _________________________________________________________________
275 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
276 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900277 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700278
279 _________________________________________________________________
280 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
281 -----------------------------------------------------------------
282 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
283
284 tp->pos = 7
285 tp->bits = 3
286 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700287 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700288
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900289 First, let's just ignore the bits that come before the parent tp, that is
290 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700291 not use them for anything.
292
293 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900294 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700295 'n' among tp's children.
296
297 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
298 for the node n.
299
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900300 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700301 of the bits are really not needed or indeed known in n->key.
302
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900303 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700304 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900305
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700306
Robert Olsson19baf832005-06-21 12:43:18 -0700307 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
308 at this point.
309
310*/
311
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700312static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700313{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700314 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700315}
316
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800317static const int halve_threshold = 25;
318static const int inflate_threshold = 50;
Jarek Poplawski345aa032009-07-07 19:39:16 -0700319static const int halve_threshold_root = 15;
320static const int inflate_threshold_root = 25;
Robert Olsson19baf832005-06-21 12:43:18 -0700321
Robert Olsson2373ce12005-08-25 13:01:29 -0700322
323static void __alias_free_mem(struct rcu_head *head)
324{
325 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
326 kmem_cache_free(fn_alias_kmem, fa);
327}
328
329static inline void alias_free_mem_rcu(struct fib_alias *fa)
330{
331 call_rcu(&fa->rcu, __alias_free_mem);
332}
333
334static void __leaf_free_rcu(struct rcu_head *head)
335{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800336 struct leaf *l = container_of(head, struct leaf, rcu);
337 kmem_cache_free(trie_leaf_kmem, l);
Robert Olsson2373ce12005-08-25 13:01:29 -0700338}
339
Stephen Hemminger387a5482008-04-10 03:47:34 -0700340static inline void free_leaf(struct leaf *l)
341{
342 call_rcu_bh(&l->rcu, __leaf_free_rcu);
343}
344
Robert Olsson2373ce12005-08-25 13:01:29 -0700345static void __leaf_info_free_rcu(struct rcu_head *head)
346{
347 kfree(container_of(head, struct leaf_info, rcu));
348}
349
350static inline void free_leaf_info(struct leaf_info *leaf)
351{
352 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
353}
354
Eric Dumazet8d965442008-01-13 22:31:44 -0800355static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700356{
Robert Olsson2373ce12005-08-25 13:01:29 -0700357 if (size <= PAGE_SIZE)
Eric Dumazet8d965442008-01-13 22:31:44 -0800358 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700359 else
360 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
361}
Robert Olsson2373ce12005-08-25 13:01:29 -0700362
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700363static void __tnode_vfree(struct work_struct *arg)
364{
365 struct tnode *tn = container_of(arg, struct tnode, work);
366 vfree(tn);
Robert Olsson2373ce12005-08-25 13:01:29 -0700367}
368
369static void __tnode_free_rcu(struct rcu_head *head)
370{
371 struct tnode *tn = container_of(head, struct tnode, rcu);
Eric Dumazet8d965442008-01-13 22:31:44 -0800372 size_t size = sizeof(struct tnode) +
373 (sizeof(struct node *) << tn->bits);
Robert Olsson2373ce12005-08-25 13:01:29 -0700374
375 if (size <= PAGE_SIZE)
376 kfree(tn);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700377 else {
378 INIT_WORK(&tn->work, __tnode_vfree);
379 schedule_work(&tn->work);
380 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700381}
382
383static inline void tnode_free(struct tnode *tn)
384{
Stephen Hemminger387a5482008-04-10 03:47:34 -0700385 if (IS_LEAF(tn))
386 free_leaf((struct leaf *) tn);
387 else
Robert Olsson550e29b2006-04-04 12:53:35 -0700388 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700389}
390
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700391static void tnode_free_safe(struct tnode *tn)
392{
393 BUG_ON(IS_LEAF(tn));
Jarek Poplawski7b855762009-06-18 00:28:51 -0700394 tn->tnode_free = tnode_free_head;
395 tnode_free_head = tn;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700396}
397
398static void tnode_free_flush(void)
399{
400 struct tnode *tn;
401
402 while ((tn = tnode_free_head)) {
403 tnode_free_head = tn->tnode_free;
404 tn->tnode_free = NULL;
405 tnode_free(tn);
406 }
407}
408
Robert Olsson19baf832005-06-21 12:43:18 -0700409static struct leaf *leaf_new(void)
410{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800411 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700412 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700413 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700414 INIT_HLIST_HEAD(&l->list);
415 }
416 return l;
417}
418
419static struct leaf_info *leaf_info_new(int plen)
420{
421 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700422 if (li) {
423 li->plen = plen;
424 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700425 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700426 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700427}
428
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800429static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700430{
Eric Dumazet8d965442008-01-13 22:31:44 -0800431 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700432 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700433
Olof Johansson91b9a272005-08-09 20:24:39 -0700434 if (tn) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700435 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700436 tn->pos = pos;
437 tn->bits = bits;
438 tn->key = key;
439 tn->full_children = 0;
440 tn->empty_children = 1<<bits;
441 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700442
Eric Dumazet8d965442008-01-13 22:31:44 -0800443 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
444 (unsigned long) (sizeof(struct node) << bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700445 return tn;
446}
447
Robert Olsson19baf832005-06-21 12:43:18 -0700448/*
449 * Check whether a tnode 'n' is "full", i.e. it is an internal node
450 * and no bits are skipped. See discussion in dyntree paper p. 6
451 */
452
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700453static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700454{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700455 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700456 return 0;
457
458 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
459}
460
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800461static inline void put_child(struct trie *t, struct tnode *tn, int i,
462 struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700463{
464 tnode_put_child_reorg(tn, i, n, -1);
465}
466
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700467 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700468 * Add a child at position i overwriting the old value.
469 * Update the value of full_children and empty_children.
470 */
471
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800472static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
473 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700474{
Robert Olsson2373ce12005-08-25 13:01:29 -0700475 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700476 int isfull;
477
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700478 BUG_ON(i >= 1<<tn->bits);
479
Robert Olsson19baf832005-06-21 12:43:18 -0700480 /* update emptyChildren */
481 if (n == NULL && chi != NULL)
482 tn->empty_children++;
483 else if (n != NULL && chi == NULL)
484 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700485
Robert Olsson19baf832005-06-21 12:43:18 -0700486 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700487 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700488 wasfull = tnode_full(tn, chi);
489
490 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700491 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700492 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700493 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700494 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700495
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700496 if (n)
Stephen Hemminger06801912007-08-10 15:22:13 -0700497 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700498
Robert Olsson2373ce12005-08-25 13:01:29 -0700499 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700500}
501
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700502static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700503{
504 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700505 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700506 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700507 int inflate_threshold_use;
508 int halve_threshold_use;
Robert Olsson05eee482007-03-19 16:27:37 -0700509 int max_resize;
Robert Olsson19baf832005-06-21 12:43:18 -0700510
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900511 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700512 return NULL;
513
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700514 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
515 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700516
517 /* No children */
518 if (tn->empty_children == tnode_child_length(tn)) {
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700519 tnode_free_safe(tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700520 return NULL;
521 }
522 /* One child */
523 if (tn->empty_children == tnode_child_length(tn) - 1)
524 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700525 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700526
Olof Johansson91b9a272005-08-09 20:24:39 -0700527 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700528 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700529 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700530
531 /* compress one level */
Stephen Hemminger06801912007-08-10 15:22:13 -0700532 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700533 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700534 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700535 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * Double as long as the resulting node has a number of
538 * nonempty nodes that are above the threshold.
539 */
540
541 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700542 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
543 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700545 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700546 * children in the *doubled* node is at least 'high'."
547 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700548 * 'high' in this instance is the variable 'inflate_threshold'. It
549 * is expressed as a percentage, so we multiply it with
550 * tnode_child_length() and instead of multiplying by 2 (since the
551 * child array will be doubled by inflate()) and multiplying
552 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700553 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700554 *
555 * The left-hand side may look a bit weird: tnode_child_length(tn)
556 * - tn->empty_children is of course the number of non-null children
557 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700558 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700559 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700560 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700561 *
Robert Olsson19baf832005-06-21 12:43:18 -0700562 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700563 *
Robert Olsson19baf832005-06-21 12:43:18 -0700564 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700565 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700566 * tn->full_children;
567 *
568 * new_child_length = tnode_child_length(tn) * 2;
569 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700570 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700571 * new_child_length;
572 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700573 *
574 * ...and so on, tho it would mess up the while () loop.
575 *
Robert Olsson19baf832005-06-21 12:43:18 -0700576 * anyway,
577 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
578 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700579 *
Robert Olsson19baf832005-06-21 12:43:18 -0700580 * avoid a division:
581 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
582 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700583 *
Robert Olsson19baf832005-06-21 12:43:18 -0700584 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700585 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700586 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700587 *
Robert Olsson19baf832005-06-21 12:43:18 -0700588 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700589 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700590 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700591 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700592 *
Robert Olsson19baf832005-06-21 12:43:18 -0700593 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700594 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700595 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700596 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700597 *
Robert Olsson19baf832005-06-21 12:43:18 -0700598 */
599
600 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700601
Robert Olssone6308be2005-10-04 13:01:58 -0700602 /* Keep root node larger */
603
Stephen Hemminger132adf52007-03-08 20:44:43 -0800604 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700605 inflate_threshold_use = inflate_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900606 else
Robert Olssone6308be2005-10-04 13:01:58 -0700607 inflate_threshold_use = inflate_threshold;
608
Robert Olsson2f368952005-07-05 15:02:40 -0700609 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700610 max_resize = 10;
611 while ((tn->full_children > 0 && max_resize-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800612 50 * (tn->full_children + tnode_child_length(tn)
613 - tn->empty_children)
614 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700615
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700616 old_tn = tn;
617 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800618
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700619 if (IS_ERR(tn)) {
620 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700621#ifdef CONFIG_IP_FIB_TRIE_STATS
622 t->stats.resize_node_skipped++;
623#endif
624 break;
625 }
Robert Olsson19baf832005-06-21 12:43:18 -0700626 }
627
Robert Olsson05eee482007-03-19 16:27:37 -0700628 if (max_resize < 0) {
629 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800630 pr_warning("Fix inflate_threshold_root."
631 " Now=%d size=%d bits\n",
632 inflate_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700633 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800634 pr_warning("Fix inflate_threshold."
635 " Now=%d size=%d bits\n",
636 inflate_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700637 }
638
Robert Olsson19baf832005-06-21 12:43:18 -0700639 check_tnode(tn);
640
641 /*
642 * Halve as long as the number of empty children in this
643 * node is above threshold.
644 */
Robert Olsson2f368952005-07-05 15:02:40 -0700645
Robert Olssone6308be2005-10-04 13:01:58 -0700646
647 /* Keep root node larger */
648
Stephen Hemminger132adf52007-03-08 20:44:43 -0800649 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700650 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900651 else
Robert Olssone6308be2005-10-04 13:01:58 -0700652 halve_threshold_use = halve_threshold;
653
Robert Olsson2f368952005-07-05 15:02:40 -0700654 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700655 max_resize = 10;
656 while (tn->bits > 1 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700657 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700658 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700659
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700660 old_tn = tn;
661 tn = halve(t, tn);
662 if (IS_ERR(tn)) {
663 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700664#ifdef CONFIG_IP_FIB_TRIE_STATS
665 t->stats.resize_node_skipped++;
666#endif
667 break;
668 }
669 }
670
Robert Olsson05eee482007-03-19 16:27:37 -0700671 if (max_resize < 0) {
672 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800673 pr_warning("Fix halve_threshold_root."
674 " Now=%d size=%d bits\n",
675 halve_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700676 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800677 pr_warning("Fix halve_threshold."
678 " Now=%d size=%d bits\n",
679 halve_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700680 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700681
Robert Olsson19baf832005-06-21 12:43:18 -0700682 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700683 if (tn->empty_children == tnode_child_length(tn) - 1)
684 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700685 struct node *n;
686
Olof Johansson91b9a272005-08-09 20:24:39 -0700687 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700688 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700689 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700690
691 /* compress one level */
692
Stephen Hemminger06801912007-08-10 15:22:13 -0700693 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700694 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700695 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700696 }
697
698 return (struct node *) tn;
699}
700
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700701static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700702{
Robert Olsson19baf832005-06-21 12:43:18 -0700703 struct tnode *oldtnode = tn;
704 int olen = tnode_child_length(tn);
705 int i;
706
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700707 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700708
709 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
710
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700711 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700712 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700713
714 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700715 * Preallocate and store tnodes before the actual work so we
716 * don't get into an inconsistent state if memory allocation
717 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700718 * of tnode is ignored.
719 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700720
721 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800722 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700723
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800724 inode = (struct tnode *) tnode_get_child(oldtnode, i);
Robert Olsson2f368952005-07-05 15:02:40 -0700725 if (inode &&
726 IS_TNODE(inode) &&
727 inode->pos == oldtnode->pos + oldtnode->bits &&
728 inode->bits > 1) {
729 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700730 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700731
Robert Olsson2f368952005-07-05 15:02:40 -0700732 left = tnode_new(inode->key&(~m), inode->pos + 1,
733 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700734 if (!left)
735 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700736
Robert Olsson2f368952005-07-05 15:02:40 -0700737 right = tnode_new(inode->key|m, inode->pos + 1,
738 inode->bits - 1);
739
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900740 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700741 tnode_free(left);
742 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900743 }
Robert Olsson2f368952005-07-05 15:02:40 -0700744
745 put_child(t, tn, 2*i, (struct node *) left);
746 put_child(t, tn, 2*i+1, (struct node *) right);
747 }
748 }
749
Olof Johansson91b9a272005-08-09 20:24:39 -0700750 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800751 struct tnode *inode;
Robert Olsson19baf832005-06-21 12:43:18 -0700752 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700753 struct tnode *left, *right;
754 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700755
Robert Olsson19baf832005-06-21 12:43:18 -0700756 /* An empty child */
757 if (node == NULL)
758 continue;
759
760 /* A leaf or an internal node with skipped bits */
761
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700762 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700763 tn->pos + tn->bits - 1) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800764 if (tkey_extract_bits(node->key,
765 oldtnode->pos + oldtnode->bits,
766 1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700767 put_child(t, tn, 2*i, node);
768 else
769 put_child(t, tn, 2*i+1, node);
770 continue;
771 }
772
773 /* An internal node with two children */
774 inode = (struct tnode *) node;
775
776 if (inode->bits == 1) {
777 put_child(t, tn, 2*i, inode->child[0]);
778 put_child(t, tn, 2*i+1, inode->child[1]);
779
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700780 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700781 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700782 }
783
Olof Johansson91b9a272005-08-09 20:24:39 -0700784 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700785
Olof Johansson91b9a272005-08-09 20:24:39 -0700786 /* We will replace this node 'inode' with two new
787 * ones, 'left' and 'right', each with half of the
788 * original children. The two new nodes will have
789 * a position one bit further down the key and this
790 * means that the "significant" part of their keys
791 * (see the discussion near the top of this file)
792 * will differ by one bit, which will be "0" in
793 * left's key and "1" in right's key. Since we are
794 * moving the key position by one step, the bit that
795 * we are moving away from - the bit at position
796 * (inode->pos) - is the one that will differ between
797 * left and right. So... we synthesize that bit in the
798 * two new keys.
799 * The mask 'm' below will be a single "one" bit at
800 * the position (inode->pos)
801 */
Robert Olsson19baf832005-06-21 12:43:18 -0700802
Olof Johansson91b9a272005-08-09 20:24:39 -0700803 /* Use the old key, but set the new significant
804 * bit to zero.
805 */
Robert Olsson19baf832005-06-21 12:43:18 -0700806
Olof Johansson91b9a272005-08-09 20:24:39 -0700807 left = (struct tnode *) tnode_get_child(tn, 2*i);
808 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700809
Olof Johansson91b9a272005-08-09 20:24:39 -0700810 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700811
Olof Johansson91b9a272005-08-09 20:24:39 -0700812 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
813 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700814
Olof Johansson91b9a272005-08-09 20:24:39 -0700815 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700816
Olof Johansson91b9a272005-08-09 20:24:39 -0700817 size = tnode_child_length(left);
818 for (j = 0; j < size; j++) {
819 put_child(t, left, j, inode->child[j]);
820 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700821 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700822 put_child(t, tn, 2*i, resize(t, left));
823 put_child(t, tn, 2*i+1, resize(t, right));
824
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700825 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700826 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700827 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700828 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700829nomem:
830 {
831 int size = tnode_child_length(tn);
832 int j;
833
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700834 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700835 if (tn->child[j])
836 tnode_free((struct tnode *)tn->child[j]);
837
838 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700839
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700840 return ERR_PTR(-ENOMEM);
841 }
Robert Olsson19baf832005-06-21 12:43:18 -0700842}
843
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700844static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700845{
846 struct tnode *oldtnode = tn;
847 struct node *left, *right;
848 int i;
849 int olen = tnode_child_length(tn);
850
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700851 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700852
853 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700854
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700855 if (!tn)
856 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700857
858 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700859 * Preallocate and store tnodes before the actual work so we
860 * don't get into an inconsistent state if memory allocation
861 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700862 * of tnode is ignored.
863 */
864
Olof Johansson91b9a272005-08-09 20:24:39 -0700865 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700866 left = tnode_get_child(oldtnode, i);
867 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700868
Robert Olsson2f368952005-07-05 15:02:40 -0700869 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700870 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700871 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700872
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700873 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700874
875 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700876 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700877
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700878 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700879 }
Robert Olsson2f368952005-07-05 15:02:40 -0700880
Robert Olsson2f368952005-07-05 15:02:40 -0700881 }
Robert Olsson19baf832005-06-21 12:43:18 -0700882
Olof Johansson91b9a272005-08-09 20:24:39 -0700883 for (i = 0; i < olen; i += 2) {
884 struct tnode *newBinNode;
885
Robert Olsson19baf832005-06-21 12:43:18 -0700886 left = tnode_get_child(oldtnode, i);
887 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700888
Robert Olsson19baf832005-06-21 12:43:18 -0700889 /* At least one of the children is empty */
890 if (left == NULL) {
891 if (right == NULL) /* Both are empty */
892 continue;
893 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700894 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700895 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700896
897 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700898 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700899 continue;
900 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700901
Robert Olsson19baf832005-06-21 12:43:18 -0700902 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700903 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
904 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700905 put_child(t, newBinNode, 0, left);
906 put_child(t, newBinNode, 1, right);
907 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700908 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700909 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700910 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700911nomem:
912 {
913 int size = tnode_child_length(tn);
914 int j;
915
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700916 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700917 if (tn->child[j])
918 tnode_free((struct tnode *)tn->child[j]);
919
920 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700921
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700922 return ERR_PTR(-ENOMEM);
923 }
Robert Olsson19baf832005-06-21 12:43:18 -0700924}
925
Robert Olsson772cb712005-09-19 15:31:18 -0700926/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700927 via get_fa_head and dump */
928
Robert Olsson772cb712005-09-19 15:31:18 -0700929static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700930{
Robert Olsson772cb712005-09-19 15:31:18 -0700931 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700932 struct hlist_node *node;
933 struct leaf_info *li;
934
Robert Olsson2373ce12005-08-25 13:01:29 -0700935 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700936 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700937 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700938
Robert Olsson19baf832005-06-21 12:43:18 -0700939 return NULL;
940}
941
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800942static inline struct list_head *get_fa_head(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700943{
Robert Olsson772cb712005-09-19 15:31:18 -0700944 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700945
Olof Johansson91b9a272005-08-09 20:24:39 -0700946 if (!li)
947 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700948
Olof Johansson91b9a272005-08-09 20:24:39 -0700949 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700950}
951
952static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
953{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900954 struct leaf_info *li = NULL, *last = NULL;
955 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700956
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900957 if (hlist_empty(head)) {
958 hlist_add_head_rcu(&new->hlist, head);
959 } else {
960 hlist_for_each_entry(li, node, head, hlist) {
961 if (new->plen > li->plen)
962 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700963
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900964 last = li;
965 }
966 if (last)
967 hlist_add_after_rcu(&last->hlist, &new->hlist);
968 else
969 hlist_add_before_rcu(&new->hlist, &li->hlist);
970 }
Robert Olsson19baf832005-06-21 12:43:18 -0700971}
972
Robert Olsson2373ce12005-08-25 13:01:29 -0700973/* rcu_read_lock needs to be hold by caller from readside */
974
Robert Olsson19baf832005-06-21 12:43:18 -0700975static struct leaf *
976fib_find_node(struct trie *t, u32 key)
977{
978 int pos;
979 struct tnode *tn;
980 struct node *n;
981
982 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700983 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700984
985 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
986 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700987
Robert Olsson19baf832005-06-21 12:43:18 -0700988 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700989
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700990 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700991 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800992 n = tnode_get_child_rcu(tn,
993 tkey_extract_bits(key,
994 tn->pos,
995 tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700996 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700997 break;
998 }
999 /* Case we have found a leaf. Compare prefixes */
1000
Olof Johansson91b9a272005-08-09 20:24:39 -07001001 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
1002 return (struct leaf *)n;
1003
Robert Olsson19baf832005-06-21 12:43:18 -07001004 return NULL;
1005}
1006
Jarek Poplawski7b855762009-06-18 00:28:51 -07001007static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -07001008{
Robert Olsson19baf832005-06-21 12:43:18 -07001009 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -07001010 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -07001011 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001012
Robert Olsson3ed18d72009-05-21 15:20:59 -07001013 key = tn->key;
1014
Stephen Hemminger06801912007-08-10 15:22:13 -07001015 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -07001016 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1017 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001018 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1019
1020 tnode_put_child_reorg((struct tnode *)tp, cindex,
1021 (struct node *)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -07001022
Stephen Hemminger06801912007-08-10 15:22:13 -07001023 tp = node_parent((struct node *) tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -07001024 if (!tp)
1025 rcu_assign_pointer(t->trie, (struct node *)tn);
1026
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -07001027 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -07001028 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -07001029 break;
Stephen Hemminger06801912007-08-10 15:22:13 -07001030 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001031 }
Stephen Hemminger06801912007-08-10 15:22:13 -07001032
Robert Olsson19baf832005-06-21 12:43:18 -07001033 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -07001034 if (IS_TNODE(tn))
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001035 tn = (struct tnode *)resize(t, (struct tnode *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001036
Jarek Poplawski7b855762009-06-18 00:28:51 -07001037 rcu_assign_pointer(t->trie, (struct node *)tn);
1038 tnode_free_flush();
1039
1040 return;
Robert Olsson19baf832005-06-21 12:43:18 -07001041}
1042
Robert Olsson2373ce12005-08-25 13:01:29 -07001043/* only used from updater-side */
1044
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001045static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -07001046{
1047 int pos, newpos;
1048 struct tnode *tp = NULL, *tn = NULL;
1049 struct node *n;
1050 struct leaf *l;
1051 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001052 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001053 struct leaf_info *li;
1054 t_key cindex;
1055
1056 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001057 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001058
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001059 /* If we point to NULL, stop. Either the tree is empty and we should
1060 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001061 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001062 * If we point to a T_TNODE, check if it matches our key. Note that
1063 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001064 * not be the parent's 'pos'+'bits'!
1065 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001066 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001067 * the index from our key, push the T_TNODE and walk the tree.
1068 *
1069 * If it doesn't, we have to replace it with a new T_TNODE.
1070 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001071 * If we point to a T_LEAF, it might or might not have the same key
1072 * as we do. If it does, just change the value, update the T_LEAF's
1073 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001074 * If it doesn't, we need to replace it with a T_TNODE.
1075 */
1076
1077 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1078 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001079
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001080 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001081
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001082 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001083 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001084 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001085 n = tnode_get_child(tn,
1086 tkey_extract_bits(key,
1087 tn->pos,
1088 tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -07001089
Stephen Hemminger06801912007-08-10 15:22:13 -07001090 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001091 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001092 break;
1093 }
1094
1095 /*
1096 * n ----> NULL, LEAF or TNODE
1097 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001098 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001099 */
1100
Olof Johansson91b9a272005-08-09 20:24:39 -07001101 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001102
1103 /* Case 1: n is a leaf. Compare prefixes */
1104
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001105 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001106 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001107 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001108
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001109 if (!li)
1110 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001111
1112 fa_head = &li->falh;
1113 insert_leaf_info(&l->list, li);
1114 goto done;
1115 }
Robert Olsson19baf832005-06-21 12:43:18 -07001116 l = leaf_new();
1117
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001118 if (!l)
1119 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001120
1121 l->key = key;
1122 li = leaf_info_new(plen);
1123
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001124 if (!li) {
Stephen Hemminger387a5482008-04-10 03:47:34 -07001125 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001126 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001127 }
Robert Olsson19baf832005-06-21 12:43:18 -07001128
1129 fa_head = &li->falh;
1130 insert_leaf_info(&l->list, li);
1131
Robert Olsson19baf832005-06-21 12:43:18 -07001132 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001133 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001134
Stephen Hemminger06801912007-08-10 15:22:13 -07001135 node_set_parent((struct node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001136
Olof Johansson91b9a272005-08-09 20:24:39 -07001137 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1138 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1139 } else {
1140 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001141 /*
1142 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001143 * first tnode need some special handling
1144 */
1145
1146 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001147 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001148 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001149 pos = 0;
1150
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001151 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001152 newpos = tkey_mismatch(key, pos, n->key);
1153 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001154 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001155 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001156 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001157 }
Robert Olsson19baf832005-06-21 12:43:18 -07001158
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001159 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001160 free_leaf_info(li);
Stephen Hemminger387a5482008-04-10 03:47:34 -07001161 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001162 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001163 }
1164
Stephen Hemminger06801912007-08-10 15:22:13 -07001165 node_set_parent((struct node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001166
Olof Johansson91b9a272005-08-09 20:24:39 -07001167 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001168 put_child(t, tn, missbit, (struct node *)l);
1169 put_child(t, tn, 1-missbit, n);
1170
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001171 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001172 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001173 put_child(t, (struct tnode *)tp, cindex,
1174 (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001175 } else {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001176 rcu_assign_pointer(t->trie, (struct node *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001177 tp = tn;
1178 }
1179 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001180
1181 if (tp && tp->pos + tp->bits > 32)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001182 pr_warning("fib_trie"
1183 " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1184 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001185
Robert Olsson19baf832005-06-21 12:43:18 -07001186 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001187
Jarek Poplawski7b855762009-06-18 00:28:51 -07001188 trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001189done:
Robert Olsson19baf832005-06-21 12:43:18 -07001190 return fa_head;
1191}
1192
Robert Olssond562f1f2007-03-26 14:22:22 -07001193/*
1194 * Caller must hold RTNL.
1195 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001196static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001197{
1198 struct trie *t = (struct trie *) tb->tb_data;
1199 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001200 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001201 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001202 int plen = cfg->fc_dst_len;
1203 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001204 u32 key, mask;
1205 int err;
1206 struct leaf *l;
1207
1208 if (plen > 32)
1209 return -EINVAL;
1210
Thomas Graf4e902c52006-08-17 18:14:52 -07001211 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001212
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001213 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001214
Olof Johansson91b9a272005-08-09 20:24:39 -07001215 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001216
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001217 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001218 return -EINVAL;
1219
1220 key = key & mask;
1221
Thomas Graf4e902c52006-08-17 18:14:52 -07001222 fi = fib_create_info(cfg);
1223 if (IS_ERR(fi)) {
1224 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001225 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001226 }
Robert Olsson19baf832005-06-21 12:43:18 -07001227
1228 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001229 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001230
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001231 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001232 fa_head = get_fa_head(l, plen);
1233 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1234 }
1235
1236 /* Now fa, if non-NULL, points to the first fib alias
1237 * with the same keys [prefix,tos,priority], if such key already
1238 * exists or to the node before which we will insert new one.
1239 *
1240 * If fa is NULL, we will need to allocate a new one and
1241 * insert to the head of f.
1242 *
1243 * If f is NULL, no fib node matched the destination key
1244 * and we need to allocate a new one of those as well.
1245 */
1246
Julian Anastasov936f6f82008-01-28 21:18:06 -08001247 if (fa && fa->fa_tos == tos &&
1248 fa->fa_info->fib_priority == fi->fib_priority) {
1249 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001250
1251 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001252 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001253 goto out;
1254
Julian Anastasov936f6f82008-01-28 21:18:06 -08001255 /* We have 2 goals:
1256 * 1. Find exact match for type, scope, fib_info to avoid
1257 * duplicate routes
1258 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1259 */
1260 fa_match = NULL;
1261 fa_first = fa;
1262 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1263 list_for_each_entry_continue(fa, fa_head, fa_list) {
1264 if (fa->fa_tos != tos)
1265 break;
1266 if (fa->fa_info->fib_priority != fi->fib_priority)
1267 break;
1268 if (fa->fa_type == cfg->fc_type &&
1269 fa->fa_scope == cfg->fc_scope &&
1270 fa->fa_info == fi) {
1271 fa_match = fa;
1272 break;
1273 }
1274 }
1275
Thomas Graf4e902c52006-08-17 18:14:52 -07001276 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001277 struct fib_info *fi_drop;
1278 u8 state;
1279
Julian Anastasov936f6f82008-01-28 21:18:06 -08001280 fa = fa_first;
1281 if (fa_match) {
1282 if (fa == fa_match)
1283 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001284 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001285 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001286 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001287 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001288 if (new_fa == NULL)
1289 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001290
1291 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001292 new_fa->fa_tos = fa->fa_tos;
1293 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001294 new_fa->fa_type = cfg->fc_type;
1295 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001296 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001297 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001298
Robert Olsson2373ce12005-08-25 13:01:29 -07001299 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1300 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001301
1302 fib_release_info(fi_drop);
1303 if (state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001304 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Milan Kocianb8f55832007-05-23 14:55:06 -07001305 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1306 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001307
Olof Johansson91b9a272005-08-09 20:24:39 -07001308 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001309 }
1310 /* Error if we find a perfect match which
1311 * uses the same scope, type, and nexthop
1312 * information.
1313 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001314 if (fa_match)
1315 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001316
Thomas Graf4e902c52006-08-17 18:14:52 -07001317 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001318 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001319 }
1320 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001321 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001322 goto out;
1323
1324 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001325 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001326 if (new_fa == NULL)
1327 goto out;
1328
1329 new_fa->fa_info = fi;
1330 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001331 new_fa->fa_type = cfg->fc_type;
1332 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001333 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001334 /*
1335 * Insert new entry to the list.
1336 */
1337
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001338 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001339 fa_head = fib_insert_node(t, key, plen);
1340 if (unlikely(!fa_head)) {
1341 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001342 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001343 }
Robert Olssonf835e472005-06-28 15:00:39 -07001344 }
Robert Olsson19baf832005-06-21 12:43:18 -07001345
Robert Olsson2373ce12005-08-25 13:01:29 -07001346 list_add_tail_rcu(&new_fa->fa_list,
1347 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001348
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001349 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001350 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001351 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001352succeeded:
1353 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001354
1355out_free_new_fa:
1356 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001357out:
1358 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001359err:
Robert Olsson19baf832005-06-21 12:43:18 -07001360 return err;
1361}
1362
Robert Olsson772cb712005-09-19 15:31:18 -07001363/* should be called with rcu_read_lock */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001364static int check_leaf(struct trie *t, struct leaf *l,
1365 t_key key, const struct flowi *flp,
1366 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001367{
Robert Olsson19baf832005-06-21 12:43:18 -07001368 struct leaf_info *li;
1369 struct hlist_head *hhead = &l->list;
1370 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001371
Robert Olsson2373ce12005-08-25 13:01:29 -07001372 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001373 int err;
1374 int plen = li->plen;
1375 __be32 mask = inet_make_mask(plen);
1376
Al Viro888454c2006-09-19 13:42:46 -07001377 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001378 continue;
1379
Rami Rosene204a342009-05-18 01:19:12 +00001380 err = fib_semantic_match(&li->falh, flp, res, plen);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001381
Robert Olsson19baf832005-06-21 12:43:18 -07001382#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001383 if (err <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001384 t->stats.semantic_match_passed++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001385 else
1386 t->stats.semantic_match_miss++;
Robert Olsson19baf832005-06-21 12:43:18 -07001387#endif
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001388 if (err <= 0)
Ben Hutchings2e655572008-07-10 16:52:52 -07001389 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001390 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001391
Ben Hutchings2e655572008-07-10 16:52:52 -07001392 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001393}
1394
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001395static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1396 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001397{
1398 struct trie *t = (struct trie *) tb->tb_data;
Ben Hutchings2e655572008-07-10 16:52:52 -07001399 int ret;
Robert Olsson19baf832005-06-21 12:43:18 -07001400 struct node *n;
1401 struct tnode *pn;
1402 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001403 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001404 int chopped_off;
1405 t_key cindex = 0;
1406 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001407 struct tnode *cn;
1408 t_key node_prefix, key_prefix, pref_mismatch;
1409 int mp;
1410
Robert Olsson2373ce12005-08-25 13:01:29 -07001411 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001412
Robert Olsson2373ce12005-08-25 13:01:29 -07001413 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001414 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001415 goto failed;
1416
1417#ifdef CONFIG_IP_FIB_TRIE_STATS
1418 t->stats.gets++;
1419#endif
1420
1421 /* Just a leaf? */
1422 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001423 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001424 goto found;
Robert Olsson19baf832005-06-21 12:43:18 -07001425 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001426
Robert Olsson19baf832005-06-21 12:43:18 -07001427 pn = (struct tnode *) n;
1428 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001429
Olof Johansson91b9a272005-08-09 20:24:39 -07001430 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001431 pos = pn->pos;
1432 bits = pn->bits;
1433
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001434 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001435 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1436 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001437
1438 n = tnode_get_child(pn, cindex);
1439
1440 if (n == NULL) {
1441#ifdef CONFIG_IP_FIB_TRIE_STATS
1442 t->stats.null_node_hit++;
1443#endif
1444 goto backtrace;
1445 }
1446
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001447 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001448 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1449 if (ret > 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07001450 goto backtrace;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001451 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001452 }
1453
Olof Johansson91b9a272005-08-09 20:24:39 -07001454 cn = (struct tnode *)n;
1455
1456 /*
1457 * It's a tnode, and we can do some extra checks here if we
1458 * like, to avoid descending into a dead-end branch.
1459 * This tnode is in the parent's child array at index
1460 * key[p_pos..p_pos+p_bits] but potentially with some bits
1461 * chopped off, so in reality the index may be just a
1462 * subprefix, padded with zero at the end.
1463 * We can also take a look at any skipped bits in this
1464 * tnode - everything up to p_pos is supposed to be ok,
1465 * and the non-chopped bits of the index (se previous
1466 * paragraph) are also guaranteed ok, but the rest is
1467 * considered unknown.
1468 *
1469 * The skipped bits are key[pos+bits..cn->pos].
1470 */
1471
1472 /* If current_prefix_length < pos+bits, we are already doing
1473 * actual prefix matching, which means everything from
1474 * pos+(bits-chopped_off) onward must be zero along some
1475 * branch of this subtree - otherwise there is *no* valid
1476 * prefix present. Here we can only check the skipped
1477 * bits. Remember, since we have already indexed into the
1478 * parent's child array, we know that the bits we chopped of
1479 * *are* zero.
1480 */
1481
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001482 /* NOTA BENE: Checking only skipped bits
1483 for the new node here */
Olof Johansson91b9a272005-08-09 20:24:39 -07001484
1485 if (current_prefix_length < pos+bits) {
1486 if (tkey_extract_bits(cn->key, current_prefix_length,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001487 cn->pos - current_prefix_length)
1488 || !(cn->child[0]))
Olof Johansson91b9a272005-08-09 20:24:39 -07001489 goto backtrace;
1490 }
1491
1492 /*
1493 * If chopped_off=0, the index is fully validated and we
1494 * only need to look at the skipped bits for this, the new,
1495 * tnode. What we actually want to do is to find out if
1496 * these skipped bits match our key perfectly, or if we will
1497 * have to count on finding a matching prefix further down,
1498 * because if we do, we would like to have some way of
1499 * verifying the existence of such a prefix at this point.
1500 */
1501
1502 /* The only thing we can do at this point is to verify that
1503 * any such matching prefix can indeed be a prefix to our
1504 * key, and if the bits in the node we are inspecting that
1505 * do not match our key are not ZERO, this cannot be true.
1506 * Thus, find out where there is a mismatch (before cn->pos)
1507 * and verify that all the mismatching bits are zero in the
1508 * new tnode's key.
1509 */
1510
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001511 /*
1512 * Note: We aren't very concerned about the piece of
1513 * the key that precede pn->pos+pn->bits, since these
1514 * have already been checked. The bits after cn->pos
1515 * aren't checked since these are by definition
1516 * "unknown" at this point. Thus, what we want to see
1517 * is if we are about to enter the "prefix matching"
1518 * state, and in that case verify that the skipped
1519 * bits that will prevail throughout this subtree are
1520 * zero, as they have to be if we are to find a
1521 * matching prefix.
Olof Johansson91b9a272005-08-09 20:24:39 -07001522 */
1523
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001524 node_prefix = mask_pfx(cn->key, cn->pos);
1525 key_prefix = mask_pfx(key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001526 pref_mismatch = key_prefix^node_prefix;
1527 mp = 0;
1528
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001529 /*
1530 * In short: If skipped bits in this node do not match
1531 * the search key, enter the "prefix matching"
1532 * state.directly.
Olof Johansson91b9a272005-08-09 20:24:39 -07001533 */
1534 if (pref_mismatch) {
1535 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1536 mp++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001537 pref_mismatch = pref_mismatch << 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001538 }
1539 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1540
1541 if (key_prefix != 0)
1542 goto backtrace;
1543
1544 if (current_prefix_length >= cn->pos)
1545 current_prefix_length = mp;
1546 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001547
Olof Johansson91b9a272005-08-09 20:24:39 -07001548 pn = (struct tnode *)n; /* Descend */
1549 chopped_off = 0;
1550 continue;
1551
Robert Olsson19baf832005-06-21 12:43:18 -07001552backtrace:
1553 chopped_off++;
1554
1555 /* As zero don't change the child key (cindex) */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001556 while ((chopped_off <= pn->bits)
1557 && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001558 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001559
1560 /* Decrease current_... with bits chopped off */
1561 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001562 current_prefix_length = pn->pos + pn->bits
1563 - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001564
Robert Olsson19baf832005-06-21 12:43:18 -07001565 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001566 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001567 * chopped off all bits in this tnode walk up to our parent.
1568 */
1569
Olof Johansson91b9a272005-08-09 20:24:39 -07001570 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001571 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001572 } else {
Stephen Hemminger06801912007-08-10 15:22:13 -07001573 struct tnode *parent = node_parent((struct node *) pn);
1574 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001575 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001576
Robert Olsson19baf832005-06-21 12:43:18 -07001577 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001578 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1579 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001580 chopped_off = 0;
1581
1582#ifdef CONFIG_IP_FIB_TRIE_STATS
1583 t->stats.backtrack++;
1584#endif
1585 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001586 }
Robert Olsson19baf832005-06-21 12:43:18 -07001587 }
1588failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001589 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001590found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001591 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001592 return ret;
1593}
1594
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001595/*
1596 * Remove the leaf and return parent.
1597 */
1598static void trie_leaf_remove(struct trie *t, struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001599{
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001600 struct tnode *tp = node_parent((struct node *) l);
Robert Olsson19baf832005-06-21 12:43:18 -07001601
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001602 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001603
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001604 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001605 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001606 put_child(t, (struct tnode *)tp, cindex, NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001607 trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001608 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001609 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001610
Stephen Hemminger387a5482008-04-10 03:47:34 -07001611 free_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001612}
1613
Robert Olssond562f1f2007-03-26 14:22:22 -07001614/*
1615 * Caller must hold RTNL.
1616 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001617static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001618{
1619 struct trie *t = (struct trie *) tb->tb_data;
1620 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001621 int plen = cfg->fc_dst_len;
1622 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001623 struct fib_alias *fa, *fa_to_delete;
1624 struct list_head *fa_head;
1625 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001626 struct leaf_info *li;
1627
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001628 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001629 return -EINVAL;
1630
Thomas Graf4e902c52006-08-17 18:14:52 -07001631 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001632 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001633
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001634 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001635 return -EINVAL;
1636
1637 key = key & mask;
1638 l = fib_find_node(t, key);
1639
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001640 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001641 return -ESRCH;
1642
1643 fa_head = get_fa_head(l, plen);
1644 fa = fib_find_alias(fa_head, tos, 0);
1645
1646 if (!fa)
1647 return -ESRCH;
1648
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001649 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001650
1651 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001652 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1653 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001654 struct fib_info *fi = fa->fa_info;
1655
1656 if (fa->fa_tos != tos)
1657 break;
1658
Thomas Graf4e902c52006-08-17 18:14:52 -07001659 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1660 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1661 fa->fa_scope == cfg->fc_scope) &&
1662 (!cfg->fc_protocol ||
1663 fi->fib_protocol == cfg->fc_protocol) &&
1664 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001665 fa_to_delete = fa;
1666 break;
1667 }
1668 }
1669
Olof Johansson91b9a272005-08-09 20:24:39 -07001670 if (!fa_to_delete)
1671 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001672
Olof Johansson91b9a272005-08-09 20:24:39 -07001673 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001674 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001675 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001676
Olof Johansson91b9a272005-08-09 20:24:39 -07001677 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001678 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001679
Robert Olsson2373ce12005-08-25 13:01:29 -07001680 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001681
Olof Johansson91b9a272005-08-09 20:24:39 -07001682 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001683 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001684 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001685 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001686
1687 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001688 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001689
1690 if (fa->fa_state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001691 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001692
Robert Olsson2373ce12005-08-25 13:01:29 -07001693 fib_release_info(fa->fa_info);
1694 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001695 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001696}
1697
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001698static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001699{
1700 struct fib_alias *fa, *fa_node;
1701 int found = 0;
1702
1703 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1704 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001705
Robert Olsson2373ce12005-08-25 13:01:29 -07001706 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1707 list_del_rcu(&fa->fa_list);
1708 fib_release_info(fa->fa_info);
1709 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001710 found++;
1711 }
1712 }
1713 return found;
1714}
1715
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001716static int trie_flush_leaf(struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001717{
1718 int found = 0;
1719 struct hlist_head *lih = &l->list;
1720 struct hlist_node *node, *tmp;
1721 struct leaf_info *li = NULL;
1722
1723 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001724 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001725
1726 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001727 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001728 free_leaf_info(li);
1729 }
1730 }
1731 return found;
1732}
1733
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001734/*
1735 * Scan for the next right leaf starting at node p->child[idx]
1736 * Since we have back pointer, no recursion necessary.
1737 */
1738static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001739{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001740 do {
1741 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001742
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001743 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001744 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001745 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001746 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001747
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001748 while (idx < 1u << p->bits) {
1749 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001750 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001751 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001752
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001753 if (IS_LEAF(c)) {
1754 prefetch(p->child[idx]);
1755 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001756 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001757
1758 /* Rescan start scanning in new node */
1759 p = (struct tnode *) c;
1760 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001761 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001762
1763 /* Node empty, walk back up to parent */
Olof Johansson91b9a272005-08-09 20:24:39 -07001764 c = (struct node *) p;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001765 } while ( (p = node_parent_rcu(c)) != NULL);
1766
1767 return NULL; /* Root of trie */
1768}
1769
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001770static struct leaf *trie_firstleaf(struct trie *t)
1771{
1772 struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
1773
1774 if (!n)
1775 return NULL;
1776
1777 if (IS_LEAF(n)) /* trie is just a leaf */
1778 return (struct leaf *) n;
1779
1780 return leaf_walk_rcu(n, NULL);
1781}
1782
1783static struct leaf *trie_nextleaf(struct leaf *l)
1784{
1785 struct node *c = (struct node *) l;
1786 struct tnode *p = node_parent(c);
1787
1788 if (!p)
1789 return NULL; /* trie with just one leaf */
1790
1791 return leaf_walk_rcu(p, c);
Robert Olsson19baf832005-06-21 12:43:18 -07001792}
1793
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001794static struct leaf *trie_leafindex(struct trie *t, int index)
1795{
1796 struct leaf *l = trie_firstleaf(t);
1797
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001798 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001799 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001800
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001801 return l;
1802}
1803
1804
Robert Olssond562f1f2007-03-26 14:22:22 -07001805/*
1806 * Caller must hold RTNL.
1807 */
Robert Olsson19baf832005-06-21 12:43:18 -07001808static int fn_trie_flush(struct fib_table *tb)
1809{
1810 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001811 struct leaf *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001812 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001813
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001814 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001815 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001816
1817 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001818 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001819 ll = l;
1820 }
1821
1822 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001823 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001824
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001825 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001826 return found;
1827}
1828
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001829static void fn_trie_select_default(struct fib_table *tb,
1830 const struct flowi *flp,
1831 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001832{
1833 struct trie *t = (struct trie *) tb->tb_data;
1834 int order, last_idx;
1835 struct fib_info *fi = NULL;
1836 struct fib_info *last_resort;
1837 struct fib_alias *fa = NULL;
1838 struct list_head *fa_head;
1839 struct leaf *l;
1840
1841 last_idx = -1;
1842 last_resort = NULL;
1843 order = -1;
1844
Robert Olsson2373ce12005-08-25 13:01:29 -07001845 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001846
Robert Olsson19baf832005-06-21 12:43:18 -07001847 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001848 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001849 goto out;
1850
1851 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001852 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001853 goto out;
1854
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001855 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001856 goto out;
1857
Robert Olsson2373ce12005-08-25 13:01:29 -07001858 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001859 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001860
Robert Olsson19baf832005-06-21 12:43:18 -07001861 if (fa->fa_scope != res->scope ||
1862 fa->fa_type != RTN_UNICAST)
1863 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001864
Robert Olsson19baf832005-06-21 12:43:18 -07001865 if (next_fi->fib_priority > res->fi->fib_priority)
1866 break;
1867 if (!next_fi->fib_nh[0].nh_gw ||
1868 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1869 continue;
1870 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001871
Robert Olsson19baf832005-06-21 12:43:18 -07001872 if (fi == NULL) {
1873 if (next_fi != res->fi)
1874 break;
1875 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -08001876 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001877 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001878 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001879 goto out;
1880 }
1881 fi = next_fi;
1882 order++;
1883 }
1884 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -08001885 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001886 goto out;
1887 }
1888
Denis V. Lunev971b8932007-12-08 00:32:23 -08001889 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1890 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001891 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001892 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001893 goto out;
1894 }
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001895 if (last_idx >= 0)
1896 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001897 tb->tb_default = last_idx;
1898out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001899 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001900}
1901
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001902static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1903 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001904 struct sk_buff *skb, struct netlink_callback *cb)
1905{
1906 int i, s_i;
1907 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001908 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001909
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001910 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001911 i = 0;
1912
Robert Olsson2373ce12005-08-25 13:01:29 -07001913 /* rcu_read_lock is hold by caller */
1914
1915 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001916 if (i < s_i) {
1917 i++;
1918 continue;
1919 }
Robert Olsson19baf832005-06-21 12:43:18 -07001920
1921 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1922 cb->nlh->nlmsg_seq,
1923 RTM_NEWROUTE,
1924 tb->tb_id,
1925 fa->fa_type,
1926 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001927 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001928 plen,
1929 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001930 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001931 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001932 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001933 }
Robert Olsson19baf832005-06-21 12:43:18 -07001934 i++;
1935 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001936 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001937 return skb->len;
1938}
1939
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001940static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1941 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001942{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001943 struct leaf_info *li;
1944 struct hlist_node *node;
1945 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001946
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001947 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001948 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001949
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001950 /* rcu_read_lock is hold by caller */
1951 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1952 if (i < s_i) {
1953 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001954 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001955 }
Robert Olsson19baf832005-06-21 12:43:18 -07001956
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001957 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001958 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001959
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001960 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001961 continue;
1962
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001963 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001964 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001965 return -1;
1966 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001967 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001968 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001969
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001970 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001971 return skb->len;
1972}
1973
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001974static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1975 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001976{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001977 struct leaf *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001978 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001979 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001980 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001981
Robert Olsson2373ce12005-08-25 13:01:29 -07001982 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001983 /* Dump starting at last key.
1984 * Note: 0.0.0.0/0 (ie default) is first key.
1985 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001986 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001987 l = trie_firstleaf(t);
1988 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001989 /* Normally, continue from last key, but if that is missing
1990 * fallback to using slow rescan
1991 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001992 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001993 if (!l)
1994 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001995 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001996
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001997 while (l) {
1998 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001999 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002000 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002001 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002002 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002003 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002004
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002005 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002006 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002007 memset(&cb->args[4], 0,
2008 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07002009 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002010 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07002011 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002012
Robert Olsson19baf832005-06-21 12:43:18 -07002013 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07002014}
2015
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002016void __init fib_hash_init(void)
2017{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002018 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2019 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08002020 0, SLAB_PANIC, NULL);
2021
2022 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2023 max(sizeof(struct leaf),
2024 sizeof(struct leaf_info)),
2025 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002026}
Robert Olsson19baf832005-06-21 12:43:18 -07002027
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002028
2029/* Fix more generic FIB names for init later */
2030struct fib_table *fib_hash_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07002031{
2032 struct fib_table *tb;
2033 struct trie *t;
2034
Robert Olsson19baf832005-06-21 12:43:18 -07002035 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
2036 GFP_KERNEL);
2037 if (tb == NULL)
2038 return NULL;
2039
2040 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08002041 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002042 tb->tb_lookup = fn_trie_lookup;
2043 tb->tb_insert = fn_trie_insert;
2044 tb->tb_delete = fn_trie_delete;
2045 tb->tb_flush = fn_trie_flush;
2046 tb->tb_select_default = fn_trie_select_default;
2047 tb->tb_dump = fn_trie_dump;
Robert Olsson19baf832005-06-21 12:43:18 -07002048
2049 t = (struct trie *) tb->tb_data;
Stephen Hemmingerc28a1cf2008-01-12 20:49:13 -08002050 memset(t, 0, sizeof(*t));
Robert Olsson19baf832005-06-21 12:43:18 -07002051
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002052 if (id == RT_TABLE_LOCAL)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002053 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07002054
2055 return tb;
2056}
2057
Robert Olsson19baf832005-06-21 12:43:18 -07002058#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002059/* Depth first Trie walk iterator */
2060struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002061 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002062 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002063 struct tnode *tnode;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002064 unsigned index;
2065 unsigned depth;
2066};
Robert Olsson19baf832005-06-21 12:43:18 -07002067
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002068static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07002069{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002070 struct tnode *tn = iter->tnode;
2071 unsigned cindex = iter->index;
2072 struct tnode *p;
2073
Eric W. Biederman6640e692007-01-24 14:42:04 -08002074 /* A single entry routing table */
2075 if (!tn)
2076 return NULL;
2077
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002078 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2079 iter->tnode, iter->index, iter->depth);
2080rescan:
2081 while (cindex < (1<<tn->bits)) {
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002082 struct node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002083
2084 if (n) {
2085 if (IS_LEAF(n)) {
2086 iter->tnode = tn;
2087 iter->index = cindex + 1;
2088 } else {
2089 /* push down one level */
2090 iter->tnode = (struct tnode *) n;
2091 iter->index = 0;
2092 ++iter->depth;
2093 }
2094 return n;
2095 }
2096
2097 ++cindex;
2098 }
2099
2100 /* Current node exhausted, pop back up */
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002101 p = node_parent_rcu((struct node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002102 if (p) {
2103 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2104 tn = p;
2105 --iter->depth;
2106 goto rescan;
2107 }
2108
2109 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002110 return NULL;
2111}
2112
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002113static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2114 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002115{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002116 struct node *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002117
Stephen Hemminger132adf52007-03-08 20:44:43 -08002118 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002119 return NULL;
2120
2121 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002122 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002123 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002124
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002125 if (IS_TNODE(n)) {
2126 iter->tnode = (struct tnode *) n;
2127 iter->index = 0;
2128 iter->depth = 1;
2129 } else {
2130 iter->tnode = NULL;
2131 iter->index = 0;
2132 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002133 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002134
2135 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002136}
2137
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002138static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002139{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002140 struct node *n;
2141 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002142
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002143 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002144
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002145 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002146 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002147 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08002148 struct leaf *l = (struct leaf *)n;
2149 struct leaf_info *li;
2150 struct hlist_node *tmp;
2151
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002152 s->leaves++;
2153 s->totdepth += iter.depth;
2154 if (iter.depth > s->maxdepth)
2155 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08002156
2157 hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
2158 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002159 } else {
2160 const struct tnode *tn = (const struct tnode *) n;
2161 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002162
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002163 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002164 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002165 s->nodesizes[tn->bits]++;
2166
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002167 for (i = 0; i < (1<<tn->bits); i++)
2168 if (!tn->child[i])
2169 s->nullpointers++;
2170 }
2171 }
2172 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002173}
2174
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002175/*
Robert Olsson19baf832005-06-21 12:43:18 -07002176 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002177 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002178static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002179{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002180 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002181
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002182 if (stat->leaves)
2183 avdepth = stat->totdepth*100 / stat->leaves;
2184 else
2185 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002186
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002187 seq_printf(seq, "\tAver depth: %u.%02d\n",
2188 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002189 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002190
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002191 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002192 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08002193
2194 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2195 bytes += sizeof(struct leaf_info) * stat->prefixes;
2196
Stephen Hemminger187b5182008-01-12 20:55:55 -08002197 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002198 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002199
Robert Olsson06ef9212006-03-20 21:35:01 -08002200 max = MAX_STAT_DEPTH;
2201 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002202 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002203
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002204 pointers = 0;
2205 for (i = 1; i <= max; i++)
2206 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002207 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002208 pointers += (1<<i) * stat->nodesizes[i];
2209 }
2210 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002211 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002212
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002213 bytes += sizeof(struct node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002214 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2215 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002216}
Robert Olsson19baf832005-06-21 12:43:18 -07002217
2218#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002219static void trie_show_usage(struct seq_file *seq,
2220 const struct trie_use_stats *stats)
2221{
2222 seq_printf(seq, "\nCounters:\n---------\n");
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002223 seq_printf(seq, "gets = %u\n", stats->gets);
2224 seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2225 seq_printf(seq, "semantic match passed = %u\n",
2226 stats->semantic_match_passed);
2227 seq_printf(seq, "semantic match miss = %u\n",
2228 stats->semantic_match_miss);
2229 seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2230 seq_printf(seq, "skipped node resize = %u\n\n",
2231 stats->resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002232}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002233#endif /* CONFIG_IP_FIB_TRIE_STATS */
2234
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002235static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002236{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002237 if (tb->tb_id == RT_TABLE_LOCAL)
2238 seq_puts(seq, "Local:\n");
2239 else if (tb->tb_id == RT_TABLE_MAIN)
2240 seq_puts(seq, "Main:\n");
2241 else
2242 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002243}
Robert Olsson19baf832005-06-21 12:43:18 -07002244
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002245
Robert Olsson19baf832005-06-21 12:43:18 -07002246static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2247{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002248 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002249 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002250
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002251 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002252 "Basic info: size of leaf:"
2253 " %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002254 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002255
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002256 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2257 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2258 struct hlist_node *node;
2259 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002260
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002261 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2262 struct trie *t = (struct trie *) tb->tb_data;
2263 struct trie_stat stat;
2264
2265 if (!t)
2266 continue;
2267
2268 fib_table_print(seq, tb);
2269
2270 trie_collect_stats(t, &stat);
2271 trie_show_stats(seq, &stat);
2272#ifdef CONFIG_IP_FIB_TRIE_STATS
2273 trie_show_usage(seq, &t->stats);
2274#endif
2275 }
2276 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002277
Robert Olsson19baf832005-06-21 12:43:18 -07002278 return 0;
2279}
2280
Robert Olsson19baf832005-06-21 12:43:18 -07002281static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2282{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002283 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002284}
2285
Arjan van de Ven9a321442007-02-12 00:55:35 -08002286static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002287 .owner = THIS_MODULE,
2288 .open = fib_triestat_seq_open,
2289 .read = seq_read,
2290 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002291 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002292};
2293
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002294static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002295{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002296 struct fib_trie_iter *iter = seq->private;
2297 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002298 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002299 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002300
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002301 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2302 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2303 struct hlist_node *node;
2304 struct fib_table *tb;
2305
2306 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2307 struct node *n;
2308
2309 for (n = fib_trie_get_first(iter,
2310 (struct trie *) tb->tb_data);
2311 n; n = fib_trie_get_next(iter))
2312 if (pos == idx++) {
2313 iter->tb = tb;
2314 return n;
2315 }
2316 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002317 }
Robert Olsson19baf832005-06-21 12:43:18 -07002318
Robert Olsson19baf832005-06-21 12:43:18 -07002319 return NULL;
2320}
2321
2322static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002323 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002324{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002325 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002326 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002327}
2328
2329static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2330{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002331 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002332 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002333 struct fib_table *tb = iter->tb;
2334 struct hlist_node *tb_node;
2335 unsigned int h;
2336 struct node *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002337
Robert Olsson19baf832005-06-21 12:43:18 -07002338 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002339 /* next node in same table */
2340 n = fib_trie_get_next(iter);
2341 if (n)
2342 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002343
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002344 /* walk rest of this hash chain */
2345 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2346 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
2347 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2348 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2349 if (n)
2350 goto found;
2351 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002352
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002353 /* new hash chain */
2354 while (++h < FIB_TABLE_HASHSZ) {
2355 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2356 hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
2357 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2358 if (n)
2359 goto found;
2360 }
2361 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002362 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002363
2364found:
2365 iter->tb = tb;
2366 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002367}
2368
2369static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002370 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002371{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002372 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002373}
2374
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002375static void seq_indent(struct seq_file *seq, int n)
2376{
2377 while (n-- > 0) seq_puts(seq, " ");
2378}
Robert Olsson19baf832005-06-21 12:43:18 -07002379
Eric Dumazet28d36e32008-01-14 23:09:56 -08002380static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002381{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002382 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002383 case RT_SCOPE_UNIVERSE: return "universe";
2384 case RT_SCOPE_SITE: return "site";
2385 case RT_SCOPE_LINK: return "link";
2386 case RT_SCOPE_HOST: return "host";
2387 case RT_SCOPE_NOWHERE: return "nowhere";
2388 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002389 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002390 return buf;
2391 }
2392}
2393
2394static const char *rtn_type_names[__RTN_MAX] = {
2395 [RTN_UNSPEC] = "UNSPEC",
2396 [RTN_UNICAST] = "UNICAST",
2397 [RTN_LOCAL] = "LOCAL",
2398 [RTN_BROADCAST] = "BROADCAST",
2399 [RTN_ANYCAST] = "ANYCAST",
2400 [RTN_MULTICAST] = "MULTICAST",
2401 [RTN_BLACKHOLE] = "BLACKHOLE",
2402 [RTN_UNREACHABLE] = "UNREACHABLE",
2403 [RTN_PROHIBIT] = "PROHIBIT",
2404 [RTN_THROW] = "THROW",
2405 [RTN_NAT] = "NAT",
2406 [RTN_XRESOLVE] = "XRESOLVE",
2407};
2408
Eric Dumazet28d36e32008-01-14 23:09:56 -08002409static inline const char *rtn_type(char *buf, size_t len, unsigned t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002410{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002411 if (t < __RTN_MAX && rtn_type_names[t])
2412 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002413 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002414 return buf;
2415}
2416
2417/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002418static int fib_trie_seq_show(struct seq_file *seq, void *v)
2419{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002420 const struct fib_trie_iter *iter = seq->private;
2421 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002422
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002423 if (!node_parent_rcu(n))
2424 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002425
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002426 if (IS_TNODE(n)) {
2427 struct tnode *tn = (struct tnode *) n;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07002428 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002429
Robert Olsson1d25cd62005-09-19 15:29:52 -07002430 seq_indent(seq, iter->depth-1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002431 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
2432 &prf, tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002433 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002434
Olof Johansson91b9a272005-08-09 20:24:39 -07002435 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002436 struct leaf *l = (struct leaf *) n;
Stephen Hemminger13280422008-01-22 21:54:37 -08002437 struct leaf_info *li;
2438 struct hlist_node *node;
Al Viro32ab5f82006-09-26 22:21:45 -07002439 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002440
2441 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002442 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002443
Stephen Hemminger13280422008-01-22 21:54:37 -08002444 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2445 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002446
Stephen Hemminger13280422008-01-22 21:54:37 -08002447 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2448 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002449
Stephen Hemminger13280422008-01-22 21:54:37 -08002450 seq_indent(seq, iter->depth+1);
2451 seq_printf(seq, " /%d %s %s", li->plen,
2452 rtn_scope(buf1, sizeof(buf1),
2453 fa->fa_scope),
2454 rtn_type(buf2, sizeof(buf2),
2455 fa->fa_type));
2456 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002457 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002458 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002459 }
2460 }
Robert Olsson19baf832005-06-21 12:43:18 -07002461 }
2462
2463 return 0;
2464}
2465
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002466static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002467 .start = fib_trie_seq_start,
2468 .next = fib_trie_seq_next,
2469 .stop = fib_trie_seq_stop,
2470 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002471};
2472
2473static int fib_trie_seq_open(struct inode *inode, struct file *file)
2474{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002475 return seq_open_net(inode, file, &fib_trie_seq_ops,
2476 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002477}
2478
Arjan van de Ven9a321442007-02-12 00:55:35 -08002479static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002480 .owner = THIS_MODULE,
2481 .open = fib_trie_seq_open,
2482 .read = seq_read,
2483 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002484 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002485};
2486
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002487struct fib_route_iter {
2488 struct seq_net_private p;
2489 struct trie *main_trie;
2490 loff_t pos;
2491 t_key key;
2492};
2493
2494static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
2495{
2496 struct leaf *l = NULL;
2497 struct trie *t = iter->main_trie;
2498
2499 /* use cache location of last found key */
2500 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2501 pos -= iter->pos;
2502 else {
2503 iter->pos = 0;
2504 l = trie_firstleaf(t);
2505 }
2506
2507 while (l && pos-- > 0) {
2508 iter->pos++;
2509 l = trie_nextleaf(l);
2510 }
2511
2512 if (l)
2513 iter->key = pos; /* remember it */
2514 else
2515 iter->pos = 0; /* forget it */
2516
2517 return l;
2518}
2519
2520static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2521 __acquires(RCU)
2522{
2523 struct fib_route_iter *iter = seq->private;
2524 struct fib_table *tb;
2525
2526 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002527 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002528 if (!tb)
2529 return NULL;
2530
2531 iter->main_trie = (struct trie *) tb->tb_data;
2532 if (*pos == 0)
2533 return SEQ_START_TOKEN;
2534 else
2535 return fib_route_get_idx(iter, *pos - 1);
2536}
2537
2538static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2539{
2540 struct fib_route_iter *iter = seq->private;
2541 struct leaf *l = v;
2542
2543 ++*pos;
2544 if (v == SEQ_START_TOKEN) {
2545 iter->pos = 0;
2546 l = trie_firstleaf(iter->main_trie);
2547 } else {
2548 iter->pos++;
2549 l = trie_nextleaf(l);
2550 }
2551
2552 if (l)
2553 iter->key = l->key;
2554 else
2555 iter->pos = 0;
2556 return l;
2557}
2558
2559static void fib_route_seq_stop(struct seq_file *seq, void *v)
2560 __releases(RCU)
2561{
2562 rcu_read_unlock();
2563}
2564
Al Viro32ab5f82006-09-26 22:21:45 -07002565static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002566{
2567 static unsigned type2flags[RTN_MAX + 1] = {
2568 [7] = RTF_REJECT, [8] = RTF_REJECT,
2569 };
2570 unsigned flags = type2flags[type];
2571
2572 if (fi && fi->fib_nh->nh_gw)
2573 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002574 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002575 flags |= RTF_HOST;
2576 flags |= RTF_UP;
2577 return flags;
2578}
2579
2580/*
2581 * This outputs /proc/net/route.
2582 * The format of the file is not supposed to be changed
2583 * and needs to be same as fib_hash output to avoid breaking
2584 * legacy utilities
2585 */
2586static int fib_route_seq_show(struct seq_file *seq, void *v)
2587{
2588 struct leaf *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002589 struct leaf_info *li;
2590 struct hlist_node *node;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002591
2592 if (v == SEQ_START_TOKEN) {
2593 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2594 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2595 "\tWindow\tIRTT");
2596 return 0;
2597 }
2598
Stephen Hemminger13280422008-01-22 21:54:37 -08002599 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002600 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002601 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002602
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002603 mask = inet_make_mask(li->plen);
2604 prefix = htonl(l->key);
2605
2606 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002607 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002608 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002609 int len;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002610
2611 if (fa->fa_type == RTN_BROADCAST
2612 || fa->fa_type == RTN_MULTICAST)
2613 continue;
2614
2615 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002616 seq_printf(seq,
2617 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2618 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002619 fi->fib_dev ? fi->fib_dev->name : "*",
2620 prefix,
2621 fi->fib_nh->nh_gw, flags, 0, 0,
2622 fi->fib_priority,
2623 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002624 (fi->fib_advmss ?
2625 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002626 fi->fib_window,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002627 fi->fib_rtt >> 3, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002628 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002629 seq_printf(seq,
2630 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2631 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002632 prefix, 0, flags, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002633 mask, 0, 0, 0, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002634
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002635 seq_printf(seq, "%*s\n", 127 - len, "");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002636 }
2637 }
2638
2639 return 0;
2640}
2641
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002642static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002643 .start = fib_route_seq_start,
2644 .next = fib_route_seq_next,
2645 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002646 .show = fib_route_seq_show,
2647};
2648
2649static int fib_route_seq_open(struct inode *inode, struct file *file)
2650{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002651 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002652 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002653}
2654
Arjan van de Ven9a321442007-02-12 00:55:35 -08002655static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002656 .owner = THIS_MODULE,
2657 .open = fib_route_seq_open,
2658 .read = seq_read,
2659 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002660 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002661};
2662
Denis V. Lunev61a02652008-01-10 03:21:09 -08002663int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002664{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002665 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002666 goto out1;
2667
Denis V. Lunev61a02652008-01-10 03:21:09 -08002668 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2669 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002670 goto out2;
2671
Denis V. Lunev61a02652008-01-10 03:21:09 -08002672 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002673 goto out3;
2674
Robert Olsson19baf832005-06-21 12:43:18 -07002675 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002676
2677out3:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002678 proc_net_remove(net, "fib_triestat");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002679out2:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002680 proc_net_remove(net, "fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002681out1:
2682 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002683}
2684
Denis V. Lunev61a02652008-01-10 03:21:09 -08002685void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002686{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002687 proc_net_remove(net, "fib_trie");
2688 proc_net_remove(net, "fib_triestat");
2689 proc_net_remove(net, "route");
Robert Olsson19baf832005-06-21 12:43:18 -07002690}
2691
2692#endif /* CONFIG_PROC_FS */