blob: 012cf5a685814bdff7c538ccbdc8805519ed0939 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Robert Olsson05eee482007-03-19 16:27:37 -070051#define VERSION "0.408"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
54#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070055#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070056#include <linux/types.h>
57#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070058#include <linux/mm.h>
59#include <linux/string.h>
60#include <linux/socket.h>
61#include <linux/sockios.h>
62#include <linux/errno.h>
63#include <linux/in.h>
64#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080065#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070066#include <linux/netdevice.h>
67#include <linux/if_arp.h>
68#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070069#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070070#include <linux/skbuff.h>
71#include <linux/netlink.h>
72#include <linux/init.h>
73#include <linux/list.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020074#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070075#include <net/ip.h>
76#include <net/protocol.h>
77#include <net/route.h>
78#include <net/tcp.h>
79#include <net/sock.h>
80#include <net/ip_fib.h>
81#include "fib_lookup.h"
82
Robert Olsson06ef9212006-03-20 21:35:01 -080083#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070084
Robert Olsson19baf832005-06-21 12:43:18 -070085#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
Robert Olsson2373ce12005-08-25 13:01:29 -070092#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
93
Olof Johansson91b9a272005-08-09 20:24:39 -070094#define IS_TNODE(n) (!(n->parent & T_LEAF))
95#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -070096
97struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -070098 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -080099 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700100};
101
102struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700103 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -0800104 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700105 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700106 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700107};
108
109struct leaf_info {
110 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700111 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700112 int plen;
113 struct list_head falh;
114};
115
116struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700117 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -0800118 t_key key;
Eric Dumazet112d8cf2008-01-12 21:27:41 -0800119 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
120 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
Eric Dumazet8d965442008-01-13 22:31:44 -0800121 unsigned int full_children; /* KEYLENGTH bits needed */
122 unsigned int empty_children; /* KEYLENGTH bits needed */
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700123 union {
124 struct rcu_head rcu;
125 struct work_struct work;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700126 struct tnode *tnode_free;
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700127 };
Olof Johansson91b9a272005-08-09 20:24:39 -0700128 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700129};
130
131#ifdef CONFIG_IP_FIB_TRIE_STATS
132struct trie_use_stats {
133 unsigned int gets;
134 unsigned int backtrack;
135 unsigned int semantic_match_passed;
136 unsigned int semantic_match_miss;
137 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700138 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800148 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800149 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700150};
Robert Olsson19baf832005-06-21 12:43:18 -0700151
152struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700153 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700154#ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700157};
158
Robert Olsson19baf832005-06-21 12:43:18 -0700159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
161 int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700162static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700163static struct tnode *inflate(struct trie *t, struct tnode *tn);
164static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head;
Robert Olsson19baf832005-06-21 12:43:18 -0700167
Christoph Lametere18b8902006-12-06 20:33:20 -0800168static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800169static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700170
Stephen Hemminger06801912007-08-10 15:22:13 -0700171static inline struct tnode *node_parent(struct node *node)
172{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800173 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
174}
Stephen Hemminger06801912007-08-10 15:22:13 -0700175
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800176static inline struct tnode *node_parent_rcu(struct node *node)
177{
178 struct tnode *ret = node_parent(node);
179
Stephen Hemminger06801912007-08-10 15:22:13 -0700180 return rcu_dereference(ret);
181}
182
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700183/* Same as rcu_assign_pointer
184 * but that macro() assumes that value is a pointer.
185 */
Stephen Hemminger06801912007-08-10 15:22:13 -0700186static inline void node_set_parent(struct node *node, struct tnode *ptr)
187{
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700188 smp_wmb();
189 node->parent = (unsigned long)ptr | NODE_TYPE(node);
Stephen Hemminger06801912007-08-10 15:22:13 -0700190}
Robert Olsson2373ce12005-08-25 13:01:29 -0700191
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800192static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700193{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800194 BUG_ON(i >= 1U << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700195
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800196 return tn->child[i];
197}
198
199static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
200{
201 struct node *ret = tnode_get_child(tn, i);
202
203 return rcu_dereference(ret);
Robert Olsson19baf832005-06-21 12:43:18 -0700204}
205
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700206static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700207{
Olof Johansson91b9a272005-08-09 20:24:39 -0700208 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700209}
210
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700211static inline t_key mask_pfx(t_key k, unsigned short l)
212{
213 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
214}
215
Robert Olsson19baf832005-06-21 12:43:18 -0700216static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
217{
Olof Johansson91b9a272005-08-09 20:24:39 -0700218 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700219 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700220 else
Robert Olsson19baf832005-06-21 12:43:18 -0700221 return 0;
222}
223
224static inline int tkey_equals(t_key a, t_key b)
225{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700226 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700227}
228
229static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
230{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700231 if (bits == 0 || offset >= KEYLENGTH)
232 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700233 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
234 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700235}
Robert Olsson19baf832005-06-21 12:43:18 -0700236
237static inline int tkey_mismatch(t_key a, int offset, t_key b)
238{
239 t_key diff = a ^ b;
240 int i = offset;
241
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700242 if (!diff)
243 return 0;
244 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700245 i++;
246 return i;
247}
248
Robert Olsson19baf832005-06-21 12:43:18 -0700249/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900250 To understand this stuff, an understanding of keys and all their bits is
251 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700252 all of the bits in that key are significant.
253
254 Consider a node 'n' and its parent 'tp'.
255
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900256 If n is a leaf, every bit in its key is significant. Its presence is
257 necessitated by path compression, since during a tree traversal (when
258 searching for a leaf - unless we are doing an insertion) we will completely
259 ignore all skipped bits we encounter. Thus we need to verify, at the end of
260 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700261 correct key path.
262
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900263 Note that we can never "miss" the correct key in the tree if present by
264 following the wrong path. Path compression ensures that segments of the key
265 that are the same for all keys with a given prefix are skipped, but the
266 skipped part *is* identical for each node in the subtrie below the skipped
267 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700268 call to tkey_sub_equals() in trie_insert().
269
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900270 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700271 have many different meanings.
272
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900273 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700274 _________________________________________________________________
275 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
276 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900277 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700278
279 _________________________________________________________________
280 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
281 -----------------------------------------------------------------
282 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
283
284 tp->pos = 7
285 tp->bits = 3
286 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700287 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700288
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900289 First, let's just ignore the bits that come before the parent tp, that is
290 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700291 not use them for anything.
292
293 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900294 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700295 'n' among tp's children.
296
297 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
298 for the node n.
299
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900300 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700301 of the bits are really not needed or indeed known in n->key.
302
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900303 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700304 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900305
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700306
Robert Olsson19baf832005-06-21 12:43:18 -0700307 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
308 at this point.
309
310*/
311
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700312static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700313{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700314 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700315}
316
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800317static const int halve_threshold = 25;
318static const int inflate_threshold = 50;
319static const int halve_threshold_root = 8;
320static const int inflate_threshold_root = 15;
Robert Olsson19baf832005-06-21 12:43:18 -0700321
Robert Olsson2373ce12005-08-25 13:01:29 -0700322
323static void __alias_free_mem(struct rcu_head *head)
324{
325 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
326 kmem_cache_free(fn_alias_kmem, fa);
327}
328
329static inline void alias_free_mem_rcu(struct fib_alias *fa)
330{
331 call_rcu(&fa->rcu, __alias_free_mem);
332}
333
334static void __leaf_free_rcu(struct rcu_head *head)
335{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800336 struct leaf *l = container_of(head, struct leaf, rcu);
337 kmem_cache_free(trie_leaf_kmem, l);
Robert Olsson2373ce12005-08-25 13:01:29 -0700338}
339
Stephen Hemminger387a5482008-04-10 03:47:34 -0700340static inline void free_leaf(struct leaf *l)
341{
342 call_rcu_bh(&l->rcu, __leaf_free_rcu);
343}
344
Robert Olsson2373ce12005-08-25 13:01:29 -0700345static void __leaf_info_free_rcu(struct rcu_head *head)
346{
347 kfree(container_of(head, struct leaf_info, rcu));
348}
349
350static inline void free_leaf_info(struct leaf_info *leaf)
351{
352 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
353}
354
Eric Dumazet8d965442008-01-13 22:31:44 -0800355static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700356{
Robert Olsson2373ce12005-08-25 13:01:29 -0700357 if (size <= PAGE_SIZE)
Eric Dumazet8d965442008-01-13 22:31:44 -0800358 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700359 else
360 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
361}
Robert Olsson2373ce12005-08-25 13:01:29 -0700362
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700363static void __tnode_vfree(struct work_struct *arg)
364{
365 struct tnode *tn = container_of(arg, struct tnode, work);
366 vfree(tn);
Robert Olsson2373ce12005-08-25 13:01:29 -0700367}
368
369static void __tnode_free_rcu(struct rcu_head *head)
370{
371 struct tnode *tn = container_of(head, struct tnode, rcu);
Eric Dumazet8d965442008-01-13 22:31:44 -0800372 size_t size = sizeof(struct tnode) +
373 (sizeof(struct node *) << tn->bits);
Robert Olsson2373ce12005-08-25 13:01:29 -0700374
375 if (size <= PAGE_SIZE)
376 kfree(tn);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700377 else {
378 INIT_WORK(&tn->work, __tnode_vfree);
379 schedule_work(&tn->work);
380 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700381}
382
383static inline void tnode_free(struct tnode *tn)
384{
Stephen Hemminger387a5482008-04-10 03:47:34 -0700385 if (IS_LEAF(tn))
386 free_leaf((struct leaf *) tn);
387 else
Robert Olsson550e29b2006-04-04 12:53:35 -0700388 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700389}
390
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700391static void tnode_free_safe(struct tnode *tn)
392{
393 BUG_ON(IS_LEAF(tn));
Jarek Poplawski7b855762009-06-18 00:28:51 -0700394 tn->tnode_free = tnode_free_head;
395 tnode_free_head = tn;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700396}
397
398static void tnode_free_flush(void)
399{
400 struct tnode *tn;
401
402 while ((tn = tnode_free_head)) {
403 tnode_free_head = tn->tnode_free;
404 tn->tnode_free = NULL;
405 tnode_free(tn);
406 }
407}
408
Robert Olsson19baf832005-06-21 12:43:18 -0700409static struct leaf *leaf_new(void)
410{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800411 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700412 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700413 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700414 INIT_HLIST_HEAD(&l->list);
415 }
416 return l;
417}
418
419static struct leaf_info *leaf_info_new(int plen)
420{
421 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700422 if (li) {
423 li->plen = plen;
424 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700425 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700426 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700427}
428
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800429static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700430{
Eric Dumazet8d965442008-01-13 22:31:44 -0800431 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700432 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700433
Olof Johansson91b9a272005-08-09 20:24:39 -0700434 if (tn) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700435 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700436 tn->pos = pos;
437 tn->bits = bits;
438 tn->key = key;
439 tn->full_children = 0;
440 tn->empty_children = 1<<bits;
441 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700442
Eric Dumazet8d965442008-01-13 22:31:44 -0800443 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
444 (unsigned long) (sizeof(struct node) << bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700445 return tn;
446}
447
Robert Olsson19baf832005-06-21 12:43:18 -0700448/*
449 * Check whether a tnode 'n' is "full", i.e. it is an internal node
450 * and no bits are skipped. See discussion in dyntree paper p. 6
451 */
452
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700453static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700454{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700455 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700456 return 0;
457
458 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
459}
460
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800461static inline void put_child(struct trie *t, struct tnode *tn, int i,
462 struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700463{
464 tnode_put_child_reorg(tn, i, n, -1);
465}
466
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700467 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700468 * Add a child at position i overwriting the old value.
469 * Update the value of full_children and empty_children.
470 */
471
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800472static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
473 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700474{
Robert Olsson2373ce12005-08-25 13:01:29 -0700475 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700476 int isfull;
477
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700478 BUG_ON(i >= 1<<tn->bits);
479
Robert Olsson19baf832005-06-21 12:43:18 -0700480 /* update emptyChildren */
481 if (n == NULL && chi != NULL)
482 tn->empty_children++;
483 else if (n != NULL && chi == NULL)
484 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700485
Robert Olsson19baf832005-06-21 12:43:18 -0700486 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700487 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700488 wasfull = tnode_full(tn, chi);
489
490 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700491 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700492 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700493 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700494 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700495
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700496 if (n)
Stephen Hemminger06801912007-08-10 15:22:13 -0700497 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700498
Robert Olsson2373ce12005-08-25 13:01:29 -0700499 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700500}
501
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700502static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700503{
504 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700505 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700506 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700507 int inflate_threshold_use;
508 int halve_threshold_use;
Robert Olsson05eee482007-03-19 16:27:37 -0700509 int max_resize;
Robert Olsson19baf832005-06-21 12:43:18 -0700510
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900511 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700512 return NULL;
513
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700514 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
515 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700516
517 /* No children */
518 if (tn->empty_children == tnode_child_length(tn)) {
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700519 tnode_free_safe(tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700520 return NULL;
521 }
522 /* One child */
523 if (tn->empty_children == tnode_child_length(tn) - 1)
524 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700525 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700526
Olof Johansson91b9a272005-08-09 20:24:39 -0700527 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700528 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700529 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700530
531 /* compress one level */
Stephen Hemminger06801912007-08-10 15:22:13 -0700532 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700533 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700534 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700535 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * Double as long as the resulting node has a number of
538 * nonempty nodes that are above the threshold.
539 */
540
541 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700542 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
543 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700545 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700546 * children in the *doubled* node is at least 'high'."
547 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700548 * 'high' in this instance is the variable 'inflate_threshold'. It
549 * is expressed as a percentage, so we multiply it with
550 * tnode_child_length() and instead of multiplying by 2 (since the
551 * child array will be doubled by inflate()) and multiplying
552 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700553 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700554 *
555 * The left-hand side may look a bit weird: tnode_child_length(tn)
556 * - tn->empty_children is of course the number of non-null children
557 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700558 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700559 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700560 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700561 *
Robert Olsson19baf832005-06-21 12:43:18 -0700562 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700563 *
Robert Olsson19baf832005-06-21 12:43:18 -0700564 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700565 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700566 * tn->full_children;
567 *
568 * new_child_length = tnode_child_length(tn) * 2;
569 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700570 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700571 * new_child_length;
572 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700573 *
574 * ...and so on, tho it would mess up the while () loop.
575 *
Robert Olsson19baf832005-06-21 12:43:18 -0700576 * anyway,
577 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
578 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700579 *
Robert Olsson19baf832005-06-21 12:43:18 -0700580 * avoid a division:
581 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
582 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700583 *
Robert Olsson19baf832005-06-21 12:43:18 -0700584 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700585 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700586 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700587 *
Robert Olsson19baf832005-06-21 12:43:18 -0700588 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700589 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700590 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700591 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700592 *
Robert Olsson19baf832005-06-21 12:43:18 -0700593 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700594 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700595 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700596 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700597 *
Robert Olsson19baf832005-06-21 12:43:18 -0700598 */
599
600 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700601
Robert Olssone6308be2005-10-04 13:01:58 -0700602 /* Keep root node larger */
603
Stephen Hemminger132adf52007-03-08 20:44:43 -0800604 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700605 inflate_threshold_use = inflate_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900606 else
Robert Olssone6308be2005-10-04 13:01:58 -0700607 inflate_threshold_use = inflate_threshold;
608
Robert Olsson2f368952005-07-05 15:02:40 -0700609 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700610 max_resize = 10;
611 while ((tn->full_children > 0 && max_resize-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800612 50 * (tn->full_children + tnode_child_length(tn)
613 - tn->empty_children)
614 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700615
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700616 old_tn = tn;
617 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800618
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700619 if (IS_ERR(tn)) {
620 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700621#ifdef CONFIG_IP_FIB_TRIE_STATS
622 t->stats.resize_node_skipped++;
623#endif
624 break;
625 }
Robert Olsson19baf832005-06-21 12:43:18 -0700626 }
627
Robert Olsson05eee482007-03-19 16:27:37 -0700628 if (max_resize < 0) {
629 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800630 pr_warning("Fix inflate_threshold_root."
631 " Now=%d size=%d bits\n",
632 inflate_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700633 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800634 pr_warning("Fix inflate_threshold."
635 " Now=%d size=%d bits\n",
636 inflate_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700637 }
638
Robert Olsson19baf832005-06-21 12:43:18 -0700639 check_tnode(tn);
640
641 /*
642 * Halve as long as the number of empty children in this
643 * node is above threshold.
644 */
Robert Olsson2f368952005-07-05 15:02:40 -0700645
Robert Olssone6308be2005-10-04 13:01:58 -0700646
647 /* Keep root node larger */
648
Stephen Hemminger132adf52007-03-08 20:44:43 -0800649 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700650 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900651 else
Robert Olssone6308be2005-10-04 13:01:58 -0700652 halve_threshold_use = halve_threshold;
653
Robert Olsson2f368952005-07-05 15:02:40 -0700654 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700655 max_resize = 10;
656 while (tn->bits > 1 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700657 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700658 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700659
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700660 old_tn = tn;
661 tn = halve(t, tn);
662 if (IS_ERR(tn)) {
663 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700664#ifdef CONFIG_IP_FIB_TRIE_STATS
665 t->stats.resize_node_skipped++;
666#endif
667 break;
668 }
669 }
670
Robert Olsson05eee482007-03-19 16:27:37 -0700671 if (max_resize < 0) {
672 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800673 pr_warning("Fix halve_threshold_root."
674 " Now=%d size=%d bits\n",
675 halve_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700676 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800677 pr_warning("Fix halve_threshold."
678 " Now=%d size=%d bits\n",
679 halve_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700680 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700681
Robert Olsson19baf832005-06-21 12:43:18 -0700682 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700683 if (tn->empty_children == tnode_child_length(tn) - 1)
684 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700685 struct node *n;
686
Olof Johansson91b9a272005-08-09 20:24:39 -0700687 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700688 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700689 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700690
691 /* compress one level */
692
Stephen Hemminger06801912007-08-10 15:22:13 -0700693 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700694 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700695 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700696 }
697
698 return (struct node *) tn;
699}
700
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700701static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700702{
Robert Olsson19baf832005-06-21 12:43:18 -0700703 struct tnode *oldtnode = tn;
704 int olen = tnode_child_length(tn);
705 int i;
706
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700707 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700708
709 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
710
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700711 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700712 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700713
714 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700715 * Preallocate and store tnodes before the actual work so we
716 * don't get into an inconsistent state if memory allocation
717 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700718 * of tnode is ignored.
719 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700720
721 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800722 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700723
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800724 inode = (struct tnode *) tnode_get_child(oldtnode, i);
Robert Olsson2f368952005-07-05 15:02:40 -0700725 if (inode &&
726 IS_TNODE(inode) &&
727 inode->pos == oldtnode->pos + oldtnode->bits &&
728 inode->bits > 1) {
729 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700730 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700731
Robert Olsson2f368952005-07-05 15:02:40 -0700732 left = tnode_new(inode->key&(~m), inode->pos + 1,
733 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700734 if (!left)
735 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700736
Robert Olsson2f368952005-07-05 15:02:40 -0700737 right = tnode_new(inode->key|m, inode->pos + 1,
738 inode->bits - 1);
739
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900740 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700741 tnode_free(left);
742 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900743 }
Robert Olsson2f368952005-07-05 15:02:40 -0700744
745 put_child(t, tn, 2*i, (struct node *) left);
746 put_child(t, tn, 2*i+1, (struct node *) right);
747 }
748 }
749
Olof Johansson91b9a272005-08-09 20:24:39 -0700750 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800751 struct tnode *inode;
Robert Olsson19baf832005-06-21 12:43:18 -0700752 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700753 struct tnode *left, *right;
754 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700755
Robert Olsson19baf832005-06-21 12:43:18 -0700756 /* An empty child */
757 if (node == NULL)
758 continue;
759
760 /* A leaf or an internal node with skipped bits */
761
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700762 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700763 tn->pos + tn->bits - 1) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800764 if (tkey_extract_bits(node->key,
765 oldtnode->pos + oldtnode->bits,
766 1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700767 put_child(t, tn, 2*i, node);
768 else
769 put_child(t, tn, 2*i+1, node);
770 continue;
771 }
772
773 /* An internal node with two children */
774 inode = (struct tnode *) node;
775
776 if (inode->bits == 1) {
777 put_child(t, tn, 2*i, inode->child[0]);
778 put_child(t, tn, 2*i+1, inode->child[1]);
779
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700780 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700781 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700782 }
783
Olof Johansson91b9a272005-08-09 20:24:39 -0700784 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700785
Olof Johansson91b9a272005-08-09 20:24:39 -0700786 /* We will replace this node 'inode' with two new
787 * ones, 'left' and 'right', each with half of the
788 * original children. The two new nodes will have
789 * a position one bit further down the key and this
790 * means that the "significant" part of their keys
791 * (see the discussion near the top of this file)
792 * will differ by one bit, which will be "0" in
793 * left's key and "1" in right's key. Since we are
794 * moving the key position by one step, the bit that
795 * we are moving away from - the bit at position
796 * (inode->pos) - is the one that will differ between
797 * left and right. So... we synthesize that bit in the
798 * two new keys.
799 * The mask 'm' below will be a single "one" bit at
800 * the position (inode->pos)
801 */
Robert Olsson19baf832005-06-21 12:43:18 -0700802
Olof Johansson91b9a272005-08-09 20:24:39 -0700803 /* Use the old key, but set the new significant
804 * bit to zero.
805 */
Robert Olsson19baf832005-06-21 12:43:18 -0700806
Olof Johansson91b9a272005-08-09 20:24:39 -0700807 left = (struct tnode *) tnode_get_child(tn, 2*i);
808 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700809
Olof Johansson91b9a272005-08-09 20:24:39 -0700810 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700811
Olof Johansson91b9a272005-08-09 20:24:39 -0700812 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
813 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700814
Olof Johansson91b9a272005-08-09 20:24:39 -0700815 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700816
Olof Johansson91b9a272005-08-09 20:24:39 -0700817 size = tnode_child_length(left);
818 for (j = 0; j < size; j++) {
819 put_child(t, left, j, inode->child[j]);
820 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700821 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700822 put_child(t, tn, 2*i, resize(t, left));
823 put_child(t, tn, 2*i+1, resize(t, right));
824
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700825 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700826 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700827 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700828 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700829nomem:
830 {
831 int size = tnode_child_length(tn);
832 int j;
833
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700834 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700835 if (tn->child[j])
836 tnode_free((struct tnode *)tn->child[j]);
837
838 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700839
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700840 return ERR_PTR(-ENOMEM);
841 }
Robert Olsson19baf832005-06-21 12:43:18 -0700842}
843
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700844static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700845{
846 struct tnode *oldtnode = tn;
847 struct node *left, *right;
848 int i;
849 int olen = tnode_child_length(tn);
850
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700851 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700852
853 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700854
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700855 if (!tn)
856 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700857
858 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700859 * Preallocate and store tnodes before the actual work so we
860 * don't get into an inconsistent state if memory allocation
861 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700862 * of tnode is ignored.
863 */
864
Olof Johansson91b9a272005-08-09 20:24:39 -0700865 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700866 left = tnode_get_child(oldtnode, i);
867 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700868
Robert Olsson2f368952005-07-05 15:02:40 -0700869 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700870 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700871 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700872
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700873 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700874
875 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700876 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700877
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700878 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700879 }
Robert Olsson2f368952005-07-05 15:02:40 -0700880
Robert Olsson2f368952005-07-05 15:02:40 -0700881 }
Robert Olsson19baf832005-06-21 12:43:18 -0700882
Olof Johansson91b9a272005-08-09 20:24:39 -0700883 for (i = 0; i < olen; i += 2) {
884 struct tnode *newBinNode;
885
Robert Olsson19baf832005-06-21 12:43:18 -0700886 left = tnode_get_child(oldtnode, i);
887 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700888
Robert Olsson19baf832005-06-21 12:43:18 -0700889 /* At least one of the children is empty */
890 if (left == NULL) {
891 if (right == NULL) /* Both are empty */
892 continue;
893 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700894 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700895 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700896
897 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700898 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700899 continue;
900 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700901
Robert Olsson19baf832005-06-21 12:43:18 -0700902 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700903 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
904 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700905 put_child(t, newBinNode, 0, left);
906 put_child(t, newBinNode, 1, right);
907 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700908 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700909 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700910 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700911nomem:
912 {
913 int size = tnode_child_length(tn);
914 int j;
915
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700916 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700917 if (tn->child[j])
918 tnode_free((struct tnode *)tn->child[j]);
919
920 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700921
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700922 return ERR_PTR(-ENOMEM);
923 }
Robert Olsson19baf832005-06-21 12:43:18 -0700924}
925
Robert Olsson772cb712005-09-19 15:31:18 -0700926/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700927 via get_fa_head and dump */
928
Robert Olsson772cb712005-09-19 15:31:18 -0700929static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700930{
Robert Olsson772cb712005-09-19 15:31:18 -0700931 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700932 struct hlist_node *node;
933 struct leaf_info *li;
934
Robert Olsson2373ce12005-08-25 13:01:29 -0700935 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700936 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700937 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700938
Robert Olsson19baf832005-06-21 12:43:18 -0700939 return NULL;
940}
941
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800942static inline struct list_head *get_fa_head(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700943{
Robert Olsson772cb712005-09-19 15:31:18 -0700944 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700945
Olof Johansson91b9a272005-08-09 20:24:39 -0700946 if (!li)
947 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700948
Olof Johansson91b9a272005-08-09 20:24:39 -0700949 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700950}
951
952static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
953{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900954 struct leaf_info *li = NULL, *last = NULL;
955 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700956
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900957 if (hlist_empty(head)) {
958 hlist_add_head_rcu(&new->hlist, head);
959 } else {
960 hlist_for_each_entry(li, node, head, hlist) {
961 if (new->plen > li->plen)
962 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700963
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900964 last = li;
965 }
966 if (last)
967 hlist_add_after_rcu(&last->hlist, &new->hlist);
968 else
969 hlist_add_before_rcu(&new->hlist, &li->hlist);
970 }
Robert Olsson19baf832005-06-21 12:43:18 -0700971}
972
Robert Olsson2373ce12005-08-25 13:01:29 -0700973/* rcu_read_lock needs to be hold by caller from readside */
974
Robert Olsson19baf832005-06-21 12:43:18 -0700975static struct leaf *
976fib_find_node(struct trie *t, u32 key)
977{
978 int pos;
979 struct tnode *tn;
980 struct node *n;
981
982 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700983 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700984
985 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
986 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700987
Robert Olsson19baf832005-06-21 12:43:18 -0700988 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700989
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700990 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700991 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800992 n = tnode_get_child_rcu(tn,
993 tkey_extract_bits(key,
994 tn->pos,
995 tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700996 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700997 break;
998 }
999 /* Case we have found a leaf. Compare prefixes */
1000
Olof Johansson91b9a272005-08-09 20:24:39 -07001001 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
1002 return (struct leaf *)n;
1003
Robert Olsson19baf832005-06-21 12:43:18 -07001004 return NULL;
1005}
1006
Jarek Poplawski7b855762009-06-18 00:28:51 -07001007static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -07001008{
Robert Olsson19baf832005-06-21 12:43:18 -07001009 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -07001010 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -07001011 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001012
Robert Olsson3ed18d72009-05-21 15:20:59 -07001013 key = tn->key;
1014
Stephen Hemminger06801912007-08-10 15:22:13 -07001015 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -07001016 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1017 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001018 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1019
1020 tnode_put_child_reorg((struct tnode *)tp, cindex,
1021 (struct node *)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -07001022
Stephen Hemminger06801912007-08-10 15:22:13 -07001023 tp = node_parent((struct node *) tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -07001024 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -07001025 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -07001026 break;
Stephen Hemminger06801912007-08-10 15:22:13 -07001027 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001028 }
Stephen Hemminger06801912007-08-10 15:22:13 -07001029
Robert Olsson19baf832005-06-21 12:43:18 -07001030 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -07001031 if (IS_TNODE(tn))
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001032 tn = (struct tnode *)resize(t, (struct tnode *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001033
Jarek Poplawski7b855762009-06-18 00:28:51 -07001034 rcu_assign_pointer(t->trie, (struct node *)tn);
1035 tnode_free_flush();
1036
1037 return;
Robert Olsson19baf832005-06-21 12:43:18 -07001038}
1039
Robert Olsson2373ce12005-08-25 13:01:29 -07001040/* only used from updater-side */
1041
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001042static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -07001043{
1044 int pos, newpos;
1045 struct tnode *tp = NULL, *tn = NULL;
1046 struct node *n;
1047 struct leaf *l;
1048 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001049 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001050 struct leaf_info *li;
1051 t_key cindex;
1052
1053 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001054 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001055
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001056 /* If we point to NULL, stop. Either the tree is empty and we should
1057 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001058 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001059 * If we point to a T_TNODE, check if it matches our key. Note that
1060 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001061 * not be the parent's 'pos'+'bits'!
1062 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001063 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001064 * the index from our key, push the T_TNODE and walk the tree.
1065 *
1066 * If it doesn't, we have to replace it with a new T_TNODE.
1067 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001068 * If we point to a T_LEAF, it might or might not have the same key
1069 * as we do. If it does, just change the value, update the T_LEAF's
1070 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001071 * If it doesn't, we need to replace it with a T_TNODE.
1072 */
1073
1074 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1075 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001076
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001077 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001078
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001079 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001080 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001081 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001082 n = tnode_get_child(tn,
1083 tkey_extract_bits(key,
1084 tn->pos,
1085 tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -07001086
Stephen Hemminger06801912007-08-10 15:22:13 -07001087 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001088 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001089 break;
1090 }
1091
1092 /*
1093 * n ----> NULL, LEAF or TNODE
1094 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001095 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001096 */
1097
Olof Johansson91b9a272005-08-09 20:24:39 -07001098 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001099
1100 /* Case 1: n is a leaf. Compare prefixes */
1101
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001102 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001103 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001104 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001105
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001106 if (!li)
1107 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001108
1109 fa_head = &li->falh;
1110 insert_leaf_info(&l->list, li);
1111 goto done;
1112 }
Robert Olsson19baf832005-06-21 12:43:18 -07001113 l = leaf_new();
1114
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001115 if (!l)
1116 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001117
1118 l->key = key;
1119 li = leaf_info_new(plen);
1120
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001121 if (!li) {
Stephen Hemminger387a5482008-04-10 03:47:34 -07001122 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001123 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001124 }
Robert Olsson19baf832005-06-21 12:43:18 -07001125
1126 fa_head = &li->falh;
1127 insert_leaf_info(&l->list, li);
1128
Robert Olsson19baf832005-06-21 12:43:18 -07001129 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001130 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001131
Stephen Hemminger06801912007-08-10 15:22:13 -07001132 node_set_parent((struct node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001133
Olof Johansson91b9a272005-08-09 20:24:39 -07001134 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1135 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1136 } else {
1137 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001138 /*
1139 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001140 * first tnode need some special handling
1141 */
1142
1143 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001144 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001145 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001146 pos = 0;
1147
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001148 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001149 newpos = tkey_mismatch(key, pos, n->key);
1150 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001151 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001152 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001153 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001154 }
Robert Olsson19baf832005-06-21 12:43:18 -07001155
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001156 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001157 free_leaf_info(li);
Stephen Hemminger387a5482008-04-10 03:47:34 -07001158 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001159 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001160 }
1161
Stephen Hemminger06801912007-08-10 15:22:13 -07001162 node_set_parent((struct node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001163
Olof Johansson91b9a272005-08-09 20:24:39 -07001164 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001165 put_child(t, tn, missbit, (struct node *)l);
1166 put_child(t, tn, 1-missbit, n);
1167
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001168 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001169 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001170 put_child(t, (struct tnode *)tp, cindex,
1171 (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001172 } else {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001173 rcu_assign_pointer(t->trie, (struct node *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001174 tp = tn;
1175 }
1176 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001177
1178 if (tp && tp->pos + tp->bits > 32)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001179 pr_warning("fib_trie"
1180 " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1181 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001182
Robert Olsson19baf832005-06-21 12:43:18 -07001183 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001184
Jarek Poplawski7b855762009-06-18 00:28:51 -07001185 trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001186done:
Robert Olsson19baf832005-06-21 12:43:18 -07001187 return fa_head;
1188}
1189
Robert Olssond562f1f2007-03-26 14:22:22 -07001190/*
1191 * Caller must hold RTNL.
1192 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001193static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001194{
1195 struct trie *t = (struct trie *) tb->tb_data;
1196 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001197 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001198 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001199 int plen = cfg->fc_dst_len;
1200 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001201 u32 key, mask;
1202 int err;
1203 struct leaf *l;
1204
1205 if (plen > 32)
1206 return -EINVAL;
1207
Thomas Graf4e902c52006-08-17 18:14:52 -07001208 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001209
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001210 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001211
Olof Johansson91b9a272005-08-09 20:24:39 -07001212 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001213
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001214 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001215 return -EINVAL;
1216
1217 key = key & mask;
1218
Thomas Graf4e902c52006-08-17 18:14:52 -07001219 fi = fib_create_info(cfg);
1220 if (IS_ERR(fi)) {
1221 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001222 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001223 }
Robert Olsson19baf832005-06-21 12:43:18 -07001224
1225 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001226 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001227
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001228 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001229 fa_head = get_fa_head(l, plen);
1230 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1231 }
1232
1233 /* Now fa, if non-NULL, points to the first fib alias
1234 * with the same keys [prefix,tos,priority], if such key already
1235 * exists or to the node before which we will insert new one.
1236 *
1237 * If fa is NULL, we will need to allocate a new one and
1238 * insert to the head of f.
1239 *
1240 * If f is NULL, no fib node matched the destination key
1241 * and we need to allocate a new one of those as well.
1242 */
1243
Julian Anastasov936f6f82008-01-28 21:18:06 -08001244 if (fa && fa->fa_tos == tos &&
1245 fa->fa_info->fib_priority == fi->fib_priority) {
1246 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001247
1248 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001249 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001250 goto out;
1251
Julian Anastasov936f6f82008-01-28 21:18:06 -08001252 /* We have 2 goals:
1253 * 1. Find exact match for type, scope, fib_info to avoid
1254 * duplicate routes
1255 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1256 */
1257 fa_match = NULL;
1258 fa_first = fa;
1259 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1260 list_for_each_entry_continue(fa, fa_head, fa_list) {
1261 if (fa->fa_tos != tos)
1262 break;
1263 if (fa->fa_info->fib_priority != fi->fib_priority)
1264 break;
1265 if (fa->fa_type == cfg->fc_type &&
1266 fa->fa_scope == cfg->fc_scope &&
1267 fa->fa_info == fi) {
1268 fa_match = fa;
1269 break;
1270 }
1271 }
1272
Thomas Graf4e902c52006-08-17 18:14:52 -07001273 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001274 struct fib_info *fi_drop;
1275 u8 state;
1276
Julian Anastasov936f6f82008-01-28 21:18:06 -08001277 fa = fa_first;
1278 if (fa_match) {
1279 if (fa == fa_match)
1280 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001281 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001282 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001283 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001284 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001285 if (new_fa == NULL)
1286 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001287
1288 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001289 new_fa->fa_tos = fa->fa_tos;
1290 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001291 new_fa->fa_type = cfg->fc_type;
1292 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001293 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001294 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001295
Robert Olsson2373ce12005-08-25 13:01:29 -07001296 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1297 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001298
1299 fib_release_info(fi_drop);
1300 if (state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001301 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Milan Kocianb8f55832007-05-23 14:55:06 -07001302 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1303 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001304
Olof Johansson91b9a272005-08-09 20:24:39 -07001305 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001306 }
1307 /* Error if we find a perfect match which
1308 * uses the same scope, type, and nexthop
1309 * information.
1310 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001311 if (fa_match)
1312 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001313
Thomas Graf4e902c52006-08-17 18:14:52 -07001314 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001315 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001316 }
1317 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001318 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001319 goto out;
1320
1321 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001322 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001323 if (new_fa == NULL)
1324 goto out;
1325
1326 new_fa->fa_info = fi;
1327 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001328 new_fa->fa_type = cfg->fc_type;
1329 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001330 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001331 /*
1332 * Insert new entry to the list.
1333 */
1334
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001335 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001336 fa_head = fib_insert_node(t, key, plen);
1337 if (unlikely(!fa_head)) {
1338 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001339 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001340 }
Robert Olssonf835e472005-06-28 15:00:39 -07001341 }
Robert Olsson19baf832005-06-21 12:43:18 -07001342
Robert Olsson2373ce12005-08-25 13:01:29 -07001343 list_add_tail_rcu(&new_fa->fa_list,
1344 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001345
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001346 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001347 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001348 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001349succeeded:
1350 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001351
1352out_free_new_fa:
1353 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001354out:
1355 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001356err:
Robert Olsson19baf832005-06-21 12:43:18 -07001357 return err;
1358}
1359
Robert Olsson772cb712005-09-19 15:31:18 -07001360/* should be called with rcu_read_lock */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001361static int check_leaf(struct trie *t, struct leaf *l,
1362 t_key key, const struct flowi *flp,
1363 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001364{
Robert Olsson19baf832005-06-21 12:43:18 -07001365 struct leaf_info *li;
1366 struct hlist_head *hhead = &l->list;
1367 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001368
Robert Olsson2373ce12005-08-25 13:01:29 -07001369 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001370 int err;
1371 int plen = li->plen;
1372 __be32 mask = inet_make_mask(plen);
1373
Al Viro888454c2006-09-19 13:42:46 -07001374 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001375 continue;
1376
Rami Rosene204a342009-05-18 01:19:12 +00001377 err = fib_semantic_match(&li->falh, flp, res, plen);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001378
Robert Olsson19baf832005-06-21 12:43:18 -07001379#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001380 if (err <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001381 t->stats.semantic_match_passed++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001382 else
1383 t->stats.semantic_match_miss++;
Robert Olsson19baf832005-06-21 12:43:18 -07001384#endif
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001385 if (err <= 0)
Ben Hutchings2e655572008-07-10 16:52:52 -07001386 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001387 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001388
Ben Hutchings2e655572008-07-10 16:52:52 -07001389 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001390}
1391
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001392static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1393 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001394{
1395 struct trie *t = (struct trie *) tb->tb_data;
Ben Hutchings2e655572008-07-10 16:52:52 -07001396 int ret;
Robert Olsson19baf832005-06-21 12:43:18 -07001397 struct node *n;
1398 struct tnode *pn;
1399 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001400 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001401 int chopped_off;
1402 t_key cindex = 0;
1403 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001404 struct tnode *cn;
1405 t_key node_prefix, key_prefix, pref_mismatch;
1406 int mp;
1407
Robert Olsson2373ce12005-08-25 13:01:29 -07001408 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001409
Robert Olsson2373ce12005-08-25 13:01:29 -07001410 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001411 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001412 goto failed;
1413
1414#ifdef CONFIG_IP_FIB_TRIE_STATS
1415 t->stats.gets++;
1416#endif
1417
1418 /* Just a leaf? */
1419 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001420 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001421 goto found;
Robert Olsson19baf832005-06-21 12:43:18 -07001422 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001423
Robert Olsson19baf832005-06-21 12:43:18 -07001424 pn = (struct tnode *) n;
1425 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001426
Olof Johansson91b9a272005-08-09 20:24:39 -07001427 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001428 pos = pn->pos;
1429 bits = pn->bits;
1430
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001431 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001432 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1433 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001434
1435 n = tnode_get_child(pn, cindex);
1436
1437 if (n == NULL) {
1438#ifdef CONFIG_IP_FIB_TRIE_STATS
1439 t->stats.null_node_hit++;
1440#endif
1441 goto backtrace;
1442 }
1443
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001444 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001445 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1446 if (ret > 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07001447 goto backtrace;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001448 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001449 }
1450
Olof Johansson91b9a272005-08-09 20:24:39 -07001451 cn = (struct tnode *)n;
1452
1453 /*
1454 * It's a tnode, and we can do some extra checks here if we
1455 * like, to avoid descending into a dead-end branch.
1456 * This tnode is in the parent's child array at index
1457 * key[p_pos..p_pos+p_bits] but potentially with some bits
1458 * chopped off, so in reality the index may be just a
1459 * subprefix, padded with zero at the end.
1460 * We can also take a look at any skipped bits in this
1461 * tnode - everything up to p_pos is supposed to be ok,
1462 * and the non-chopped bits of the index (se previous
1463 * paragraph) are also guaranteed ok, but the rest is
1464 * considered unknown.
1465 *
1466 * The skipped bits are key[pos+bits..cn->pos].
1467 */
1468
1469 /* If current_prefix_length < pos+bits, we are already doing
1470 * actual prefix matching, which means everything from
1471 * pos+(bits-chopped_off) onward must be zero along some
1472 * branch of this subtree - otherwise there is *no* valid
1473 * prefix present. Here we can only check the skipped
1474 * bits. Remember, since we have already indexed into the
1475 * parent's child array, we know that the bits we chopped of
1476 * *are* zero.
1477 */
1478
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001479 /* NOTA BENE: Checking only skipped bits
1480 for the new node here */
Olof Johansson91b9a272005-08-09 20:24:39 -07001481
1482 if (current_prefix_length < pos+bits) {
1483 if (tkey_extract_bits(cn->key, current_prefix_length,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001484 cn->pos - current_prefix_length)
1485 || !(cn->child[0]))
Olof Johansson91b9a272005-08-09 20:24:39 -07001486 goto backtrace;
1487 }
1488
1489 /*
1490 * If chopped_off=0, the index is fully validated and we
1491 * only need to look at the skipped bits for this, the new,
1492 * tnode. What we actually want to do is to find out if
1493 * these skipped bits match our key perfectly, or if we will
1494 * have to count on finding a matching prefix further down,
1495 * because if we do, we would like to have some way of
1496 * verifying the existence of such a prefix at this point.
1497 */
1498
1499 /* The only thing we can do at this point is to verify that
1500 * any such matching prefix can indeed be a prefix to our
1501 * key, and if the bits in the node we are inspecting that
1502 * do not match our key are not ZERO, this cannot be true.
1503 * Thus, find out where there is a mismatch (before cn->pos)
1504 * and verify that all the mismatching bits are zero in the
1505 * new tnode's key.
1506 */
1507
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001508 /*
1509 * Note: We aren't very concerned about the piece of
1510 * the key that precede pn->pos+pn->bits, since these
1511 * have already been checked. The bits after cn->pos
1512 * aren't checked since these are by definition
1513 * "unknown" at this point. Thus, what we want to see
1514 * is if we are about to enter the "prefix matching"
1515 * state, and in that case verify that the skipped
1516 * bits that will prevail throughout this subtree are
1517 * zero, as they have to be if we are to find a
1518 * matching prefix.
Olof Johansson91b9a272005-08-09 20:24:39 -07001519 */
1520
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001521 node_prefix = mask_pfx(cn->key, cn->pos);
1522 key_prefix = mask_pfx(key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001523 pref_mismatch = key_prefix^node_prefix;
1524 mp = 0;
1525
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001526 /*
1527 * In short: If skipped bits in this node do not match
1528 * the search key, enter the "prefix matching"
1529 * state.directly.
Olof Johansson91b9a272005-08-09 20:24:39 -07001530 */
1531 if (pref_mismatch) {
1532 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1533 mp++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001534 pref_mismatch = pref_mismatch << 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001535 }
1536 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1537
1538 if (key_prefix != 0)
1539 goto backtrace;
1540
1541 if (current_prefix_length >= cn->pos)
1542 current_prefix_length = mp;
1543 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001544
Olof Johansson91b9a272005-08-09 20:24:39 -07001545 pn = (struct tnode *)n; /* Descend */
1546 chopped_off = 0;
1547 continue;
1548
Robert Olsson19baf832005-06-21 12:43:18 -07001549backtrace:
1550 chopped_off++;
1551
1552 /* As zero don't change the child key (cindex) */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001553 while ((chopped_off <= pn->bits)
1554 && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001555 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001556
1557 /* Decrease current_... with bits chopped off */
1558 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001559 current_prefix_length = pn->pos + pn->bits
1560 - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001561
Robert Olsson19baf832005-06-21 12:43:18 -07001562 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001563 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001564 * chopped off all bits in this tnode walk up to our parent.
1565 */
1566
Olof Johansson91b9a272005-08-09 20:24:39 -07001567 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001568 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001569 } else {
Stephen Hemminger06801912007-08-10 15:22:13 -07001570 struct tnode *parent = node_parent((struct node *) pn);
1571 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001572 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001573
Robert Olsson19baf832005-06-21 12:43:18 -07001574 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001575 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1576 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001577 chopped_off = 0;
1578
1579#ifdef CONFIG_IP_FIB_TRIE_STATS
1580 t->stats.backtrack++;
1581#endif
1582 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001583 }
Robert Olsson19baf832005-06-21 12:43:18 -07001584 }
1585failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001586 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001587found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001588 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001589 return ret;
1590}
1591
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001592/*
1593 * Remove the leaf and return parent.
1594 */
1595static void trie_leaf_remove(struct trie *t, struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001596{
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001597 struct tnode *tp = node_parent((struct node *) l);
Robert Olsson19baf832005-06-21 12:43:18 -07001598
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001599 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001600
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001601 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001602 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001603 put_child(t, (struct tnode *)tp, cindex, NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001604 trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001605 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001606 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001607
Stephen Hemminger387a5482008-04-10 03:47:34 -07001608 free_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001609}
1610
Robert Olssond562f1f2007-03-26 14:22:22 -07001611/*
1612 * Caller must hold RTNL.
1613 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001614static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001615{
1616 struct trie *t = (struct trie *) tb->tb_data;
1617 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001618 int plen = cfg->fc_dst_len;
1619 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001620 struct fib_alias *fa, *fa_to_delete;
1621 struct list_head *fa_head;
1622 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001623 struct leaf_info *li;
1624
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001625 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001626 return -EINVAL;
1627
Thomas Graf4e902c52006-08-17 18:14:52 -07001628 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001629 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001630
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001631 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001632 return -EINVAL;
1633
1634 key = key & mask;
1635 l = fib_find_node(t, key);
1636
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001637 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001638 return -ESRCH;
1639
1640 fa_head = get_fa_head(l, plen);
1641 fa = fib_find_alias(fa_head, tos, 0);
1642
1643 if (!fa)
1644 return -ESRCH;
1645
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001646 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001647
1648 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001649 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1650 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001651 struct fib_info *fi = fa->fa_info;
1652
1653 if (fa->fa_tos != tos)
1654 break;
1655
Thomas Graf4e902c52006-08-17 18:14:52 -07001656 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1657 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1658 fa->fa_scope == cfg->fc_scope) &&
1659 (!cfg->fc_protocol ||
1660 fi->fib_protocol == cfg->fc_protocol) &&
1661 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001662 fa_to_delete = fa;
1663 break;
1664 }
1665 }
1666
Olof Johansson91b9a272005-08-09 20:24:39 -07001667 if (!fa_to_delete)
1668 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001669
Olof Johansson91b9a272005-08-09 20:24:39 -07001670 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001671 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001672 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001673
Olof Johansson91b9a272005-08-09 20:24:39 -07001674 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001675 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001676
Robert Olsson2373ce12005-08-25 13:01:29 -07001677 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001678
Olof Johansson91b9a272005-08-09 20:24:39 -07001679 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001680 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001681 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001682 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001683
1684 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001685 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001686
1687 if (fa->fa_state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001688 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001689
Robert Olsson2373ce12005-08-25 13:01:29 -07001690 fib_release_info(fa->fa_info);
1691 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001692 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001693}
1694
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001695static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001696{
1697 struct fib_alias *fa, *fa_node;
1698 int found = 0;
1699
1700 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1701 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001702
Robert Olsson2373ce12005-08-25 13:01:29 -07001703 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1704 list_del_rcu(&fa->fa_list);
1705 fib_release_info(fa->fa_info);
1706 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001707 found++;
1708 }
1709 }
1710 return found;
1711}
1712
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001713static int trie_flush_leaf(struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001714{
1715 int found = 0;
1716 struct hlist_head *lih = &l->list;
1717 struct hlist_node *node, *tmp;
1718 struct leaf_info *li = NULL;
1719
1720 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001721 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001722
1723 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001724 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001725 free_leaf_info(li);
1726 }
1727 }
1728 return found;
1729}
1730
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001731/*
1732 * Scan for the next right leaf starting at node p->child[idx]
1733 * Since we have back pointer, no recursion necessary.
1734 */
1735static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001736{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001737 do {
1738 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001739
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001740 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001741 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001742 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001743 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001744
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001745 while (idx < 1u << p->bits) {
1746 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001747 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001748 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001749
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001750 if (IS_LEAF(c)) {
1751 prefetch(p->child[idx]);
1752 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001753 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001754
1755 /* Rescan start scanning in new node */
1756 p = (struct tnode *) c;
1757 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001758 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001759
1760 /* Node empty, walk back up to parent */
Olof Johansson91b9a272005-08-09 20:24:39 -07001761 c = (struct node *) p;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001762 } while ( (p = node_parent_rcu(c)) != NULL);
1763
1764 return NULL; /* Root of trie */
1765}
1766
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001767static struct leaf *trie_firstleaf(struct trie *t)
1768{
1769 struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
1770
1771 if (!n)
1772 return NULL;
1773
1774 if (IS_LEAF(n)) /* trie is just a leaf */
1775 return (struct leaf *) n;
1776
1777 return leaf_walk_rcu(n, NULL);
1778}
1779
1780static struct leaf *trie_nextleaf(struct leaf *l)
1781{
1782 struct node *c = (struct node *) l;
1783 struct tnode *p = node_parent(c);
1784
1785 if (!p)
1786 return NULL; /* trie with just one leaf */
1787
1788 return leaf_walk_rcu(p, c);
Robert Olsson19baf832005-06-21 12:43:18 -07001789}
1790
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001791static struct leaf *trie_leafindex(struct trie *t, int index)
1792{
1793 struct leaf *l = trie_firstleaf(t);
1794
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001795 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001796 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001797
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001798 return l;
1799}
1800
1801
Robert Olssond562f1f2007-03-26 14:22:22 -07001802/*
1803 * Caller must hold RTNL.
1804 */
Robert Olsson19baf832005-06-21 12:43:18 -07001805static int fn_trie_flush(struct fib_table *tb)
1806{
1807 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001808 struct leaf *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001809 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001810
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001811 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001812 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001813
1814 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001815 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001816 ll = l;
1817 }
1818
1819 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001820 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001821
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001822 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001823 return found;
1824}
1825
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001826static void fn_trie_select_default(struct fib_table *tb,
1827 const struct flowi *flp,
1828 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001829{
1830 struct trie *t = (struct trie *) tb->tb_data;
1831 int order, last_idx;
1832 struct fib_info *fi = NULL;
1833 struct fib_info *last_resort;
1834 struct fib_alias *fa = NULL;
1835 struct list_head *fa_head;
1836 struct leaf *l;
1837
1838 last_idx = -1;
1839 last_resort = NULL;
1840 order = -1;
1841
Robert Olsson2373ce12005-08-25 13:01:29 -07001842 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001843
Robert Olsson19baf832005-06-21 12:43:18 -07001844 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001845 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001846 goto out;
1847
1848 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001849 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001850 goto out;
1851
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001852 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001853 goto out;
1854
Robert Olsson2373ce12005-08-25 13:01:29 -07001855 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001856 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001857
Robert Olsson19baf832005-06-21 12:43:18 -07001858 if (fa->fa_scope != res->scope ||
1859 fa->fa_type != RTN_UNICAST)
1860 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001861
Robert Olsson19baf832005-06-21 12:43:18 -07001862 if (next_fi->fib_priority > res->fi->fib_priority)
1863 break;
1864 if (!next_fi->fib_nh[0].nh_gw ||
1865 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1866 continue;
1867 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001868
Robert Olsson19baf832005-06-21 12:43:18 -07001869 if (fi == NULL) {
1870 if (next_fi != res->fi)
1871 break;
1872 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -08001873 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001874 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001875 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001876 goto out;
1877 }
1878 fi = next_fi;
1879 order++;
1880 }
1881 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -08001882 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001883 goto out;
1884 }
1885
Denis V. Lunev971b8932007-12-08 00:32:23 -08001886 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1887 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001888 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001889 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001890 goto out;
1891 }
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001892 if (last_idx >= 0)
1893 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001894 tb->tb_default = last_idx;
1895out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001896 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001897}
1898
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001899static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1900 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001901 struct sk_buff *skb, struct netlink_callback *cb)
1902{
1903 int i, s_i;
1904 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001905 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001906
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001907 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001908 i = 0;
1909
Robert Olsson2373ce12005-08-25 13:01:29 -07001910 /* rcu_read_lock is hold by caller */
1911
1912 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001913 if (i < s_i) {
1914 i++;
1915 continue;
1916 }
Robert Olsson19baf832005-06-21 12:43:18 -07001917
1918 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1919 cb->nlh->nlmsg_seq,
1920 RTM_NEWROUTE,
1921 tb->tb_id,
1922 fa->fa_type,
1923 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001924 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001925 plen,
1926 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001927 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001928 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001929 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001930 }
Robert Olsson19baf832005-06-21 12:43:18 -07001931 i++;
1932 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001933 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001934 return skb->len;
1935}
1936
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001937static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1938 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001939{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001940 struct leaf_info *li;
1941 struct hlist_node *node;
1942 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001943
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001944 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001945 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001946
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001947 /* rcu_read_lock is hold by caller */
1948 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1949 if (i < s_i) {
1950 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001951 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001952 }
Robert Olsson19baf832005-06-21 12:43:18 -07001953
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001954 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001955 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001956
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001957 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001958 continue;
1959
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001960 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001961 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001962 return -1;
1963 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001964 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001965 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001966
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001967 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001968 return skb->len;
1969}
1970
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001971static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1972 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001973{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001974 struct leaf *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001975 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001976 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001977 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001978
Robert Olsson2373ce12005-08-25 13:01:29 -07001979 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001980 /* Dump starting at last key.
1981 * Note: 0.0.0.0/0 (ie default) is first key.
1982 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001983 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001984 l = trie_firstleaf(t);
1985 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001986 /* Normally, continue from last key, but if that is missing
1987 * fallback to using slow rescan
1988 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001989 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001990 if (!l)
1991 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001992 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001993
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001994 while (l) {
1995 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001996 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001997 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001998 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001999 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002000 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002001
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002002 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002003 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002004 memset(&cb->args[4], 0,
2005 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07002006 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002007 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07002008 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002009
Robert Olsson19baf832005-06-21 12:43:18 -07002010 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07002011}
2012
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002013void __init fib_hash_init(void)
2014{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002015 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2016 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08002017 0, SLAB_PANIC, NULL);
2018
2019 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2020 max(sizeof(struct leaf),
2021 sizeof(struct leaf_info)),
2022 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002023}
Robert Olsson19baf832005-06-21 12:43:18 -07002024
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002025
2026/* Fix more generic FIB names for init later */
2027struct fib_table *fib_hash_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07002028{
2029 struct fib_table *tb;
2030 struct trie *t;
2031
Robert Olsson19baf832005-06-21 12:43:18 -07002032 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
2033 GFP_KERNEL);
2034 if (tb == NULL)
2035 return NULL;
2036
2037 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08002038 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002039 tb->tb_lookup = fn_trie_lookup;
2040 tb->tb_insert = fn_trie_insert;
2041 tb->tb_delete = fn_trie_delete;
2042 tb->tb_flush = fn_trie_flush;
2043 tb->tb_select_default = fn_trie_select_default;
2044 tb->tb_dump = fn_trie_dump;
Robert Olsson19baf832005-06-21 12:43:18 -07002045
2046 t = (struct trie *) tb->tb_data;
Stephen Hemmingerc28a1cf2008-01-12 20:49:13 -08002047 memset(t, 0, sizeof(*t));
Robert Olsson19baf832005-06-21 12:43:18 -07002048
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002049 if (id == RT_TABLE_LOCAL)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002050 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07002051
2052 return tb;
2053}
2054
Robert Olsson19baf832005-06-21 12:43:18 -07002055#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002056/* Depth first Trie walk iterator */
2057struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002058 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002059 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002060 struct tnode *tnode;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002061 unsigned index;
2062 unsigned depth;
2063};
Robert Olsson19baf832005-06-21 12:43:18 -07002064
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002065static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07002066{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002067 struct tnode *tn = iter->tnode;
2068 unsigned cindex = iter->index;
2069 struct tnode *p;
2070
Eric W. Biederman6640e692007-01-24 14:42:04 -08002071 /* A single entry routing table */
2072 if (!tn)
2073 return NULL;
2074
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002075 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2076 iter->tnode, iter->index, iter->depth);
2077rescan:
2078 while (cindex < (1<<tn->bits)) {
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002079 struct node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002080
2081 if (n) {
2082 if (IS_LEAF(n)) {
2083 iter->tnode = tn;
2084 iter->index = cindex + 1;
2085 } else {
2086 /* push down one level */
2087 iter->tnode = (struct tnode *) n;
2088 iter->index = 0;
2089 ++iter->depth;
2090 }
2091 return n;
2092 }
2093
2094 ++cindex;
2095 }
2096
2097 /* Current node exhausted, pop back up */
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002098 p = node_parent_rcu((struct node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002099 if (p) {
2100 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2101 tn = p;
2102 --iter->depth;
2103 goto rescan;
2104 }
2105
2106 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002107 return NULL;
2108}
2109
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002110static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2111 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002112{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002113 struct node *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002114
Stephen Hemminger132adf52007-03-08 20:44:43 -08002115 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002116 return NULL;
2117
2118 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002119 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002120 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002121
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002122 if (IS_TNODE(n)) {
2123 iter->tnode = (struct tnode *) n;
2124 iter->index = 0;
2125 iter->depth = 1;
2126 } else {
2127 iter->tnode = NULL;
2128 iter->index = 0;
2129 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002130 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002131
2132 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002133}
2134
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002135static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002136{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002137 struct node *n;
2138 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002139
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002140 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002141
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002142 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002143 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002144 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08002145 struct leaf *l = (struct leaf *)n;
2146 struct leaf_info *li;
2147 struct hlist_node *tmp;
2148
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002149 s->leaves++;
2150 s->totdepth += iter.depth;
2151 if (iter.depth > s->maxdepth)
2152 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08002153
2154 hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
2155 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002156 } else {
2157 const struct tnode *tn = (const struct tnode *) n;
2158 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002159
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002160 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002161 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002162 s->nodesizes[tn->bits]++;
2163
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002164 for (i = 0; i < (1<<tn->bits); i++)
2165 if (!tn->child[i])
2166 s->nullpointers++;
2167 }
2168 }
2169 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002170}
2171
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002172/*
Robert Olsson19baf832005-06-21 12:43:18 -07002173 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002174 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002175static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002176{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002177 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002178
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002179 if (stat->leaves)
2180 avdepth = stat->totdepth*100 / stat->leaves;
2181 else
2182 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002183
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002184 seq_printf(seq, "\tAver depth: %u.%02d\n",
2185 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002186 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002187
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002188 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002189 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08002190
2191 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2192 bytes += sizeof(struct leaf_info) * stat->prefixes;
2193
Stephen Hemminger187b5182008-01-12 20:55:55 -08002194 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002195 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002196
Robert Olsson06ef9212006-03-20 21:35:01 -08002197 max = MAX_STAT_DEPTH;
2198 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002199 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002200
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002201 pointers = 0;
2202 for (i = 1; i <= max; i++)
2203 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002204 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002205 pointers += (1<<i) * stat->nodesizes[i];
2206 }
2207 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002208 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002209
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002210 bytes += sizeof(struct node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002211 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2212 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002213}
Robert Olsson19baf832005-06-21 12:43:18 -07002214
2215#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002216static void trie_show_usage(struct seq_file *seq,
2217 const struct trie_use_stats *stats)
2218{
2219 seq_printf(seq, "\nCounters:\n---------\n");
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002220 seq_printf(seq, "gets = %u\n", stats->gets);
2221 seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2222 seq_printf(seq, "semantic match passed = %u\n",
2223 stats->semantic_match_passed);
2224 seq_printf(seq, "semantic match miss = %u\n",
2225 stats->semantic_match_miss);
2226 seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2227 seq_printf(seq, "skipped node resize = %u\n\n",
2228 stats->resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002229}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002230#endif /* CONFIG_IP_FIB_TRIE_STATS */
2231
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002232static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002233{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002234 if (tb->tb_id == RT_TABLE_LOCAL)
2235 seq_puts(seq, "Local:\n");
2236 else if (tb->tb_id == RT_TABLE_MAIN)
2237 seq_puts(seq, "Main:\n");
2238 else
2239 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002240}
Robert Olsson19baf832005-06-21 12:43:18 -07002241
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002242
Robert Olsson19baf832005-06-21 12:43:18 -07002243static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2244{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002245 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002246 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002247
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002248 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002249 "Basic info: size of leaf:"
2250 " %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002251 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002252
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002253 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2254 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2255 struct hlist_node *node;
2256 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002257
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002258 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2259 struct trie *t = (struct trie *) tb->tb_data;
2260 struct trie_stat stat;
2261
2262 if (!t)
2263 continue;
2264
2265 fib_table_print(seq, tb);
2266
2267 trie_collect_stats(t, &stat);
2268 trie_show_stats(seq, &stat);
2269#ifdef CONFIG_IP_FIB_TRIE_STATS
2270 trie_show_usage(seq, &t->stats);
2271#endif
2272 }
2273 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002274
Robert Olsson19baf832005-06-21 12:43:18 -07002275 return 0;
2276}
2277
Robert Olsson19baf832005-06-21 12:43:18 -07002278static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2279{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002280 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002281}
2282
Arjan van de Ven9a321442007-02-12 00:55:35 -08002283static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002284 .owner = THIS_MODULE,
2285 .open = fib_triestat_seq_open,
2286 .read = seq_read,
2287 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002288 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002289};
2290
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002291static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002292{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002293 struct fib_trie_iter *iter = seq->private;
2294 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002295 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002296 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002297
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002298 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2299 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2300 struct hlist_node *node;
2301 struct fib_table *tb;
2302
2303 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2304 struct node *n;
2305
2306 for (n = fib_trie_get_first(iter,
2307 (struct trie *) tb->tb_data);
2308 n; n = fib_trie_get_next(iter))
2309 if (pos == idx++) {
2310 iter->tb = tb;
2311 return n;
2312 }
2313 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002314 }
Robert Olsson19baf832005-06-21 12:43:18 -07002315
Robert Olsson19baf832005-06-21 12:43:18 -07002316 return NULL;
2317}
2318
2319static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002320 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002321{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002322 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002323 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002324}
2325
2326static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2327{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002328 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002329 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002330 struct fib_table *tb = iter->tb;
2331 struct hlist_node *tb_node;
2332 unsigned int h;
2333 struct node *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002334
Robert Olsson19baf832005-06-21 12:43:18 -07002335 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002336 /* next node in same table */
2337 n = fib_trie_get_next(iter);
2338 if (n)
2339 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002340
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002341 /* walk rest of this hash chain */
2342 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2343 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
2344 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2345 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2346 if (n)
2347 goto found;
2348 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002349
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002350 /* new hash chain */
2351 while (++h < FIB_TABLE_HASHSZ) {
2352 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2353 hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
2354 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2355 if (n)
2356 goto found;
2357 }
2358 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002359 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002360
2361found:
2362 iter->tb = tb;
2363 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002364}
2365
2366static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002367 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002368{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002369 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002370}
2371
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002372static void seq_indent(struct seq_file *seq, int n)
2373{
2374 while (n-- > 0) seq_puts(seq, " ");
2375}
Robert Olsson19baf832005-06-21 12:43:18 -07002376
Eric Dumazet28d36e32008-01-14 23:09:56 -08002377static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002378{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002379 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002380 case RT_SCOPE_UNIVERSE: return "universe";
2381 case RT_SCOPE_SITE: return "site";
2382 case RT_SCOPE_LINK: return "link";
2383 case RT_SCOPE_HOST: return "host";
2384 case RT_SCOPE_NOWHERE: return "nowhere";
2385 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002386 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002387 return buf;
2388 }
2389}
2390
2391static const char *rtn_type_names[__RTN_MAX] = {
2392 [RTN_UNSPEC] = "UNSPEC",
2393 [RTN_UNICAST] = "UNICAST",
2394 [RTN_LOCAL] = "LOCAL",
2395 [RTN_BROADCAST] = "BROADCAST",
2396 [RTN_ANYCAST] = "ANYCAST",
2397 [RTN_MULTICAST] = "MULTICAST",
2398 [RTN_BLACKHOLE] = "BLACKHOLE",
2399 [RTN_UNREACHABLE] = "UNREACHABLE",
2400 [RTN_PROHIBIT] = "PROHIBIT",
2401 [RTN_THROW] = "THROW",
2402 [RTN_NAT] = "NAT",
2403 [RTN_XRESOLVE] = "XRESOLVE",
2404};
2405
Eric Dumazet28d36e32008-01-14 23:09:56 -08002406static inline const char *rtn_type(char *buf, size_t len, unsigned t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002407{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002408 if (t < __RTN_MAX && rtn_type_names[t])
2409 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002410 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002411 return buf;
2412}
2413
2414/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002415static int fib_trie_seq_show(struct seq_file *seq, void *v)
2416{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002417 const struct fib_trie_iter *iter = seq->private;
2418 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002419
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002420 if (!node_parent_rcu(n))
2421 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002422
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002423 if (IS_TNODE(n)) {
2424 struct tnode *tn = (struct tnode *) n;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07002425 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002426
Robert Olsson1d25cd62005-09-19 15:29:52 -07002427 seq_indent(seq, iter->depth-1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002428 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
2429 &prf, tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002430 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002431
Olof Johansson91b9a272005-08-09 20:24:39 -07002432 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002433 struct leaf *l = (struct leaf *) n;
Stephen Hemminger13280422008-01-22 21:54:37 -08002434 struct leaf_info *li;
2435 struct hlist_node *node;
Al Viro32ab5f82006-09-26 22:21:45 -07002436 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002437
2438 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002439 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002440
Stephen Hemminger13280422008-01-22 21:54:37 -08002441 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2442 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002443
Stephen Hemminger13280422008-01-22 21:54:37 -08002444 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2445 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002446
Stephen Hemminger13280422008-01-22 21:54:37 -08002447 seq_indent(seq, iter->depth+1);
2448 seq_printf(seq, " /%d %s %s", li->plen,
2449 rtn_scope(buf1, sizeof(buf1),
2450 fa->fa_scope),
2451 rtn_type(buf2, sizeof(buf2),
2452 fa->fa_type));
2453 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002454 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002455 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002456 }
2457 }
Robert Olsson19baf832005-06-21 12:43:18 -07002458 }
2459
2460 return 0;
2461}
2462
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002463static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002464 .start = fib_trie_seq_start,
2465 .next = fib_trie_seq_next,
2466 .stop = fib_trie_seq_stop,
2467 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002468};
2469
2470static int fib_trie_seq_open(struct inode *inode, struct file *file)
2471{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002472 return seq_open_net(inode, file, &fib_trie_seq_ops,
2473 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002474}
2475
Arjan van de Ven9a321442007-02-12 00:55:35 -08002476static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002477 .owner = THIS_MODULE,
2478 .open = fib_trie_seq_open,
2479 .read = seq_read,
2480 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002481 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002482};
2483
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002484struct fib_route_iter {
2485 struct seq_net_private p;
2486 struct trie *main_trie;
2487 loff_t pos;
2488 t_key key;
2489};
2490
2491static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
2492{
2493 struct leaf *l = NULL;
2494 struct trie *t = iter->main_trie;
2495
2496 /* use cache location of last found key */
2497 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2498 pos -= iter->pos;
2499 else {
2500 iter->pos = 0;
2501 l = trie_firstleaf(t);
2502 }
2503
2504 while (l && pos-- > 0) {
2505 iter->pos++;
2506 l = trie_nextleaf(l);
2507 }
2508
2509 if (l)
2510 iter->key = pos; /* remember it */
2511 else
2512 iter->pos = 0; /* forget it */
2513
2514 return l;
2515}
2516
2517static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2518 __acquires(RCU)
2519{
2520 struct fib_route_iter *iter = seq->private;
2521 struct fib_table *tb;
2522
2523 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002524 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002525 if (!tb)
2526 return NULL;
2527
2528 iter->main_trie = (struct trie *) tb->tb_data;
2529 if (*pos == 0)
2530 return SEQ_START_TOKEN;
2531 else
2532 return fib_route_get_idx(iter, *pos - 1);
2533}
2534
2535static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2536{
2537 struct fib_route_iter *iter = seq->private;
2538 struct leaf *l = v;
2539
2540 ++*pos;
2541 if (v == SEQ_START_TOKEN) {
2542 iter->pos = 0;
2543 l = trie_firstleaf(iter->main_trie);
2544 } else {
2545 iter->pos++;
2546 l = trie_nextleaf(l);
2547 }
2548
2549 if (l)
2550 iter->key = l->key;
2551 else
2552 iter->pos = 0;
2553 return l;
2554}
2555
2556static void fib_route_seq_stop(struct seq_file *seq, void *v)
2557 __releases(RCU)
2558{
2559 rcu_read_unlock();
2560}
2561
Al Viro32ab5f82006-09-26 22:21:45 -07002562static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002563{
2564 static unsigned type2flags[RTN_MAX + 1] = {
2565 [7] = RTF_REJECT, [8] = RTF_REJECT,
2566 };
2567 unsigned flags = type2flags[type];
2568
2569 if (fi && fi->fib_nh->nh_gw)
2570 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002571 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002572 flags |= RTF_HOST;
2573 flags |= RTF_UP;
2574 return flags;
2575}
2576
2577/*
2578 * This outputs /proc/net/route.
2579 * The format of the file is not supposed to be changed
2580 * and needs to be same as fib_hash output to avoid breaking
2581 * legacy utilities
2582 */
2583static int fib_route_seq_show(struct seq_file *seq, void *v)
2584{
2585 struct leaf *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002586 struct leaf_info *li;
2587 struct hlist_node *node;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002588
2589 if (v == SEQ_START_TOKEN) {
2590 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2591 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2592 "\tWindow\tIRTT");
2593 return 0;
2594 }
2595
Stephen Hemminger13280422008-01-22 21:54:37 -08002596 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002597 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002598 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002599
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002600 mask = inet_make_mask(li->plen);
2601 prefix = htonl(l->key);
2602
2603 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002604 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002605 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002606 int len;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002607
2608 if (fa->fa_type == RTN_BROADCAST
2609 || fa->fa_type == RTN_MULTICAST)
2610 continue;
2611
2612 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002613 seq_printf(seq,
2614 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2615 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002616 fi->fib_dev ? fi->fib_dev->name : "*",
2617 prefix,
2618 fi->fib_nh->nh_gw, flags, 0, 0,
2619 fi->fib_priority,
2620 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002621 (fi->fib_advmss ?
2622 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002623 fi->fib_window,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002624 fi->fib_rtt >> 3, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002625 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002626 seq_printf(seq,
2627 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2628 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002629 prefix, 0, flags, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002630 mask, 0, 0, 0, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002631
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002632 seq_printf(seq, "%*s\n", 127 - len, "");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002633 }
2634 }
2635
2636 return 0;
2637}
2638
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002639static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002640 .start = fib_route_seq_start,
2641 .next = fib_route_seq_next,
2642 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002643 .show = fib_route_seq_show,
2644};
2645
2646static int fib_route_seq_open(struct inode *inode, struct file *file)
2647{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002648 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002649 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002650}
2651
Arjan van de Ven9a321442007-02-12 00:55:35 -08002652static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002653 .owner = THIS_MODULE,
2654 .open = fib_route_seq_open,
2655 .read = seq_read,
2656 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002657 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002658};
2659
Denis V. Lunev61a02652008-01-10 03:21:09 -08002660int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002661{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002662 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002663 goto out1;
2664
Denis V. Lunev61a02652008-01-10 03:21:09 -08002665 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2666 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002667 goto out2;
2668
Denis V. Lunev61a02652008-01-10 03:21:09 -08002669 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002670 goto out3;
2671
Robert Olsson19baf832005-06-21 12:43:18 -07002672 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002673
2674out3:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002675 proc_net_remove(net, "fib_triestat");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002676out2:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002677 proc_net_remove(net, "fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002678out1:
2679 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002680}
2681
Denis V. Lunev61a02652008-01-10 03:21:09 -08002682void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002683{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002684 proc_net_remove(net, "fib_trie");
2685 proc_net_remove(net, "fib_triestat");
2686 proc_net_remove(net, "route");
Robert Olsson19baf832005-06-21 12:43:18 -07002687}
2688
2689#endif /* CONFIG_PROC_FS */