blob: c331c433acf2f1fe3374f0c001c800be57b50903 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080044 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070051 */
52
Robert Olsson550e29b2006-04-04 12:53:35 -070053#define VERSION "0.407"
Robert Olsson19baf832005-06-21 12:43:18 -070054
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <asm/uaccess.h>
56#include <asm/system.h>
57#include <asm/bitops.h>
58#include <linux/types.h>
59#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070060#include <linux/mm.h>
61#include <linux/string.h>
62#include <linux/socket.h>
63#include <linux/sockios.h>
64#include <linux/errno.h>
65#include <linux/in.h>
66#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080067#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070068#include <linux/netdevice.h>
69#include <linux/if_arp.h>
70#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070071#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070072#include <linux/skbuff.h>
73#include <linux/netlink.h>
74#include <linux/init.h>
75#include <linux/list.h>
76#include <net/ip.h>
77#include <net/protocol.h>
78#include <net/route.h>
79#include <net/tcp.h>
80#include <net/sock.h>
81#include <net/ip_fib.h>
82#include "fib_lookup.h"
83
84#undef CONFIG_IP_FIB_TRIE_STATS
Robert Olsson06ef9212006-03-20 21:35:01 -080085#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087#define KEYLENGTH (8*sizeof(t_key))
88#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
89#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
90
Robert Olsson19baf832005-06-21 12:43:18 -070091typedef unsigned int t_key;
92
93#define T_TNODE 0
94#define T_LEAF 1
95#define NODE_TYPE_MASK 0x1UL
Olof Johansson91b9a272005-08-09 20:24:39 -070096#define NODE_PARENT(node) \
Robert Olsson2373ce12005-08-25 13:01:29 -070097 ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
98
99#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
100
101#define NODE_SET_PARENT(node, ptr) \
102 rcu_assign_pointer((node)->parent, \
103 ((unsigned long)(ptr)) | NODE_TYPE(node))
Robert Olsson19baf832005-06-21 12:43:18 -0700104
Olof Johansson91b9a272005-08-09 20:24:39 -0700105#define IS_TNODE(n) (!(n->parent & T_LEAF))
106#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -0700107
108struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -0700109 t_key key;
110 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700111};
112
113struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700114 t_key key;
115 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700116 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700117 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700118};
119
120struct leaf_info {
121 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700122 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700123 int plen;
124 struct list_head falh;
125};
126
127struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700128 t_key key;
129 unsigned long parent;
130 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
131 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
132 unsigned short full_children; /* KEYLENGTH bits needed */
133 unsigned short empty_children; /* KEYLENGTH bits needed */
Robert Olsson2373ce12005-08-25 13:01:29 -0700134 struct rcu_head rcu;
Olof Johansson91b9a272005-08-09 20:24:39 -0700135 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700136};
137
138#ifdef CONFIG_IP_FIB_TRIE_STATS
139struct trie_use_stats {
140 unsigned int gets;
141 unsigned int backtrack;
142 unsigned int semantic_match_passed;
143 unsigned int semantic_match_miss;
144 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700145 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700146};
147#endif
148
149struct trie_stat {
150 unsigned int totdepth;
151 unsigned int maxdepth;
152 unsigned int tnodes;
153 unsigned int leaves;
154 unsigned int nullpointers;
Robert Olsson06ef9212006-03-20 21:35:01 -0800155 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700156};
Robert Olsson19baf832005-06-21 12:43:18 -0700157
158struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700159 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700160#ifdef CONFIG_IP_FIB_TRIE_STATS
161 struct trie_use_stats stats;
162#endif
Olof Johansson91b9a272005-08-09 20:24:39 -0700163 int size;
Robert Olsson19baf832005-06-21 12:43:18 -0700164 unsigned int revision;
165};
166
Robert Olsson19baf832005-06-21 12:43:18 -0700167static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
168static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700169static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700170static struct tnode *inflate(struct trie *t, struct tnode *tn);
171static struct tnode *halve(struct trie *t, struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700172static void tnode_free(struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700173
Christoph Lametere18b8902006-12-06 20:33:20 -0800174static struct kmem_cache *fn_alias_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700175static struct trie *trie_local = NULL, *trie_main = NULL;
176
Robert Olsson2373ce12005-08-25 13:01:29 -0700177
178/* rcu_read_lock needs to be hold by caller from readside */
179
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700180static inline struct node *tnode_get_child(struct tnode *tn, int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700181{
Olof Johansson91b9a272005-08-09 20:24:39 -0700182 BUG_ON(i >= 1 << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700183
Robert Olsson2373ce12005-08-25 13:01:29 -0700184 return rcu_dereference(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700185}
186
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700187static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700188{
Olof Johansson91b9a272005-08-09 20:24:39 -0700189 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700190}
191
Robert Olsson19baf832005-06-21 12:43:18 -0700192static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
193{
Olof Johansson91b9a272005-08-09 20:24:39 -0700194 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700195 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700196 else
Robert Olsson19baf832005-06-21 12:43:18 -0700197 return 0;
198}
199
200static inline int tkey_equals(t_key a, t_key b)
201{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700202 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700203}
204
205static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
206{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700207 if (bits == 0 || offset >= KEYLENGTH)
208 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700209 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
210 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700211}
Robert Olsson19baf832005-06-21 12:43:18 -0700212
213static inline int tkey_mismatch(t_key a, int offset, t_key b)
214{
215 t_key diff = a ^ b;
216 int i = offset;
217
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700218 if (!diff)
219 return 0;
220 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700221 i++;
222 return i;
223}
224
Robert Olsson19baf832005-06-21 12:43:18 -0700225/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900226 To understand this stuff, an understanding of keys and all their bits is
227 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700228 all of the bits in that key are significant.
229
230 Consider a node 'n' and its parent 'tp'.
231
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900232 If n is a leaf, every bit in its key is significant. Its presence is
233 necessitated by path compression, since during a tree traversal (when
234 searching for a leaf - unless we are doing an insertion) we will completely
235 ignore all skipped bits we encounter. Thus we need to verify, at the end of
236 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700237 correct key path.
238
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900239 Note that we can never "miss" the correct key in the tree if present by
240 following the wrong path. Path compression ensures that segments of the key
241 that are the same for all keys with a given prefix are skipped, but the
242 skipped part *is* identical for each node in the subtrie below the skipped
243 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700244 call to tkey_sub_equals() in trie_insert().
245
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900246 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700247 have many different meanings.
248
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900249 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700250 _________________________________________________________________
251 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
252 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900253 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700254
255 _________________________________________________________________
256 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
257 -----------------------------------------------------------------
258 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
259
260 tp->pos = 7
261 tp->bits = 3
262 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700263 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700264
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900265 First, let's just ignore the bits that come before the parent tp, that is
266 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700267 not use them for anything.
268
269 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900270 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700271 'n' among tp's children.
272
273 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
274 for the node n.
275
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900276 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700277 of the bits are really not needed or indeed known in n->key.
278
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900279 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700280 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700282
Robert Olsson19baf832005-06-21 12:43:18 -0700283 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
284 at this point.
285
286*/
287
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700288static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700289{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700290 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700291}
292
293static int halve_threshold = 25;
294static int inflate_threshold = 50;
Robert Olssone6308be2005-10-04 13:01:58 -0700295static int halve_threshold_root = 15;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900296static int inflate_threshold_root = 25;
Robert Olsson19baf832005-06-21 12:43:18 -0700297
Robert Olsson2373ce12005-08-25 13:01:29 -0700298
299static void __alias_free_mem(struct rcu_head *head)
300{
301 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
302 kmem_cache_free(fn_alias_kmem, fa);
303}
304
305static inline void alias_free_mem_rcu(struct fib_alias *fa)
306{
307 call_rcu(&fa->rcu, __alias_free_mem);
308}
309
310static void __leaf_free_rcu(struct rcu_head *head)
311{
312 kfree(container_of(head, struct leaf, rcu));
313}
314
Robert Olsson2373ce12005-08-25 13:01:29 -0700315static void __leaf_info_free_rcu(struct rcu_head *head)
316{
317 kfree(container_of(head, struct leaf_info, rcu));
318}
319
320static inline void free_leaf_info(struct leaf_info *leaf)
321{
322 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
323}
324
325static struct tnode *tnode_alloc(unsigned int size)
326{
327 struct page *pages;
328
329 if (size <= PAGE_SIZE)
330 return kcalloc(size, 1, GFP_KERNEL);
331
332 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
333 if (!pages)
334 return NULL;
335
336 return page_address(pages);
337}
338
339static void __tnode_free_rcu(struct rcu_head *head)
340{
341 struct tnode *tn = container_of(head, struct tnode, rcu);
342 unsigned int size = sizeof(struct tnode) +
343 (1 << tn->bits) * sizeof(struct node *);
344
345 if (size <= PAGE_SIZE)
346 kfree(tn);
347 else
348 free_pages((unsigned long)tn, get_order(size));
349}
350
351static inline void tnode_free(struct tnode *tn)
352{
Stephen Hemminger132adf52007-03-08 20:44:43 -0800353 if (IS_LEAF(tn)) {
Robert Olsson550e29b2006-04-04 12:53:35 -0700354 struct leaf *l = (struct leaf *) tn;
355 call_rcu_bh(&l->rcu, __leaf_free_rcu);
Stephen Hemminger132adf52007-03-08 20:44:43 -0800356 } else
Robert Olsson550e29b2006-04-04 12:53:35 -0700357 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700358}
359
Robert Olsson19baf832005-06-21 12:43:18 -0700360static struct leaf *leaf_new(void)
361{
362 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700363 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700364 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700365 INIT_HLIST_HEAD(&l->list);
366 }
367 return l;
368}
369
370static struct leaf_info *leaf_info_new(int plen)
371{
372 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700373 if (li) {
374 li->plen = plen;
375 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700376 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700377 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700378}
379
Robert Olsson19baf832005-06-21 12:43:18 -0700380static struct tnode* tnode_new(t_key key, int pos, int bits)
381{
382 int nchildren = 1<<bits;
383 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700384 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700385
Olof Johansson91b9a272005-08-09 20:24:39 -0700386 if (tn) {
Robert Olsson19baf832005-06-21 12:43:18 -0700387 memset(tn, 0, sz);
Robert Olsson2373ce12005-08-25 13:01:29 -0700388 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700389 tn->pos = pos;
390 tn->bits = bits;
391 tn->key = key;
392 tn->full_children = 0;
393 tn->empty_children = 1<<bits;
394 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700395
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700396 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
397 (unsigned int) (sizeof(struct node) * 1<<bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700398 return tn;
399}
400
Robert Olsson19baf832005-06-21 12:43:18 -0700401/*
402 * Check whether a tnode 'n' is "full", i.e. it is an internal node
403 * and no bits are skipped. See discussion in dyntree paper p. 6
404 */
405
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700406static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700407{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700408 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700409 return 0;
410
411 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
412}
413
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700414static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700415{
416 tnode_put_child_reorg(tn, i, n, -1);
417}
418
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700419 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700420 * Add a child at position i overwriting the old value.
421 * Update the value of full_children and empty_children.
422 */
423
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700424static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700425{
Robert Olsson2373ce12005-08-25 13:01:29 -0700426 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700427 int isfull;
428
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700429 BUG_ON(i >= 1<<tn->bits);
430
Robert Olsson19baf832005-06-21 12:43:18 -0700431
432 /* update emptyChildren */
433 if (n == NULL && chi != NULL)
434 tn->empty_children++;
435 else if (n != NULL && chi == NULL)
436 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700437
Robert Olsson19baf832005-06-21 12:43:18 -0700438 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700439 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700440 wasfull = tnode_full(tn, chi);
441
442 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700443 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700444 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700445 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700446 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700447
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700448 if (n)
449 NODE_SET_PARENT(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700450
Robert Olsson2373ce12005-08-25 13:01:29 -0700451 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700452}
453
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700454static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700455{
456 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700457 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700458 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700459 int inflate_threshold_use;
460 int halve_threshold_use;
Robert Olsson19baf832005-06-21 12:43:18 -0700461
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900462 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700463 return NULL;
464
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700465 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
466 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700467
468 /* No children */
469 if (tn->empty_children == tnode_child_length(tn)) {
470 tnode_free(tn);
471 return NULL;
472 }
473 /* One child */
474 if (tn->empty_children == tnode_child_length(tn) - 1)
475 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700476 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700477
Olof Johansson91b9a272005-08-09 20:24:39 -0700478 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700479 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700480 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700481
482 /* compress one level */
Robert Olsson2373ce12005-08-25 13:01:29 -0700483 NODE_SET_PARENT(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700484 tnode_free(tn);
485 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700486 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700487 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700488 * Double as long as the resulting node has a number of
489 * nonempty nodes that are above the threshold.
490 */
491
492 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700493 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
494 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700495 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700496 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700497 * children in the *doubled* node is at least 'high'."
498 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700499 * 'high' in this instance is the variable 'inflate_threshold'. It
500 * is expressed as a percentage, so we multiply it with
501 * tnode_child_length() and instead of multiplying by 2 (since the
502 * child array will be doubled by inflate()) and multiplying
503 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700504 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700505 *
506 * The left-hand side may look a bit weird: tnode_child_length(tn)
507 * - tn->empty_children is of course the number of non-null children
508 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700509 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700510 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700511 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700512 *
Robert Olsson19baf832005-06-21 12:43:18 -0700513 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700514 *
Robert Olsson19baf832005-06-21 12:43:18 -0700515 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700516 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700517 * tn->full_children;
518 *
519 * new_child_length = tnode_child_length(tn) * 2;
520 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700521 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700522 * new_child_length;
523 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700524 *
525 * ...and so on, tho it would mess up the while () loop.
526 *
Robert Olsson19baf832005-06-21 12:43:18 -0700527 * anyway,
528 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
529 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700530 *
Robert Olsson19baf832005-06-21 12:43:18 -0700531 * avoid a division:
532 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
533 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700534 *
Robert Olsson19baf832005-06-21 12:43:18 -0700535 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700537 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700538 *
Robert Olsson19baf832005-06-21 12:43:18 -0700539 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700540 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700541 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700542 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700543 *
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700545 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700546 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700547 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700548 *
Robert Olsson19baf832005-06-21 12:43:18 -0700549 */
550
551 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700552
Robert Olssone6308be2005-10-04 13:01:58 -0700553 /* Keep root node larger */
554
Stephen Hemminger132adf52007-03-08 20:44:43 -0800555 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700556 inflate_threshold_use = inflate_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900557 else
Robert Olssone6308be2005-10-04 13:01:58 -0700558 inflate_threshold_use = inflate_threshold;
559
Robert Olsson2f368952005-07-05 15:02:40 -0700560 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700561 while ((tn->full_children > 0 &&
562 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
Robert Olssone6308be2005-10-04 13:01:58 -0700563 inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700564
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700565 old_tn = tn;
566 tn = inflate(t, tn);
567 if (IS_ERR(tn)) {
568 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700569#ifdef CONFIG_IP_FIB_TRIE_STATS
570 t->stats.resize_node_skipped++;
571#endif
572 break;
573 }
Robert Olsson19baf832005-06-21 12:43:18 -0700574 }
575
576 check_tnode(tn);
577
578 /*
579 * Halve as long as the number of empty children in this
580 * node is above threshold.
581 */
Robert Olsson2f368952005-07-05 15:02:40 -0700582
Robert Olssone6308be2005-10-04 13:01:58 -0700583
584 /* Keep root node larger */
585
Stephen Hemminger132adf52007-03-08 20:44:43 -0800586 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700587 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900588 else
Robert Olssone6308be2005-10-04 13:01:58 -0700589 halve_threshold_use = halve_threshold;
590
Robert Olsson2f368952005-07-05 15:02:40 -0700591 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700592 while (tn->bits > 1 &&
593 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700594 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700595
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700596 old_tn = tn;
597 tn = halve(t, tn);
598 if (IS_ERR(tn)) {
599 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700600#ifdef CONFIG_IP_FIB_TRIE_STATS
601 t->stats.resize_node_skipped++;
602#endif
603 break;
604 }
605 }
606
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700607
Robert Olsson19baf832005-06-21 12:43:18 -0700608 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700609 if (tn->empty_children == tnode_child_length(tn) - 1)
610 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700611 struct node *n;
612
Olof Johansson91b9a272005-08-09 20:24:39 -0700613 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700614 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700615 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700616
617 /* compress one level */
618
Robert Olsson2373ce12005-08-25 13:01:29 -0700619 NODE_SET_PARENT(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700620 tnode_free(tn);
621 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700622 }
623
624 return (struct node *) tn;
625}
626
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700627static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700628{
629 struct tnode *inode;
630 struct tnode *oldtnode = tn;
631 int olen = tnode_child_length(tn);
632 int i;
633
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700634 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700635
636 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
637
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700638 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700639 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700640
641 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700642 * Preallocate and store tnodes before the actual work so we
643 * don't get into an inconsistent state if memory allocation
644 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700645 * of tnode is ignored.
646 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700647
648 for (i = 0; i < olen; i++) {
Robert Olsson2f368952005-07-05 15:02:40 -0700649 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
650
651 if (inode &&
652 IS_TNODE(inode) &&
653 inode->pos == oldtnode->pos + oldtnode->bits &&
654 inode->bits > 1) {
655 struct tnode *left, *right;
Robert Olsson2f368952005-07-05 15:02:40 -0700656 t_key m = TKEY_GET_MASK(inode->pos, 1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700657
Robert Olsson2f368952005-07-05 15:02:40 -0700658 left = tnode_new(inode->key&(~m), inode->pos + 1,
659 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700660 if (!left)
661 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700662
Robert Olsson2f368952005-07-05 15:02:40 -0700663 right = tnode_new(inode->key|m, inode->pos + 1,
664 inode->bits - 1);
665
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900666 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700667 tnode_free(left);
668 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900669 }
Robert Olsson2f368952005-07-05 15:02:40 -0700670
671 put_child(t, tn, 2*i, (struct node *) left);
672 put_child(t, tn, 2*i+1, (struct node *) right);
673 }
674 }
675
Olof Johansson91b9a272005-08-09 20:24:39 -0700676 for (i = 0; i < olen; i++) {
Robert Olsson19baf832005-06-21 12:43:18 -0700677 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700678 struct tnode *left, *right;
679 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700680
Robert Olsson19baf832005-06-21 12:43:18 -0700681 /* An empty child */
682 if (node == NULL)
683 continue;
684
685 /* A leaf or an internal node with skipped bits */
686
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700687 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700688 tn->pos + tn->bits - 1) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700689 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
Robert Olsson19baf832005-06-21 12:43:18 -0700690 1) == 0)
691 put_child(t, tn, 2*i, node);
692 else
693 put_child(t, tn, 2*i+1, node);
694 continue;
695 }
696
697 /* An internal node with two children */
698 inode = (struct tnode *) node;
699
700 if (inode->bits == 1) {
701 put_child(t, tn, 2*i, inode->child[0]);
702 put_child(t, tn, 2*i+1, inode->child[1]);
703
704 tnode_free(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700705 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700706 }
707
Olof Johansson91b9a272005-08-09 20:24:39 -0700708 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700709
Olof Johansson91b9a272005-08-09 20:24:39 -0700710 /* We will replace this node 'inode' with two new
711 * ones, 'left' and 'right', each with half of the
712 * original children. The two new nodes will have
713 * a position one bit further down the key and this
714 * means that the "significant" part of their keys
715 * (see the discussion near the top of this file)
716 * will differ by one bit, which will be "0" in
717 * left's key and "1" in right's key. Since we are
718 * moving the key position by one step, the bit that
719 * we are moving away from - the bit at position
720 * (inode->pos) - is the one that will differ between
721 * left and right. So... we synthesize that bit in the
722 * two new keys.
723 * The mask 'm' below will be a single "one" bit at
724 * the position (inode->pos)
725 */
Robert Olsson19baf832005-06-21 12:43:18 -0700726
Olof Johansson91b9a272005-08-09 20:24:39 -0700727 /* Use the old key, but set the new significant
728 * bit to zero.
729 */
Robert Olsson19baf832005-06-21 12:43:18 -0700730
Olof Johansson91b9a272005-08-09 20:24:39 -0700731 left = (struct tnode *) tnode_get_child(tn, 2*i);
732 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700733
Olof Johansson91b9a272005-08-09 20:24:39 -0700734 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700735
Olof Johansson91b9a272005-08-09 20:24:39 -0700736 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
737 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700738
Olof Johansson91b9a272005-08-09 20:24:39 -0700739 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700740
Olof Johansson91b9a272005-08-09 20:24:39 -0700741 size = tnode_child_length(left);
742 for (j = 0; j < size; j++) {
743 put_child(t, left, j, inode->child[j]);
744 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700745 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700746 put_child(t, tn, 2*i, resize(t, left));
747 put_child(t, tn, 2*i+1, resize(t, right));
748
749 tnode_free(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700750 }
751 tnode_free(oldtnode);
752 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700753nomem:
754 {
755 int size = tnode_child_length(tn);
756 int j;
757
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700758 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700759 if (tn->child[j])
760 tnode_free((struct tnode *)tn->child[j]);
761
762 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700763
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700764 return ERR_PTR(-ENOMEM);
765 }
Robert Olsson19baf832005-06-21 12:43:18 -0700766}
767
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700768static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700769{
770 struct tnode *oldtnode = tn;
771 struct node *left, *right;
772 int i;
773 int olen = tnode_child_length(tn);
774
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700775 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700776
777 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700778
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700779 if (!tn)
780 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700781
782 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700783 * Preallocate and store tnodes before the actual work so we
784 * don't get into an inconsistent state if memory allocation
785 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700786 * of tnode is ignored.
787 */
788
Olof Johansson91b9a272005-08-09 20:24:39 -0700789 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700790 left = tnode_get_child(oldtnode, i);
791 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700792
Robert Olsson2f368952005-07-05 15:02:40 -0700793 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700794 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700795 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700796
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700797 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700798
799 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700800 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700801
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700802 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700803 }
Robert Olsson2f368952005-07-05 15:02:40 -0700804
Robert Olsson2f368952005-07-05 15:02:40 -0700805 }
Robert Olsson19baf832005-06-21 12:43:18 -0700806
Olof Johansson91b9a272005-08-09 20:24:39 -0700807 for (i = 0; i < olen; i += 2) {
808 struct tnode *newBinNode;
809
Robert Olsson19baf832005-06-21 12:43:18 -0700810 left = tnode_get_child(oldtnode, i);
811 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700812
Robert Olsson19baf832005-06-21 12:43:18 -0700813 /* At least one of the children is empty */
814 if (left == NULL) {
815 if (right == NULL) /* Both are empty */
816 continue;
817 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700818 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700819 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700820
821 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700822 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700823 continue;
824 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700825
Robert Olsson19baf832005-06-21 12:43:18 -0700826 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700827 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
828 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700829 put_child(t, newBinNode, 0, left);
830 put_child(t, newBinNode, 1, right);
831 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700832 }
833 tnode_free(oldtnode);
834 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700835nomem:
836 {
837 int size = tnode_child_length(tn);
838 int j;
839
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700840 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700841 if (tn->child[j])
842 tnode_free((struct tnode *)tn->child[j]);
843
844 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700845
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700846 return ERR_PTR(-ENOMEM);
847 }
Robert Olsson19baf832005-06-21 12:43:18 -0700848}
849
Olof Johansson91b9a272005-08-09 20:24:39 -0700850static void trie_init(struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -0700851{
Olof Johansson91b9a272005-08-09 20:24:39 -0700852 if (!t)
853 return;
854
855 t->size = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700856 rcu_assign_pointer(t->trie, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700857 t->revision = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700858#ifdef CONFIG_IP_FIB_TRIE_STATS
Olof Johansson91b9a272005-08-09 20:24:39 -0700859 memset(&t->stats, 0, sizeof(struct trie_use_stats));
Robert Olsson19baf832005-06-21 12:43:18 -0700860#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700861}
862
Robert Olsson772cb712005-09-19 15:31:18 -0700863/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700864 via get_fa_head and dump */
865
Robert Olsson772cb712005-09-19 15:31:18 -0700866static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700867{
Robert Olsson772cb712005-09-19 15:31:18 -0700868 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700869 struct hlist_node *node;
870 struct leaf_info *li;
871
Robert Olsson2373ce12005-08-25 13:01:29 -0700872 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700873 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700874 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700875
Robert Olsson19baf832005-06-21 12:43:18 -0700876 return NULL;
877}
878
879static inline struct list_head * get_fa_head(struct leaf *l, int plen)
880{
Robert Olsson772cb712005-09-19 15:31:18 -0700881 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700882
Olof Johansson91b9a272005-08-09 20:24:39 -0700883 if (!li)
884 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700885
Olof Johansson91b9a272005-08-09 20:24:39 -0700886 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700887}
888
889static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
890{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900891 struct leaf_info *li = NULL, *last = NULL;
892 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700893
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900894 if (hlist_empty(head)) {
895 hlist_add_head_rcu(&new->hlist, head);
896 } else {
897 hlist_for_each_entry(li, node, head, hlist) {
898 if (new->plen > li->plen)
899 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700900
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900901 last = li;
902 }
903 if (last)
904 hlist_add_after_rcu(&last->hlist, &new->hlist);
905 else
906 hlist_add_before_rcu(&new->hlist, &li->hlist);
907 }
Robert Olsson19baf832005-06-21 12:43:18 -0700908}
909
Robert Olsson2373ce12005-08-25 13:01:29 -0700910/* rcu_read_lock needs to be hold by caller from readside */
911
Robert Olsson19baf832005-06-21 12:43:18 -0700912static struct leaf *
913fib_find_node(struct trie *t, u32 key)
914{
915 int pos;
916 struct tnode *tn;
917 struct node *n;
918
919 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700920 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700921
922 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
923 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700924
Robert Olsson19baf832005-06-21 12:43:18 -0700925 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700926
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700927 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700928 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700929 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700930 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700931 break;
932 }
933 /* Case we have found a leaf. Compare prefixes */
934
Olof Johansson91b9a272005-08-09 20:24:39 -0700935 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
936 return (struct leaf *)n;
937
Robert Olsson19baf832005-06-21 12:43:18 -0700938 return NULL;
939}
940
941static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
942{
Robert Olsson19baf832005-06-21 12:43:18 -0700943 int wasfull;
944 t_key cindex, key;
945 struct tnode *tp = NULL;
946
Robert Olsson19baf832005-06-21 12:43:18 -0700947 key = tn->key;
Robert Olsson19baf832005-06-21 12:43:18 -0700948
949 while (tn != NULL && NODE_PARENT(tn) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700950
951 tp = NODE_PARENT(tn);
952 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
953 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
954 tn = (struct tnode *) resize (t, (struct tnode *)tn);
955 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700956
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700957 if (!NODE_PARENT(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700958 break;
959
960 tn = NODE_PARENT(tn);
961 }
962 /* Handle last (top) tnode */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700963 if (IS_TNODE(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700964 tn = (struct tnode*) resize(t, (struct tnode *)tn);
965
966 return (struct node*) tn;
967}
968
Robert Olsson2373ce12005-08-25 13:01:29 -0700969/* only used from updater-side */
970
Robert Olssonf835e472005-06-28 15:00:39 -0700971static struct list_head *
972fib_insert_node(struct trie *t, int *err, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700973{
974 int pos, newpos;
975 struct tnode *tp = NULL, *tn = NULL;
976 struct node *n;
977 struct leaf *l;
978 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700979 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700980 struct leaf_info *li;
981 t_key cindex;
982
983 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700984 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700985
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700986 /* If we point to NULL, stop. Either the tree is empty and we should
987 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -0700988 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700989 * If we point to a T_TNODE, check if it matches our key. Note that
990 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -0700991 * not be the parent's 'pos'+'bits'!
992 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700993 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -0700994 * the index from our key, push the T_TNODE and walk the tree.
995 *
996 * If it doesn't, we have to replace it with a new T_TNODE.
997 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700998 * If we point to a T_LEAF, it might or might not have the same key
999 * as we do. If it does, just change the value, update the T_LEAF's
1000 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001001 * If it doesn't, we need to replace it with a T_TNODE.
1002 */
1003
1004 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1005 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001006
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001007 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001008
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001009 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001010 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001011 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001012 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1013
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001014 BUG_ON(n && NODE_PARENT(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001015 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001016 break;
1017 }
1018
1019 /*
1020 * n ----> NULL, LEAF or TNODE
1021 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001022 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001023 */
1024
Olof Johansson91b9a272005-08-09 20:24:39 -07001025 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001026
1027 /* Case 1: n is a leaf. Compare prefixes */
1028
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001029 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001030 struct leaf *l = (struct leaf *) n;
1031
Robert Olsson19baf832005-06-21 12:43:18 -07001032 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001033
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001034 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001035 *err = -ENOMEM;
1036 goto err;
1037 }
Robert Olsson19baf832005-06-21 12:43:18 -07001038
1039 fa_head = &li->falh;
1040 insert_leaf_info(&l->list, li);
1041 goto done;
1042 }
1043 t->size++;
1044 l = leaf_new();
1045
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001046 if (!l) {
Robert Olssonf835e472005-06-28 15:00:39 -07001047 *err = -ENOMEM;
1048 goto err;
1049 }
Robert Olsson19baf832005-06-21 12:43:18 -07001050
1051 l->key = key;
1052 li = leaf_info_new(plen);
1053
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001054 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001055 tnode_free((struct tnode *) l);
1056 *err = -ENOMEM;
1057 goto err;
1058 }
Robert Olsson19baf832005-06-21 12:43:18 -07001059
1060 fa_head = &li->falh;
1061 insert_leaf_info(&l->list, li);
1062
Robert Olsson19baf832005-06-21 12:43:18 -07001063 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001064 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001065
1066 NODE_SET_PARENT(l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001067
Olof Johansson91b9a272005-08-09 20:24:39 -07001068 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1069 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1070 } else {
1071 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001072 /*
1073 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001074 * first tnode need some special handling
1075 */
1076
1077 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001078 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001079 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001080 pos = 0;
1081
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001082 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001083 newpos = tkey_mismatch(key, pos, n->key);
1084 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001085 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001086 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001087 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001088 }
Robert Olsson19baf832005-06-21 12:43:18 -07001089
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001090 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001091 free_leaf_info(li);
1092 tnode_free((struct tnode *) l);
1093 *err = -ENOMEM;
1094 goto err;
Olof Johansson91b9a272005-08-09 20:24:39 -07001095 }
1096
Robert Olsson19baf832005-06-21 12:43:18 -07001097 NODE_SET_PARENT(tn, tp);
1098
Olof Johansson91b9a272005-08-09 20:24:39 -07001099 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001100 put_child(t, tn, missbit, (struct node *)l);
1101 put_child(t, tn, 1-missbit, n);
1102
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001103 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001104 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1105 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001106 } else {
Robert Olsson2373ce12005-08-25 13:01:29 -07001107 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001108 tp = tn;
1109 }
1110 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001111
1112 if (tp && tp->pos + tp->bits > 32)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001113 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
Robert Olsson19baf832005-06-21 12:43:18 -07001114 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001115
Robert Olsson19baf832005-06-21 12:43:18 -07001116 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001117
1118 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Robert Olssonf835e472005-06-28 15:00:39 -07001119done:
1120 t->revision++;
Olof Johansson91b9a272005-08-09 20:24:39 -07001121err:
Robert Olsson19baf832005-06-21 12:43:18 -07001122 return fa_head;
1123}
1124
Robert Olssond562f1f2007-03-26 14:22:22 -07001125/*
1126 * Caller must hold RTNL.
1127 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001128static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001129{
1130 struct trie *t = (struct trie *) tb->tb_data;
1131 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001132 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001133 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001134 int plen = cfg->fc_dst_len;
1135 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001136 u32 key, mask;
1137 int err;
1138 struct leaf *l;
1139
1140 if (plen > 32)
1141 return -EINVAL;
1142
Thomas Graf4e902c52006-08-17 18:14:52 -07001143 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001144
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001145 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001146
Olof Johansson91b9a272005-08-09 20:24:39 -07001147 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001148
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001149 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001150 return -EINVAL;
1151
1152 key = key & mask;
1153
Thomas Graf4e902c52006-08-17 18:14:52 -07001154 fi = fib_create_info(cfg);
1155 if (IS_ERR(fi)) {
1156 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001157 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001158 }
Robert Olsson19baf832005-06-21 12:43:18 -07001159
1160 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001161 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001162
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001163 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001164 fa_head = get_fa_head(l, plen);
1165 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1166 }
1167
1168 /* Now fa, if non-NULL, points to the first fib alias
1169 * with the same keys [prefix,tos,priority], if such key already
1170 * exists or to the node before which we will insert new one.
1171 *
1172 * If fa is NULL, we will need to allocate a new one and
1173 * insert to the head of f.
1174 *
1175 * If f is NULL, no fib node matched the destination key
1176 * and we need to allocate a new one of those as well.
1177 */
1178
Olof Johansson91b9a272005-08-09 20:24:39 -07001179 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
Robert Olsson19baf832005-06-21 12:43:18 -07001180 struct fib_alias *fa_orig;
1181
1182 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001183 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001184 goto out;
1185
Thomas Graf4e902c52006-08-17 18:14:52 -07001186 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001187 struct fib_info *fi_drop;
1188 u8 state;
1189
Robert Olsson2373ce12005-08-25 13:01:29 -07001190 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001191 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001192 if (new_fa == NULL)
1193 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001194
1195 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001196 new_fa->fa_tos = fa->fa_tos;
1197 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001198 new_fa->fa_type = cfg->fc_type;
1199 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001200 state = fa->fa_state;
Robert Olsson2373ce12005-08-25 13:01:29 -07001201 new_fa->fa_state &= ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001202
Robert Olsson2373ce12005-08-25 13:01:29 -07001203 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1204 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001205
1206 fib_release_info(fi_drop);
1207 if (state & FA_S_ACCESSED)
Olof Johansson91b9a272005-08-09 20:24:39 -07001208 rt_cache_flush(-1);
Robert Olsson19baf832005-06-21 12:43:18 -07001209
Olof Johansson91b9a272005-08-09 20:24:39 -07001210 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001211 }
1212 /* Error if we find a perfect match which
1213 * uses the same scope, type, and nexthop
1214 * information.
1215 */
1216 fa_orig = fa;
1217 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1218 if (fa->fa_tos != tos)
1219 break;
1220 if (fa->fa_info->fib_priority != fi->fib_priority)
1221 break;
Thomas Graf4e902c52006-08-17 18:14:52 -07001222 if (fa->fa_type == cfg->fc_type &&
1223 fa->fa_scope == cfg->fc_scope &&
Robert Olsson19baf832005-06-21 12:43:18 -07001224 fa->fa_info == fi) {
1225 goto out;
1226 }
1227 }
Thomas Graf4e902c52006-08-17 18:14:52 -07001228 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Robert Olsson19baf832005-06-21 12:43:18 -07001229 fa = fa_orig;
1230 }
1231 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001232 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001233 goto out;
1234
1235 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001236 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001237 if (new_fa == NULL)
1238 goto out;
1239
1240 new_fa->fa_info = fi;
1241 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001242 new_fa->fa_type = cfg->fc_type;
1243 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001244 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001245 /*
1246 * Insert new entry to the list.
1247 */
1248
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001249 if (!fa_head) {
Robert Olssonf835e472005-06-28 15:00:39 -07001250 err = 0;
Herbert Xub47b2ec2006-07-12 13:29:56 -07001251 fa_head = fib_insert_node(t, &err, key, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001252 if (err)
Robert Olssonf835e472005-06-28 15:00:39 -07001253 goto out_free_new_fa;
1254 }
Robert Olsson19baf832005-06-21 12:43:18 -07001255
Robert Olsson2373ce12005-08-25 13:01:29 -07001256 list_add_tail_rcu(&new_fa->fa_list,
1257 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001258
1259 rt_cache_flush(-1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001260 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
1261 &cfg->fc_nlinfo);
Robert Olsson19baf832005-06-21 12:43:18 -07001262succeeded:
1263 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001264
1265out_free_new_fa:
1266 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001267out:
1268 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001269err:
Robert Olsson19baf832005-06-21 12:43:18 -07001270 return err;
1271}
1272
Robert Olsson2373ce12005-08-25 13:01:29 -07001273
Robert Olsson772cb712005-09-19 15:31:18 -07001274/* should be called with rcu_read_lock */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001275static inline int check_leaf(struct trie *t, struct leaf *l,
1276 t_key key, int *plen, const struct flowi *flp,
Patrick McHardy06c74272005-08-23 22:06:09 -07001277 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001278{
Patrick McHardy06c74272005-08-23 22:06:09 -07001279 int err, i;
Al Viro888454c2006-09-19 13:42:46 -07001280 __be32 mask;
Robert Olsson19baf832005-06-21 12:43:18 -07001281 struct leaf_info *li;
1282 struct hlist_head *hhead = &l->list;
1283 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001284
Robert Olsson2373ce12005-08-25 13:01:29 -07001285 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001286 i = li->plen;
Al Viro888454c2006-09-19 13:42:46 -07001287 mask = inet_make_mask(i);
1288 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001289 continue;
1290
Al Viro888454c2006-09-19 13:42:46 -07001291 if ((err = fib_semantic_match(&li->falh, flp, res, htonl(l->key), mask, i)) <= 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001292 *plen = i;
1293#ifdef CONFIG_IP_FIB_TRIE_STATS
1294 t->stats.semantic_match_passed++;
1295#endif
Patrick McHardy06c74272005-08-23 22:06:09 -07001296 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001297 }
1298#ifdef CONFIG_IP_FIB_TRIE_STATS
1299 t->stats.semantic_match_miss++;
1300#endif
1301 }
Patrick McHardy06c74272005-08-23 22:06:09 -07001302 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001303}
1304
1305static int
1306fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1307{
1308 struct trie *t = (struct trie *) tb->tb_data;
1309 int plen, ret = 0;
1310 struct node *n;
1311 struct tnode *pn;
1312 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001313 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001314 int chopped_off;
1315 t_key cindex = 0;
1316 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001317 struct tnode *cn;
1318 t_key node_prefix, key_prefix, pref_mismatch;
1319 int mp;
1320
Robert Olsson2373ce12005-08-25 13:01:29 -07001321 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001322
Robert Olsson2373ce12005-08-25 13:01:29 -07001323 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001324 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001325 goto failed;
1326
1327#ifdef CONFIG_IP_FIB_TRIE_STATS
1328 t->stats.gets++;
1329#endif
1330
1331 /* Just a leaf? */
1332 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001333 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001334 goto found;
1335 goto failed;
1336 }
1337 pn = (struct tnode *) n;
1338 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001339
Olof Johansson91b9a272005-08-09 20:24:39 -07001340 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001341 pos = pn->pos;
1342 bits = pn->bits;
1343
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001344 if (!chopped_off)
Robert Olsson19baf832005-06-21 12:43:18 -07001345 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1346
1347 n = tnode_get_child(pn, cindex);
1348
1349 if (n == NULL) {
1350#ifdef CONFIG_IP_FIB_TRIE_STATS
1351 t->stats.null_node_hit++;
1352#endif
1353 goto backtrace;
1354 }
1355
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001356 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001357 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001358 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001359 else
1360 goto backtrace;
1361 }
1362
1363#define HL_OPTIMIZE
1364#ifdef HL_OPTIMIZE
1365 cn = (struct tnode *)n;
1366
1367 /*
1368 * It's a tnode, and we can do some extra checks here if we
1369 * like, to avoid descending into a dead-end branch.
1370 * This tnode is in the parent's child array at index
1371 * key[p_pos..p_pos+p_bits] but potentially with some bits
1372 * chopped off, so in reality the index may be just a
1373 * subprefix, padded with zero at the end.
1374 * We can also take a look at any skipped bits in this
1375 * tnode - everything up to p_pos is supposed to be ok,
1376 * and the non-chopped bits of the index (se previous
1377 * paragraph) are also guaranteed ok, but the rest is
1378 * considered unknown.
1379 *
1380 * The skipped bits are key[pos+bits..cn->pos].
1381 */
1382
1383 /* If current_prefix_length < pos+bits, we are already doing
1384 * actual prefix matching, which means everything from
1385 * pos+(bits-chopped_off) onward must be zero along some
1386 * branch of this subtree - otherwise there is *no* valid
1387 * prefix present. Here we can only check the skipped
1388 * bits. Remember, since we have already indexed into the
1389 * parent's child array, we know that the bits we chopped of
1390 * *are* zero.
1391 */
1392
1393 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1394
1395 if (current_prefix_length < pos+bits) {
1396 if (tkey_extract_bits(cn->key, current_prefix_length,
1397 cn->pos - current_prefix_length) != 0 ||
1398 !(cn->child[0]))
1399 goto backtrace;
1400 }
1401
1402 /*
1403 * If chopped_off=0, the index is fully validated and we
1404 * only need to look at the skipped bits for this, the new,
1405 * tnode. What we actually want to do is to find out if
1406 * these skipped bits match our key perfectly, or if we will
1407 * have to count on finding a matching prefix further down,
1408 * because if we do, we would like to have some way of
1409 * verifying the existence of such a prefix at this point.
1410 */
1411
1412 /* The only thing we can do at this point is to verify that
1413 * any such matching prefix can indeed be a prefix to our
1414 * key, and if the bits in the node we are inspecting that
1415 * do not match our key are not ZERO, this cannot be true.
1416 * Thus, find out where there is a mismatch (before cn->pos)
1417 * and verify that all the mismatching bits are zero in the
1418 * new tnode's key.
1419 */
1420
1421 /* Note: We aren't very concerned about the piece of the key
1422 * that precede pn->pos+pn->bits, since these have already been
1423 * checked. The bits after cn->pos aren't checked since these are
1424 * by definition "unknown" at this point. Thus, what we want to
1425 * see is if we are about to enter the "prefix matching" state,
1426 * and in that case verify that the skipped bits that will prevail
1427 * throughout this subtree are zero, as they have to be if we are
1428 * to find a matching prefix.
1429 */
1430
1431 node_prefix = MASK_PFX(cn->key, cn->pos);
1432 key_prefix = MASK_PFX(key, cn->pos);
1433 pref_mismatch = key_prefix^node_prefix;
1434 mp = 0;
1435
1436 /* In short: If skipped bits in this node do not match the search
1437 * key, enter the "prefix matching" state.directly.
1438 */
1439 if (pref_mismatch) {
1440 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1441 mp++;
1442 pref_mismatch = pref_mismatch <<1;
1443 }
1444 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1445
1446 if (key_prefix != 0)
1447 goto backtrace;
1448
1449 if (current_prefix_length >= cn->pos)
1450 current_prefix_length = mp;
1451 }
1452#endif
1453 pn = (struct tnode *)n; /* Descend */
1454 chopped_off = 0;
1455 continue;
1456
Robert Olsson19baf832005-06-21 12:43:18 -07001457backtrace:
1458 chopped_off++;
1459
1460 /* As zero don't change the child key (cindex) */
Olof Johansson91b9a272005-08-09 20:24:39 -07001461 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001462 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001463
1464 /* Decrease current_... with bits chopped off */
1465 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1466 current_prefix_length = pn->pos + pn->bits - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001467
Robert Olsson19baf832005-06-21 12:43:18 -07001468 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001469 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001470 * chopped off all bits in this tnode walk up to our parent.
1471 */
1472
Olof Johansson91b9a272005-08-09 20:24:39 -07001473 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001474 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001475 } else {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001476 if (NODE_PARENT(pn) == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001477 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001478
Robert Olsson19baf832005-06-21 12:43:18 -07001479 /* Get Child's index */
1480 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1481 pn = NODE_PARENT(pn);
1482 chopped_off = 0;
1483
1484#ifdef CONFIG_IP_FIB_TRIE_STATS
1485 t->stats.backtrack++;
1486#endif
1487 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001488 }
Robert Olsson19baf832005-06-21 12:43:18 -07001489 }
1490failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001491 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001492found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001493 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001494 return ret;
1495}
1496
Robert Olsson2373ce12005-08-25 13:01:29 -07001497/* only called from updater side */
Robert Olsson19baf832005-06-21 12:43:18 -07001498static int trie_leaf_remove(struct trie *t, t_key key)
1499{
1500 t_key cindex;
1501 struct tnode *tp = NULL;
1502 struct node *n = t->trie;
1503 struct leaf *l;
1504
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001505 pr_debug("entering trie_leaf_remove(%p)\n", n);
Robert Olsson19baf832005-06-21 12:43:18 -07001506
1507 /* Note that in the case skipped bits, those bits are *not* checked!
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001508 * When we finish this, we will have NULL or a T_LEAF, and the
Robert Olsson19baf832005-06-21 12:43:18 -07001509 * T_LEAF may or may not match our key.
1510 */
1511
Olof Johansson91b9a272005-08-09 20:24:39 -07001512 while (n != NULL && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001513 struct tnode *tn = (struct tnode *) n;
1514 check_tnode(tn);
1515 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1516
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001517 BUG_ON(n && NODE_PARENT(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001518 }
Robert Olsson19baf832005-06-21 12:43:18 -07001519 l = (struct leaf *) n;
1520
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001521 if (!n || !tkey_equals(l->key, key))
Robert Olsson19baf832005-06-21 12:43:18 -07001522 return 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001523
1524 /*
1525 * Key found.
1526 * Remove the leaf and rebalance the tree
Robert Olsson19baf832005-06-21 12:43:18 -07001527 */
1528
1529 t->revision++;
1530 t->size--;
1531
1532 tp = NODE_PARENT(n);
1533 tnode_free((struct tnode *) n);
1534
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001535 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001536 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1537 put_child(t, (struct tnode *)tp, cindex, NULL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001538 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Olof Johansson91b9a272005-08-09 20:24:39 -07001539 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001540 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001541
1542 return 1;
1543}
1544
Robert Olssond562f1f2007-03-26 14:22:22 -07001545/*
1546 * Caller must hold RTNL.
1547 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001548static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001549{
1550 struct trie *t = (struct trie *) tb->tb_data;
1551 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001552 int plen = cfg->fc_dst_len;
1553 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001554 struct fib_alias *fa, *fa_to_delete;
1555 struct list_head *fa_head;
1556 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001557 struct leaf_info *li;
1558
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001559 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001560 return -EINVAL;
1561
Thomas Graf4e902c52006-08-17 18:14:52 -07001562 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001563 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001564
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001565 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001566 return -EINVAL;
1567
1568 key = key & mask;
1569 l = fib_find_node(t, key);
1570
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001571 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001572 return -ESRCH;
1573
1574 fa_head = get_fa_head(l, plen);
1575 fa = fib_find_alias(fa_head, tos, 0);
1576
1577 if (!fa)
1578 return -ESRCH;
1579
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001580 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001581
1582 fa_to_delete = NULL;
1583 fa_head = fa->fa_list.prev;
Robert Olsson2373ce12005-08-25 13:01:29 -07001584
Robert Olsson19baf832005-06-21 12:43:18 -07001585 list_for_each_entry(fa, fa_head, fa_list) {
1586 struct fib_info *fi = fa->fa_info;
1587
1588 if (fa->fa_tos != tos)
1589 break;
1590
Thomas Graf4e902c52006-08-17 18:14:52 -07001591 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1592 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1593 fa->fa_scope == cfg->fc_scope) &&
1594 (!cfg->fc_protocol ||
1595 fi->fib_protocol == cfg->fc_protocol) &&
1596 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001597 fa_to_delete = fa;
1598 break;
1599 }
1600 }
1601
Olof Johansson91b9a272005-08-09 20:24:39 -07001602 if (!fa_to_delete)
1603 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001604
Olof Johansson91b9a272005-08-09 20:24:39 -07001605 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001606 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
1607 &cfg->fc_nlinfo);
Robert Olsson19baf832005-06-21 12:43:18 -07001608
Olof Johansson91b9a272005-08-09 20:24:39 -07001609 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001610 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001611
Robert Olsson2373ce12005-08-25 13:01:29 -07001612 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001613
Olof Johansson91b9a272005-08-09 20:24:39 -07001614 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001615 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001616 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001617 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001618
1619 if (hlist_empty(&l->list))
1620 trie_leaf_remove(t, key);
1621
1622 if (fa->fa_state & FA_S_ACCESSED)
1623 rt_cache_flush(-1);
1624
Robert Olsson2373ce12005-08-25 13:01:29 -07001625 fib_release_info(fa->fa_info);
1626 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001627 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001628}
1629
1630static int trie_flush_list(struct trie *t, struct list_head *head)
1631{
1632 struct fib_alias *fa, *fa_node;
1633 int found = 0;
1634
1635 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1636 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001637
Robert Olsson2373ce12005-08-25 13:01:29 -07001638 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1639 list_del_rcu(&fa->fa_list);
1640 fib_release_info(fa->fa_info);
1641 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001642 found++;
1643 }
1644 }
1645 return found;
1646}
1647
1648static int trie_flush_leaf(struct trie *t, struct leaf *l)
1649{
1650 int found = 0;
1651 struct hlist_head *lih = &l->list;
1652 struct hlist_node *node, *tmp;
1653 struct leaf_info *li = NULL;
1654
1655 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001656 found += trie_flush_list(t, &li->falh);
1657
1658 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001659 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001660 free_leaf_info(li);
1661 }
1662 }
1663 return found;
1664}
1665
Robert Olsson2373ce12005-08-25 13:01:29 -07001666/* rcu_read_lock needs to be hold by caller from readside */
1667
Robert Olsson19baf832005-06-21 12:43:18 -07001668static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1669{
1670 struct node *c = (struct node *) thisleaf;
1671 struct tnode *p;
1672 int idx;
Robert Olsson2373ce12005-08-25 13:01:29 -07001673 struct node *trie = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -07001674
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001675 if (c == NULL) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001676 if (trie == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001677 return NULL;
1678
Robert Olsson2373ce12005-08-25 13:01:29 -07001679 if (IS_LEAF(trie)) /* trie w. just a leaf */
1680 return (struct leaf *) trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001681
Robert Olsson2373ce12005-08-25 13:01:29 -07001682 p = (struct tnode*) trie; /* Start */
Olof Johansson91b9a272005-08-09 20:24:39 -07001683 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001684 p = (struct tnode *) NODE_PARENT(c);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001685
Robert Olsson19baf832005-06-21 12:43:18 -07001686 while (p) {
1687 int pos, last;
1688
1689 /* Find the next child of the parent */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001690 if (c)
1691 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1692 else
Robert Olsson19baf832005-06-21 12:43:18 -07001693 pos = 0;
1694
1695 last = 1 << p->bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001696 for (idx = pos; idx < last ; idx++) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001697 c = rcu_dereference(p->child[idx]);
1698
1699 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001700 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001701
Olof Johansson91b9a272005-08-09 20:24:39 -07001702 /* Decend if tnode */
Robert Olsson2373ce12005-08-25 13:01:29 -07001703 while (IS_TNODE(c)) {
1704 p = (struct tnode *) c;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001705 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001706
Olof Johansson91b9a272005-08-09 20:24:39 -07001707 /* Rightmost non-NULL branch */
1708 if (p && IS_TNODE(p))
Robert Olsson2373ce12005-08-25 13:01:29 -07001709 while (!(c = rcu_dereference(p->child[idx]))
1710 && idx < (1<<p->bits)) idx++;
Robert Olsson19baf832005-06-21 12:43:18 -07001711
Olof Johansson91b9a272005-08-09 20:24:39 -07001712 /* Done with this tnode? */
Robert Olsson2373ce12005-08-25 13:01:29 -07001713 if (idx >= (1 << p->bits) || !c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001714 goto up;
Robert Olsson19baf832005-06-21 12:43:18 -07001715 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001716 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001717 }
1718up:
1719 /* No more children go up one step */
Olof Johansson91b9a272005-08-09 20:24:39 -07001720 c = (struct node *) p;
Robert Olsson19baf832005-06-21 12:43:18 -07001721 p = (struct tnode *) NODE_PARENT(p);
1722 }
1723 return NULL; /* Ready. Root of trie */
1724}
1725
Robert Olssond562f1f2007-03-26 14:22:22 -07001726/*
1727 * Caller must hold RTNL.
1728 */
Robert Olsson19baf832005-06-21 12:43:18 -07001729static int fn_trie_flush(struct fib_table *tb)
1730{
1731 struct trie *t = (struct trie *) tb->tb_data;
1732 struct leaf *ll = NULL, *l = NULL;
1733 int found = 0, h;
1734
1735 t->revision++;
1736
Olof Johansson91b9a272005-08-09 20:24:39 -07001737 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001738 found += trie_flush_leaf(t, l);
1739
1740 if (ll && hlist_empty(&ll->list))
1741 trie_leaf_remove(t, ll->key);
1742 ll = l;
1743 }
1744
1745 if (ll && hlist_empty(&ll->list))
1746 trie_leaf_remove(t, ll->key);
1747
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001748 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001749 return found;
1750}
1751
Olof Johansson91b9a272005-08-09 20:24:39 -07001752static int trie_last_dflt = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001753
1754static void
1755fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1756{
1757 struct trie *t = (struct trie *) tb->tb_data;
1758 int order, last_idx;
1759 struct fib_info *fi = NULL;
1760 struct fib_info *last_resort;
1761 struct fib_alias *fa = NULL;
1762 struct list_head *fa_head;
1763 struct leaf *l;
1764
1765 last_idx = -1;
1766 last_resort = NULL;
1767 order = -1;
1768
Robert Olsson2373ce12005-08-25 13:01:29 -07001769 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001770
Robert Olsson19baf832005-06-21 12:43:18 -07001771 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001772 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001773 goto out;
1774
1775 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001776 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001777 goto out;
1778
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001779 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001780 goto out;
1781
Robert Olsson2373ce12005-08-25 13:01:29 -07001782 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001783 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001784
Robert Olsson19baf832005-06-21 12:43:18 -07001785 if (fa->fa_scope != res->scope ||
1786 fa->fa_type != RTN_UNICAST)
1787 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001788
Robert Olsson19baf832005-06-21 12:43:18 -07001789 if (next_fi->fib_priority > res->fi->fib_priority)
1790 break;
1791 if (!next_fi->fib_nh[0].nh_gw ||
1792 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1793 continue;
1794 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001795
Robert Olsson19baf832005-06-21 12:43:18 -07001796 if (fi == NULL) {
1797 if (next_fi != res->fi)
1798 break;
1799 } else if (!fib_detect_death(fi, order, &last_resort,
1800 &last_idx, &trie_last_dflt)) {
1801 if (res->fi)
1802 fib_info_put(res->fi);
1803 res->fi = fi;
1804 atomic_inc(&fi->fib_clntref);
1805 trie_last_dflt = order;
1806 goto out;
1807 }
1808 fi = next_fi;
1809 order++;
1810 }
1811 if (order <= 0 || fi == NULL) {
1812 trie_last_dflt = -1;
1813 goto out;
1814 }
1815
1816 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1817 if (res->fi)
1818 fib_info_put(res->fi);
1819 res->fi = fi;
1820 atomic_inc(&fi->fib_clntref);
1821 trie_last_dflt = order;
1822 goto out;
1823 }
1824 if (last_idx >= 0) {
1825 if (res->fi)
1826 fib_info_put(res->fi);
1827 res->fi = last_resort;
1828 if (last_resort)
1829 atomic_inc(&last_resort->fib_clntref);
1830 }
1831 trie_last_dflt = last_idx;
1832 out:;
Robert Olsson2373ce12005-08-25 13:01:29 -07001833 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001834}
1835
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001836static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001837 struct sk_buff *skb, struct netlink_callback *cb)
1838{
1839 int i, s_i;
1840 struct fib_alias *fa;
1841
Al Viro32ab5f82006-09-26 22:21:45 -07001842 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001843
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001844 s_i = cb->args[4];
Robert Olsson19baf832005-06-21 12:43:18 -07001845 i = 0;
1846
Robert Olsson2373ce12005-08-25 13:01:29 -07001847 /* rcu_read_lock is hold by caller */
1848
1849 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001850 if (i < s_i) {
1851 i++;
1852 continue;
1853 }
Stephen Hemminger78c66712005-09-21 00:15:39 -07001854 BUG_ON(!fa->fa_info);
Robert Olsson19baf832005-06-21 12:43:18 -07001855
1856 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1857 cb->nlh->nlmsg_seq,
1858 RTM_NEWROUTE,
1859 tb->tb_id,
1860 fa->fa_type,
1861 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001862 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001863 plen,
1864 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001865 fa->fa_info, 0) < 0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001866 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001867 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001868 }
Robert Olsson19baf832005-06-21 12:43:18 -07001869 i++;
1870 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001871 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001872 return skb->len;
1873}
1874
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001875static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
Robert Olsson19baf832005-06-21 12:43:18 -07001876 struct netlink_callback *cb)
1877{
1878 int h, s_h;
1879 struct list_head *fa_head;
1880 struct leaf *l = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001881
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001882 s_h = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001883
Olof Johansson91b9a272005-08-09 20:24:39 -07001884 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001885 if (h < s_h)
1886 continue;
1887 if (h > s_h)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001888 memset(&cb->args[4], 0,
1889 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001890
1891 fa_head = get_fa_head(l, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001892
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001893 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001894 continue;
1895
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001896 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001897 continue;
1898
1899 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001900 cb->args[3] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001901 return -1;
1902 }
1903 }
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001904 cb->args[3] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001905 return skb->len;
1906}
1907
1908static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1909{
1910 int m, s_m;
1911 struct trie *t = (struct trie *) tb->tb_data;
1912
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001913 s_m = cb->args[2];
Robert Olsson19baf832005-06-21 12:43:18 -07001914
Robert Olsson2373ce12005-08-25 13:01:29 -07001915 rcu_read_lock();
Olof Johansson91b9a272005-08-09 20:24:39 -07001916 for (m = 0; m <= 32; m++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001917 if (m < s_m)
1918 continue;
1919 if (m > s_m)
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001920 memset(&cb->args[3], 0,
1921 sizeof(cb->args) - 3*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001922
1923 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001924 cb->args[2] = m;
Robert Olsson19baf832005-06-21 12:43:18 -07001925 goto out;
1926 }
1927 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001928 rcu_read_unlock();
Patrick McHardy1af5a8c2006-08-10 23:10:46 -07001929 cb->args[2] = m;
Robert Olsson19baf832005-06-21 12:43:18 -07001930 return skb->len;
Olof Johansson91b9a272005-08-09 20:24:39 -07001931out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001932 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001933 return -1;
1934}
1935
1936/* Fix more generic FIB names for init later */
1937
1938#ifdef CONFIG_IP_MULTIPLE_TABLES
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001939struct fib_table * fib_hash_init(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001940#else
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001941struct fib_table * __init fib_hash_init(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001942#endif
1943{
1944 struct fib_table *tb;
1945 struct trie *t;
1946
1947 if (fn_alias_kmem == NULL)
1948 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1949 sizeof(struct fib_alias),
1950 0, SLAB_HWCACHE_ALIGN,
1951 NULL, NULL);
1952
1953 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1954 GFP_KERNEL);
1955 if (tb == NULL)
1956 return NULL;
1957
1958 tb->tb_id = id;
1959 tb->tb_lookup = fn_trie_lookup;
1960 tb->tb_insert = fn_trie_insert;
1961 tb->tb_delete = fn_trie_delete;
1962 tb->tb_flush = fn_trie_flush;
1963 tb->tb_select_default = fn_trie_select_default;
1964 tb->tb_dump = fn_trie_dump;
1965 memset(tb->tb_data, 0, sizeof(struct trie));
1966
1967 t = (struct trie *) tb->tb_data;
1968
1969 trie_init(t);
1970
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001971 if (id == RT_TABLE_LOCAL)
Olof Johansson91b9a272005-08-09 20:24:39 -07001972 trie_local = t;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001973 else if (id == RT_TABLE_MAIN)
Olof Johansson91b9a272005-08-09 20:24:39 -07001974 trie_main = t;
Robert Olsson19baf832005-06-21 12:43:18 -07001975
1976 if (id == RT_TABLE_LOCAL)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001977 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07001978
1979 return tb;
1980}
1981
Robert Olsson19baf832005-06-21 12:43:18 -07001982#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001983/* Depth first Trie walk iterator */
1984struct fib_trie_iter {
1985 struct tnode *tnode;
1986 struct trie *trie;
1987 unsigned index;
1988 unsigned depth;
1989};
Robert Olsson19baf832005-06-21 12:43:18 -07001990
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001991static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001992{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001993 struct tnode *tn = iter->tnode;
1994 unsigned cindex = iter->index;
1995 struct tnode *p;
1996
Eric W. Biederman6640e692007-01-24 14:42:04 -08001997 /* A single entry routing table */
1998 if (!tn)
1999 return NULL;
2000
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002001 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2002 iter->tnode, iter->index, iter->depth);
2003rescan:
2004 while (cindex < (1<<tn->bits)) {
2005 struct node *n = tnode_get_child(tn, cindex);
2006
2007 if (n) {
2008 if (IS_LEAF(n)) {
2009 iter->tnode = tn;
2010 iter->index = cindex + 1;
2011 } else {
2012 /* push down one level */
2013 iter->tnode = (struct tnode *) n;
2014 iter->index = 0;
2015 ++iter->depth;
2016 }
2017 return n;
2018 }
2019
2020 ++cindex;
2021 }
2022
2023 /* Current node exhausted, pop back up */
2024 p = NODE_PARENT(tn);
2025 if (p) {
2026 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2027 tn = p;
2028 --iter->depth;
2029 goto rescan;
2030 }
2031
2032 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002033 return NULL;
2034}
2035
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002036static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2037 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002038{
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002039 struct node *n ;
2040
Stephen Hemminger132adf52007-03-08 20:44:43 -08002041 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002042 return NULL;
2043
2044 n = rcu_dereference(t->trie);
2045
Stephen Hemminger132adf52007-03-08 20:44:43 -08002046 if (!iter)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002047 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002048
Eric W. Biederman6640e692007-01-24 14:42:04 -08002049 if (n) {
2050 if (IS_TNODE(n)) {
2051 iter->tnode = (struct tnode *) n;
2052 iter->trie = t;
2053 iter->index = 0;
2054 iter->depth = 1;
2055 } else {
2056 iter->tnode = NULL;
2057 iter->trie = t;
2058 iter->index = 0;
2059 iter->depth = 0;
2060 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002061 return n;
2062 }
Robert Olsson19baf832005-06-21 12:43:18 -07002063 return NULL;
2064}
2065
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002066static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002067{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002068 struct node *n;
2069 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002070
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002071 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002072
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002073 rcu_read_lock();
2074 for (n = fib_trie_get_first(&iter, t); n;
2075 n = fib_trie_get_next(&iter)) {
2076 if (IS_LEAF(n)) {
2077 s->leaves++;
2078 s->totdepth += iter.depth;
2079 if (iter.depth > s->maxdepth)
2080 s->maxdepth = iter.depth;
2081 } else {
2082 const struct tnode *tn = (const struct tnode *) n;
2083 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002084
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002085 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002086 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002087 s->nodesizes[tn->bits]++;
2088
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002089 for (i = 0; i < (1<<tn->bits); i++)
2090 if (!tn->child[i])
2091 s->nullpointers++;
2092 }
2093 }
2094 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002095}
2096
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002097/*
Robert Olsson19baf832005-06-21 12:43:18 -07002098 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002099 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002100static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002101{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002102 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002103
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002104 if (stat->leaves)
2105 avdepth = stat->totdepth*100 / stat->leaves;
2106 else
2107 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002108
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002109 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2110 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002111
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002112 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Olof Johansson91b9a272005-08-09 20:24:39 -07002113
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002114 bytes = sizeof(struct leaf) * stat->leaves;
2115 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
2116 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002117
Robert Olsson06ef9212006-03-20 21:35:01 -08002118 max = MAX_STAT_DEPTH;
2119 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002120 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002121
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002122 pointers = 0;
2123 for (i = 1; i <= max; i++)
2124 if (stat->nodesizes[i] != 0) {
2125 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2126 pointers += (1<<i) * stat->nodesizes[i];
2127 }
2128 seq_putc(seq, '\n');
2129 seq_printf(seq, "\tPointers: %d\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002130
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002131 bytes += sizeof(struct node *) * pointers;
2132 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2133 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
Robert Olsson19baf832005-06-21 12:43:18 -07002134
2135#ifdef CONFIG_IP_FIB_TRIE_STATS
2136 seq_printf(seq, "Counters:\n---------\n");
2137 seq_printf(seq,"gets = %d\n", t->stats.gets);
2138 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2139 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2140 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2141 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
Robert Olsson2f368952005-07-05 15:02:40 -07002142 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002143#ifdef CLEAR_STATS
2144 memset(&(t->stats), 0, sizeof(t->stats));
2145#endif
2146#endif /* CONFIG_IP_FIB_TRIE_STATS */
2147}
2148
2149static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2150{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002151 struct trie_stat *stat;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002152
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002153 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2154 if (!stat)
2155 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002156
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002157 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2158 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002159
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002160 if (trie_local) {
2161 seq_printf(seq, "Local:\n");
2162 trie_collect_stats(trie_local, stat);
2163 trie_show_stats(seq, stat);
Robert Olsson19baf832005-06-21 12:43:18 -07002164 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002165
2166 if (trie_main) {
2167 seq_printf(seq, "Main:\n");
2168 trie_collect_stats(trie_main, stat);
2169 trie_show_stats(seq, stat);
2170 }
2171 kfree(stat);
2172
Robert Olsson19baf832005-06-21 12:43:18 -07002173 return 0;
2174}
2175
Robert Olsson19baf832005-06-21 12:43:18 -07002176static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2177{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002178 return single_open(file, fib_triestat_seq_show, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07002179}
2180
Arjan van de Ven9a321442007-02-12 00:55:35 -08002181static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002182 .owner = THIS_MODULE,
2183 .open = fib_triestat_seq_open,
2184 .read = seq_read,
2185 .llseek = seq_lseek,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002186 .release = single_release,
Robert Olsson19baf832005-06-21 12:43:18 -07002187};
2188
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002189static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2190 loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002191{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002192 loff_t idx = 0;
2193 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -07002194
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002195 for (n = fib_trie_get_first(iter, trie_local);
2196 n; ++idx, n = fib_trie_get_next(iter)) {
2197 if (pos == idx)
2198 return n;
2199 }
Robert Olsson19baf832005-06-21 12:43:18 -07002200
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002201 for (n = fib_trie_get_first(iter, trie_main);
2202 n; ++idx, n = fib_trie_get_next(iter)) {
2203 if (pos == idx)
2204 return n;
2205 }
Robert Olsson19baf832005-06-21 12:43:18 -07002206 return NULL;
2207}
2208
2209static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2210{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002211 rcu_read_lock();
2212 if (*pos == 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07002213 return SEQ_START_TOKEN;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002214 return fib_trie_get_idx(seq->private, *pos - 1);
Robert Olsson19baf832005-06-21 12:43:18 -07002215}
2216
2217static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2218{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002219 struct fib_trie_iter *iter = seq->private;
2220 void *l = v;
2221
Robert Olsson19baf832005-06-21 12:43:18 -07002222 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002223 if (v == SEQ_START_TOKEN)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002224 return fib_trie_get_idx(iter, 0);
Olof Johansson91b9a272005-08-09 20:24:39 -07002225
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002226 v = fib_trie_get_next(iter);
2227 BUG_ON(v == l);
2228 if (v)
2229 return v;
2230
2231 /* continue scan in next trie */
2232 if (iter->trie == trie_local)
2233 return fib_trie_get_first(iter, trie_main);
2234
2235 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002236}
2237
2238static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2239{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002240 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002241}
2242
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002243static void seq_indent(struct seq_file *seq, int n)
2244{
2245 while (n-- > 0) seq_puts(seq, " ");
2246}
Robert Olsson19baf832005-06-21 12:43:18 -07002247
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002248static inline const char *rtn_scope(enum rt_scope_t s)
2249{
2250 static char buf[32];
2251
Stephen Hemminger132adf52007-03-08 20:44:43 -08002252 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002253 case RT_SCOPE_UNIVERSE: return "universe";
2254 case RT_SCOPE_SITE: return "site";
2255 case RT_SCOPE_LINK: return "link";
2256 case RT_SCOPE_HOST: return "host";
2257 case RT_SCOPE_NOWHERE: return "nowhere";
2258 default:
2259 snprintf(buf, sizeof(buf), "scope=%d", s);
2260 return buf;
2261 }
2262}
2263
2264static const char *rtn_type_names[__RTN_MAX] = {
2265 [RTN_UNSPEC] = "UNSPEC",
2266 [RTN_UNICAST] = "UNICAST",
2267 [RTN_LOCAL] = "LOCAL",
2268 [RTN_BROADCAST] = "BROADCAST",
2269 [RTN_ANYCAST] = "ANYCAST",
2270 [RTN_MULTICAST] = "MULTICAST",
2271 [RTN_BLACKHOLE] = "BLACKHOLE",
2272 [RTN_UNREACHABLE] = "UNREACHABLE",
2273 [RTN_PROHIBIT] = "PROHIBIT",
2274 [RTN_THROW] = "THROW",
2275 [RTN_NAT] = "NAT",
2276 [RTN_XRESOLVE] = "XRESOLVE",
2277};
2278
2279static inline const char *rtn_type(unsigned t)
2280{
2281 static char buf[32];
2282
2283 if (t < __RTN_MAX && rtn_type_names[t])
2284 return rtn_type_names[t];
2285 snprintf(buf, sizeof(buf), "type %d", t);
2286 return buf;
2287}
2288
2289/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002290static int fib_trie_seq_show(struct seq_file *seq, void *v)
2291{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002292 const struct fib_trie_iter *iter = seq->private;
2293 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002294
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002295 if (v == SEQ_START_TOKEN)
2296 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002297
Robert Olsson095b8502007-01-26 19:06:01 -08002298 if (!NODE_PARENT(n)) {
2299 if (iter->trie == trie_local)
2300 seq_puts(seq, "<local>:\n");
2301 else
2302 seq_puts(seq, "<main>:\n");
2303 }
2304
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002305 if (IS_TNODE(n)) {
2306 struct tnode *tn = (struct tnode *) n;
Al Viro32ab5f82006-09-26 22:21:45 -07002307 __be32 prf = htonl(MASK_PFX(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002308
Robert Olsson1d25cd62005-09-19 15:29:52 -07002309 seq_indent(seq, iter->depth-1);
2310 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002311 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002312 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002313
Olof Johansson91b9a272005-08-09 20:24:39 -07002314 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002315 struct leaf *l = (struct leaf *) n;
2316 int i;
Al Viro32ab5f82006-09-26 22:21:45 -07002317 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002318
2319 seq_indent(seq, iter->depth);
2320 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2321 for (i = 32; i >= 0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002322 struct leaf_info *li = find_leaf_info(l, i);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002323 if (li) {
2324 struct fib_alias *fa;
2325 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2326 seq_indent(seq, iter->depth+1);
2327 seq_printf(seq, " /%d %s %s", i,
2328 rtn_scope(fa->fa_scope),
2329 rtn_type(fa->fa_type));
2330 if (fa->fa_tos)
2331 seq_printf(seq, "tos =%d\n",
2332 fa->fa_tos);
2333 seq_putc(seq, '\n');
2334 }
2335 }
2336 }
Robert Olsson19baf832005-06-21 12:43:18 -07002337 }
2338
2339 return 0;
2340}
2341
2342static struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002343 .start = fib_trie_seq_start,
2344 .next = fib_trie_seq_next,
2345 .stop = fib_trie_seq_stop,
2346 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002347};
2348
2349static int fib_trie_seq_open(struct inode *inode, struct file *file)
2350{
2351 struct seq_file *seq;
2352 int rc = -ENOMEM;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002353 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2354
2355 if (!s)
2356 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07002357
2358 rc = seq_open(file, &fib_trie_seq_ops);
2359 if (rc)
2360 goto out_kfree;
2361
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002362 seq = file->private_data;
2363 seq->private = s;
2364 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002365out:
2366 return rc;
2367out_kfree:
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002368 kfree(s);
Robert Olsson19baf832005-06-21 12:43:18 -07002369 goto out;
2370}
2371
Arjan van de Ven9a321442007-02-12 00:55:35 -08002372static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002373 .owner = THIS_MODULE,
2374 .open = fib_trie_seq_open,
2375 .read = seq_read,
2376 .llseek = seq_lseek,
2377 .release = seq_release_private,
2378};
2379
Al Viro32ab5f82006-09-26 22:21:45 -07002380static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002381{
2382 static unsigned type2flags[RTN_MAX + 1] = {
2383 [7] = RTF_REJECT, [8] = RTF_REJECT,
2384 };
2385 unsigned flags = type2flags[type];
2386
2387 if (fi && fi->fib_nh->nh_gw)
2388 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002389 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002390 flags |= RTF_HOST;
2391 flags |= RTF_UP;
2392 return flags;
2393}
2394
2395/*
2396 * This outputs /proc/net/route.
2397 * The format of the file is not supposed to be changed
2398 * and needs to be same as fib_hash output to avoid breaking
2399 * legacy utilities
2400 */
2401static int fib_route_seq_show(struct seq_file *seq, void *v)
2402{
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002403 const struct fib_trie_iter *iter = seq->private;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002404 struct leaf *l = v;
2405 int i;
2406 char bf[128];
2407
2408 if (v == SEQ_START_TOKEN) {
2409 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2410 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2411 "\tWindow\tIRTT");
2412 return 0;
2413 }
2414
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002415 if (iter->trie == trie_local)
2416 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002417 if (IS_TNODE(l))
2418 return 0;
2419
2420 for (i=32; i>=0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002421 struct leaf_info *li = find_leaf_info(l, i);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002422 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002423 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002424
2425 if (!li)
2426 continue;
2427
2428 mask = inet_make_mask(li->plen);
2429 prefix = htonl(l->key);
2430
2431 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002432 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002433 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2434
2435 if (fa->fa_type == RTN_BROADCAST
2436 || fa->fa_type == RTN_MULTICAST)
2437 continue;
2438
2439 if (fi)
2440 snprintf(bf, sizeof(bf),
2441 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2442 fi->fib_dev ? fi->fib_dev->name : "*",
2443 prefix,
2444 fi->fib_nh->nh_gw, flags, 0, 0,
2445 fi->fib_priority,
2446 mask,
2447 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2448 fi->fib_window,
2449 fi->fib_rtt >> 3);
2450 else
2451 snprintf(bf, sizeof(bf),
2452 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2453 prefix, 0, flags, 0, 0, 0,
2454 mask, 0, 0, 0);
2455
2456 seq_printf(seq, "%-127s\n", bf);
2457 }
2458 }
2459
2460 return 0;
2461}
2462
2463static struct seq_operations fib_route_seq_ops = {
2464 .start = fib_trie_seq_start,
2465 .next = fib_trie_seq_next,
2466 .stop = fib_trie_seq_stop,
2467 .show = fib_route_seq_show,
2468};
2469
2470static int fib_route_seq_open(struct inode *inode, struct file *file)
2471{
2472 struct seq_file *seq;
2473 int rc = -ENOMEM;
2474 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2475
2476 if (!s)
2477 goto out;
2478
2479 rc = seq_open(file, &fib_route_seq_ops);
2480 if (rc)
2481 goto out_kfree;
2482
2483 seq = file->private_data;
2484 seq->private = s;
2485 memset(s, 0, sizeof(*s));
2486out:
2487 return rc;
2488out_kfree:
2489 kfree(s);
2490 goto out;
2491}
2492
Arjan van de Ven9a321442007-02-12 00:55:35 -08002493static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002494 .owner = THIS_MODULE,
2495 .open = fib_route_seq_open,
2496 .read = seq_read,
2497 .llseek = seq_lseek,
2498 .release = seq_release_private,
Robert Olsson19baf832005-06-21 12:43:18 -07002499};
2500
2501int __init fib_proc_init(void)
2502{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002503 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
2504 goto out1;
2505
2506 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
2507 goto out2;
2508
2509 if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
2510 goto out3;
2511
Robert Olsson19baf832005-06-21 12:43:18 -07002512 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002513
2514out3:
2515 proc_net_remove("fib_triestat");
2516out2:
2517 proc_net_remove("fib_trie");
2518out1:
2519 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002520}
2521
2522void __init fib_proc_exit(void)
2523{
2524 proc_net_remove("fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002525 proc_net_remove("fib_triestat");
2526 proc_net_remove("route");
Robert Olsson19baf832005-06-21 12:43:18 -07002527}
2528
2529#endif /* CONFIG_PROC_FS */