blob: 6f818cc7efd0e8dd5b58fb48cbfc16b5edca20f8 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
44 */
45
Robert Olsson2f368952005-07-05 15:02:40 -070046#define VERSION "0.325"
Robert Olsson19baf832005-06-21 12:43:18 -070047
48#include <linux/config.h>
49#include <asm/uaccess.h>
50#include <asm/system.h>
51#include <asm/bitops.h>
52#include <linux/types.h>
53#include <linux/kernel.h>
54#include <linux/sched.h>
55#include <linux/mm.h>
56#include <linux/string.h>
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/errno.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_arp.h>
64#include <linux/proc_fs.h>
65#include <linux/skbuff.h>
66#include <linux/netlink.h>
67#include <linux/init.h>
68#include <linux/list.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/tcp.h>
73#include <net/sock.h>
74#include <net/ip_fib.h>
75#include "fib_lookup.h"
76
77#undef CONFIG_IP_FIB_TRIE_STATS
78#define MAX_CHILDS 16384
79
80#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81#define KEYLENGTH (8*sizeof(t_key))
82#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
84
85static DEFINE_RWLOCK(fib_lock);
86
87typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
Olof Johansson91b9a272005-08-09 20:24:39 -070092#define NODE_PARENT(node) \
93 ((struct tnode *)((node)->parent & ~NODE_TYPE_MASK))
94#define NODE_SET_PARENT(node, ptr) \
95 ((node)->parent = (((unsigned long)(ptr)) | \
96 ((node)->parent & NODE_TYPE_MASK)))
97#define NODE_INIT_PARENT(node, type) \
98 ((node)->parent = (type))
99#define NODE_TYPE(node) \
100 ((node)->parent & NODE_TYPE_MASK)
Robert Olsson19baf832005-06-21 12:43:18 -0700101
Olof Johansson91b9a272005-08-09 20:24:39 -0700102#define IS_TNODE(n) (!(n->parent & T_LEAF))
103#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -0700104
105struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -0700106 t_key key;
107 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700108};
109
110struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700111 t_key key;
112 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700113 struct hlist_head list;
114};
115
116struct leaf_info {
117 struct hlist_node hlist;
118 int plen;
119 struct list_head falh;
120};
121
122struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700123 t_key key;
124 unsigned long parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700130};
131
132#ifdef CONFIG_IP_FIB_TRIE_STATS
133struct trie_use_stats {
134 unsigned int gets;
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700139 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700140};
141#endif
142
143struct trie_stat {
144 unsigned int totdepth;
145 unsigned int maxdepth;
146 unsigned int tnodes;
147 unsigned int leaves;
148 unsigned int nullpointers;
149 unsigned int nodesizes[MAX_CHILDS];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700150};
Robert Olsson19baf832005-06-21 12:43:18 -0700151
152struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700153 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700154#ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156#endif
Olof Johansson91b9a272005-08-09 20:24:39 -0700157 int size;
Robert Olsson19baf832005-06-21 12:43:18 -0700158 unsigned int revision;
159};
160
161static int trie_debug = 0;
162
Olof Johansson91b9a272005-08-09 20:24:39 -0700163#define DBG(x...) do { if (trie_debug) printk(x); } while (0)
164
Robert Olsson19baf832005-06-21 12:43:18 -0700165static int tnode_full(struct tnode *tn, struct node *n);
166static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
167static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
168static int tnode_child_length(struct tnode *tn);
169static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f368952005-07-05 15:02:40 -0700170static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err);
171static struct tnode *halve(struct trie *t, struct tnode *tn, int *err);
Robert Olsson19baf832005-06-21 12:43:18 -0700172static void tnode_free(struct tnode *tn);
173static void trie_dump_seq(struct seq_file *seq, struct trie *t);
174extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
175extern int fib_detect_death(struct fib_info *fi, int order,
Olof Johansson91b9a272005-08-09 20:24:39 -0700176 struct fib_info **last_resort, int *last_idx, int *dflt);
Robert Olsson19baf832005-06-21 12:43:18 -0700177
178extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
Olof Johansson91b9a272005-08-09 20:24:39 -0700179 struct nlmsghdr *n, struct netlink_skb_parms *req);
Robert Olsson19baf832005-06-21 12:43:18 -0700180
181static kmem_cache_t *fn_alias_kmem;
182static struct trie *trie_local = NULL, *trie_main = NULL;
183
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700184static inline struct node *tnode_get_child(struct tnode *tn, int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700185{
Olof Johansson91b9a272005-08-09 20:24:39 -0700186 BUG_ON(i >= 1 << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700187
Olof Johansson91b9a272005-08-09 20:24:39 -0700188 return tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700189}
190
191static inline int tnode_child_length(struct tnode *tn)
192{
Olof Johansson91b9a272005-08-09 20:24:39 -0700193 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700194}
195
Robert Olsson19baf832005-06-21 12:43:18 -0700196static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
197{
Olof Johansson91b9a272005-08-09 20:24:39 -0700198 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700199 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700200 else
Robert Olsson19baf832005-06-21 12:43:18 -0700201 return 0;
202}
203
204static inline int tkey_equals(t_key a, t_key b)
205{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700206 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700207}
208
209static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
210{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700211 if (bits == 0 || offset >= KEYLENGTH)
212 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700213 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
214 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700215}
Robert Olsson19baf832005-06-21 12:43:18 -0700216
217static inline int tkey_mismatch(t_key a, int offset, t_key b)
218{
219 t_key diff = a ^ b;
220 int i = offset;
221
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700222 if (!diff)
223 return 0;
224 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700225 i++;
226 return i;
227}
228
Olof Johansson91b9a272005-08-09 20:24:39 -0700229/* Candidate for fib_semantics */
Robert Olsson19baf832005-06-21 12:43:18 -0700230
231static void fn_free_alias(struct fib_alias *fa)
232{
233 fib_release_info(fa->fa_info);
234 kmem_cache_free(fn_alias_kmem, fa);
235}
236
237/*
238 To understand this stuff, an understanding of keys and all their bits is
239 necessary. Every node in the trie has a key associated with it, but not
240 all of the bits in that key are significant.
241
242 Consider a node 'n' and its parent 'tp'.
243
244 If n is a leaf, every bit in its key is significant. Its presence is
245 necessitaded by path compression, since during a tree traversal (when
246 searching for a leaf - unless we are doing an insertion) we will completely
247 ignore all skipped bits we encounter. Thus we need to verify, at the end of
248 a potentially successful search, that we have indeed been walking the
249 correct key path.
250
251 Note that we can never "miss" the correct key in the tree if present by
252 following the wrong path. Path compression ensures that segments of the key
253 that are the same for all keys with a given prefix are skipped, but the
254 skipped part *is* identical for each node in the subtrie below the skipped
255 bit! trie_insert() in this implementation takes care of that - note the
256 call to tkey_sub_equals() in trie_insert().
257
258 if n is an internal node - a 'tnode' here, the various parts of its key
259 have many different meanings.
260
261 Example:
262 _________________________________________________________________
263 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
264 -----------------------------------------------------------------
265 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
266
267 _________________________________________________________________
268 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
269 -----------------------------------------------------------------
270 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
271
272 tp->pos = 7
273 tp->bits = 3
274 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700275 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700276
277 First, let's just ignore the bits that come before the parent tp, that is
278 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
279 not use them for anything.
280
281 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
282 index into the parent's child array. That is, they will be used to find
283 'n' among tp's children.
284
285 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
286 for the node n.
287
288 All the bits we have seen so far are significant to the node n. The rest
289 of the bits are really not needed or indeed known in n->key.
290
291 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
292 n's child array, and will of course be different for each child.
293
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700294
Robert Olsson19baf832005-06-21 12:43:18 -0700295 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
296 at this point.
297
298*/
299
300static void check_tnode(struct tnode *tn)
301{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700302 if (tn && tn->pos+tn->bits > 32) {
Robert Olsson19baf832005-06-21 12:43:18 -0700303 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
304 }
305}
306
307static int halve_threshold = 25;
308static int inflate_threshold = 50;
309
310static struct leaf *leaf_new(void)
311{
312 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700313 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -0700314 NODE_INIT_PARENT(l, T_LEAF);
315 INIT_HLIST_HEAD(&l->list);
316 }
317 return l;
318}
319
320static struct leaf_info *leaf_info_new(int plen)
321{
322 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700323
324 if (!li)
325 return NULL;
326
327 li->plen = plen;
328 INIT_LIST_HEAD(&li->falh);
329
Robert Olsson19baf832005-06-21 12:43:18 -0700330 return li;
331}
332
333static inline void free_leaf(struct leaf *l)
334{
335 kfree(l);
336}
337
338static inline void free_leaf_info(struct leaf_info *li)
339{
340 kfree(li);
341}
342
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700343static struct tnode *tnode_alloc(unsigned int size)
344{
345 if (size <= PAGE_SIZE) {
346 return kmalloc(size, GFP_KERNEL);
347 } else {
348 return (struct tnode *)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700349 __get_free_pages(GFP_KERNEL, get_order(size));
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700350 }
351}
352
353static void __tnode_free(struct tnode *tn)
354{
355 unsigned int size = sizeof(struct tnode) +
Olof Johansson91b9a272005-08-09 20:24:39 -0700356 (1 << tn->bits) * sizeof(struct node *);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700357
358 if (size <= PAGE_SIZE)
359 kfree(tn);
360 else
361 free_pages((unsigned long)tn, get_order(size));
362}
363
Robert Olsson19baf832005-06-21 12:43:18 -0700364static struct tnode* tnode_new(t_key key, int pos, int bits)
365{
366 int nchildren = 1<<bits;
367 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700368 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700369
Olof Johansson91b9a272005-08-09 20:24:39 -0700370 if (tn) {
Robert Olsson19baf832005-06-21 12:43:18 -0700371 memset(tn, 0, sz);
372 NODE_INIT_PARENT(tn, T_TNODE);
373 tn->pos = pos;
374 tn->bits = bits;
375 tn->key = key;
376 tn->full_children = 0;
377 tn->empty_children = 1<<bits;
378 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700379
Olof Johansson91b9a272005-08-09 20:24:39 -0700380 DBG("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
381 (unsigned int) (sizeof(struct node) * 1<<bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700382 return tn;
383}
384
385static void tnode_free(struct tnode *tn)
386{
Olof Johansson91b9a272005-08-09 20:24:39 -0700387 BUG_ON(!tn);
388
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700389 if (IS_LEAF(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700390 free_leaf((struct leaf *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700391 DBG("FL %p \n", tn);
392 } else {
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700393 __tnode_free(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700394 DBG("FT %p \n", tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700395 }
396}
397
398/*
399 * Check whether a tnode 'n' is "full", i.e. it is an internal node
400 * and no bits are skipped. See discussion in dyntree paper p. 6
401 */
402
403static inline int tnode_full(struct tnode *tn, struct node *n)
404{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700405 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700406 return 0;
407
408 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
409}
410
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700411static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700412{
413 tnode_put_child_reorg(tn, i, n, -1);
414}
415
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700416 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700417 * Add a child at position i overwriting the old value.
418 * Update the value of full_children and empty_children.
419 */
420
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700421static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700422{
423 struct node *chi;
424 int isfull;
425
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700426 if (i >= 1<<tn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -0700427 printk("bits=%d, i=%d\n", tn->bits, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700428 BUG();
Robert Olsson19baf832005-06-21 12:43:18 -0700429 }
430 write_lock_bh(&fib_lock);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700431 chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700432
433 /* update emptyChildren */
434 if (n == NULL && chi != NULL)
435 tn->empty_children++;
436 else if (n != NULL && chi == NULL)
437 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700438
Robert Olsson19baf832005-06-21 12:43:18 -0700439 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700440 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700441 wasfull = tnode_full(tn, chi);
442
443 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700444 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700445 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700446 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700447 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700448
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700449 if (n)
450 NODE_SET_PARENT(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700451
452 tn->child[i] = n;
453 write_unlock_bh(&fib_lock);
454}
455
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700456static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700457{
458 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700459 int err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700460
461 if (!tn)
462 return NULL;
463
Olof Johansson91b9a272005-08-09 20:24:39 -0700464 DBG("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
465 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700466
467 /* No children */
468 if (tn->empty_children == tnode_child_length(tn)) {
469 tnode_free(tn);
470 return NULL;
471 }
472 /* One child */
473 if (tn->empty_children == tnode_child_length(tn) - 1)
474 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700475 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700476
477 write_lock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700478 n = tn->child[i];
479 if (!n) {
Robert Olsson19baf832005-06-21 12:43:18 -0700480 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700481 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700482 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700483
484 /* compress one level */
485 NODE_INIT_PARENT(n, NODE_TYPE(n));
486
Robert Olsson19baf832005-06-21 12:43:18 -0700487 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700488 tnode_free(tn);
489 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700490 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700491 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700492 * Double as long as the resulting node has a number of
493 * nonempty nodes that are above the threshold.
494 */
495
496 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700497 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
498 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700499 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700500 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700501 * children in the *doubled* node is at least 'high'."
502 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700503 * 'high' in this instance is the variable 'inflate_threshold'. It
504 * is expressed as a percentage, so we multiply it with
505 * tnode_child_length() and instead of multiplying by 2 (since the
506 * child array will be doubled by inflate()) and multiplying
507 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700508 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700509 *
510 * The left-hand side may look a bit weird: tnode_child_length(tn)
511 * - tn->empty_children is of course the number of non-null children
512 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700513 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700514 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700515 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700516 *
Robert Olsson19baf832005-06-21 12:43:18 -0700517 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700518 *
Robert Olsson19baf832005-06-21 12:43:18 -0700519 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700520 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700521 * tn->full_children;
522 *
523 * new_child_length = tnode_child_length(tn) * 2;
524 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700525 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700526 * new_child_length;
527 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700528 *
529 * ...and so on, tho it would mess up the while () loop.
530 *
Robert Olsson19baf832005-06-21 12:43:18 -0700531 * anyway,
532 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
533 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700534 *
Robert Olsson19baf832005-06-21 12:43:18 -0700535 * avoid a division:
536 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
537 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700538 *
Robert Olsson19baf832005-06-21 12:43:18 -0700539 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700540 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700541 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700542 *
Robert Olsson19baf832005-06-21 12:43:18 -0700543 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700544 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700545 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700546 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700547 *
Robert Olsson19baf832005-06-21 12:43:18 -0700548 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700549 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700550 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700551 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700552 *
Robert Olsson19baf832005-06-21 12:43:18 -0700553 */
554
555 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700556
Robert Olsson2f368952005-07-05 15:02:40 -0700557 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700558 while ((tn->full_children > 0 &&
559 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
560 inflate_threshold * tnode_child_length(tn))) {
561
Robert Olsson2f368952005-07-05 15:02:40 -0700562 tn = inflate(t, tn, &err);
563
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700564 if (err) {
Robert Olsson2f368952005-07-05 15:02:40 -0700565#ifdef CONFIG_IP_FIB_TRIE_STATS
566 t->stats.resize_node_skipped++;
567#endif
568 break;
569 }
Robert Olsson19baf832005-06-21 12:43:18 -0700570 }
571
572 check_tnode(tn);
573
574 /*
575 * Halve as long as the number of empty children in this
576 * node is above threshold.
577 */
Robert Olsson2f368952005-07-05 15:02:40 -0700578
579 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700580 while (tn->bits > 1 &&
581 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olsson2f368952005-07-05 15:02:40 -0700582 halve_threshold * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700583
Robert Olsson2f368952005-07-05 15:02:40 -0700584 tn = halve(t, tn, &err);
585
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700586 if (err) {
Robert Olsson2f368952005-07-05 15:02:40 -0700587#ifdef CONFIG_IP_FIB_TRIE_STATS
588 t->stats.resize_node_skipped++;
589#endif
590 break;
591 }
592 }
593
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700594
Robert Olsson19baf832005-06-21 12:43:18 -0700595 /* Only one child remains */
596
597 if (tn->empty_children == tnode_child_length(tn) - 1)
598 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700599 struct node *n;
600
Robert Olsson19baf832005-06-21 12:43:18 -0700601 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -0700602
Olof Johansson91b9a272005-08-09 20:24:39 -0700603 n = tn->child[i];
604 if (!n) {
Robert Olsson19baf832005-06-21 12:43:18 -0700605 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700606 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700607 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700608
609 /* compress one level */
610
611 NODE_INIT_PARENT(n, NODE_TYPE(n));
612
Robert Olsson19baf832005-06-21 12:43:18 -0700613 write_unlock_bh(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -0700614 tnode_free(tn);
615 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700616 }
617
618 return (struct node *) tn;
619}
620
Robert Olsson2f368952005-07-05 15:02:40 -0700621static struct tnode *inflate(struct trie *t, struct tnode *tn, int *err)
Robert Olsson19baf832005-06-21 12:43:18 -0700622{
623 struct tnode *inode;
624 struct tnode *oldtnode = tn;
625 int olen = tnode_child_length(tn);
626 int i;
627
Olof Johansson91b9a272005-08-09 20:24:39 -0700628 DBG("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700629
630 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
631
Robert Olsson2f368952005-07-05 15:02:40 -0700632 if (!tn) {
633 *err = -ENOMEM;
634 return oldtnode;
635 }
636
637 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700638 * Preallocate and store tnodes before the actual work so we
639 * don't get into an inconsistent state if memory allocation
640 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700641 * of tnode is ignored.
642 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700643
644 for (i = 0; i < olen; i++) {
Robert Olsson2f368952005-07-05 15:02:40 -0700645 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
646
647 if (inode &&
648 IS_TNODE(inode) &&
649 inode->pos == oldtnode->pos + oldtnode->bits &&
650 inode->bits > 1) {
651 struct tnode *left, *right;
Robert Olsson2f368952005-07-05 15:02:40 -0700652 t_key m = TKEY_GET_MASK(inode->pos, 1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700653
Robert Olsson2f368952005-07-05 15:02:40 -0700654 left = tnode_new(inode->key&(~m), inode->pos + 1,
655 inode->bits - 1);
656
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700657 if (!left) {
658 *err = -ENOMEM;
Robert Olsson2f368952005-07-05 15:02:40 -0700659 break;
660 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700661
Robert Olsson2f368952005-07-05 15:02:40 -0700662 right = tnode_new(inode->key|m, inode->pos + 1,
663 inode->bits - 1);
664
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700665 if (!right) {
666 *err = -ENOMEM;
Robert Olsson2f368952005-07-05 15:02:40 -0700667 break;
668 }
669
670 put_child(t, tn, 2*i, (struct node *) left);
671 put_child(t, tn, 2*i+1, (struct node *) right);
672 }
673 }
674
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700675 if (*err) {
Robert Olsson2f368952005-07-05 15:02:40 -0700676 int size = tnode_child_length(tn);
677 int j;
678
Olof Johansson91b9a272005-08-09 20:24:39 -0700679 for (j = 0; j < size; j++)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700680 if (tn->child[j])
Robert Olsson2f368952005-07-05 15:02:40 -0700681 tnode_free((struct tnode *)tn->child[j]);
682
683 tnode_free(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700684
Robert Olsson2f368952005-07-05 15:02:40 -0700685 *err = -ENOMEM;
686 return oldtnode;
687 }
Robert Olsson19baf832005-06-21 12:43:18 -0700688
Olof Johansson91b9a272005-08-09 20:24:39 -0700689 for (i = 0; i < olen; i++) {
Robert Olsson19baf832005-06-21 12:43:18 -0700690 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700691 struct tnode *left, *right;
692 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700693
Robert Olsson19baf832005-06-21 12:43:18 -0700694 /* An empty child */
695 if (node == NULL)
696 continue;
697
698 /* A leaf or an internal node with skipped bits */
699
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700700 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700701 tn->pos + tn->bits - 1) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700702 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
Robert Olsson19baf832005-06-21 12:43:18 -0700703 1) == 0)
704 put_child(t, tn, 2*i, node);
705 else
706 put_child(t, tn, 2*i+1, node);
707 continue;
708 }
709
710 /* An internal node with two children */
711 inode = (struct tnode *) node;
712
713 if (inode->bits == 1) {
714 put_child(t, tn, 2*i, inode->child[0]);
715 put_child(t, tn, 2*i+1, inode->child[1]);
716
717 tnode_free(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700718 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700719 }
720
Olof Johansson91b9a272005-08-09 20:24:39 -0700721 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700722
Olof Johansson91b9a272005-08-09 20:24:39 -0700723 /* We will replace this node 'inode' with two new
724 * ones, 'left' and 'right', each with half of the
725 * original children. The two new nodes will have
726 * a position one bit further down the key and this
727 * means that the "significant" part of their keys
728 * (see the discussion near the top of this file)
729 * will differ by one bit, which will be "0" in
730 * left's key and "1" in right's key. Since we are
731 * moving the key position by one step, the bit that
732 * we are moving away from - the bit at position
733 * (inode->pos) - is the one that will differ between
734 * left and right. So... we synthesize that bit in the
735 * two new keys.
736 * The mask 'm' below will be a single "one" bit at
737 * the position (inode->pos)
738 */
Robert Olsson19baf832005-06-21 12:43:18 -0700739
Olof Johansson91b9a272005-08-09 20:24:39 -0700740 /* Use the old key, but set the new significant
741 * bit to zero.
742 */
Robert Olsson19baf832005-06-21 12:43:18 -0700743
Olof Johansson91b9a272005-08-09 20:24:39 -0700744 left = (struct tnode *) tnode_get_child(tn, 2*i);
745 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700746
Olof Johansson91b9a272005-08-09 20:24:39 -0700747 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700748
Olof Johansson91b9a272005-08-09 20:24:39 -0700749 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
750 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700751
Olof Johansson91b9a272005-08-09 20:24:39 -0700752 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700753
Olof Johansson91b9a272005-08-09 20:24:39 -0700754 size = tnode_child_length(left);
755 for (j = 0; j < size; j++) {
756 put_child(t, left, j, inode->child[j]);
757 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700758 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700759 put_child(t, tn, 2*i, resize(t, left));
760 put_child(t, tn, 2*i+1, resize(t, right));
761
762 tnode_free(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700763 }
764 tnode_free(oldtnode);
765 return tn;
766}
767
Robert Olsson2f368952005-07-05 15:02:40 -0700768static struct tnode *halve(struct trie *t, struct tnode *tn, int *err)
Robert Olsson19baf832005-06-21 12:43:18 -0700769{
770 struct tnode *oldtnode = tn;
771 struct node *left, *right;
772 int i;
773 int olen = tnode_child_length(tn);
774
Olof Johansson91b9a272005-08-09 20:24:39 -0700775 DBG("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700776
777 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700778
Robert Olsson2f368952005-07-05 15:02:40 -0700779 if (!tn) {
780 *err = -ENOMEM;
781 return oldtnode;
782 }
783
784 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700785 * Preallocate and store tnodes before the actual work so we
786 * don't get into an inconsistent state if memory allocation
787 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700788 * of tnode is ignored.
789 */
790
Olof Johansson91b9a272005-08-09 20:24:39 -0700791 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700792 left = tnode_get_child(oldtnode, i);
793 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700794
Robert Olsson2f368952005-07-05 15:02:40 -0700795 /* Two nonempty children */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700796 if (left && right) {
Robert Olsson2f368952005-07-05 15:02:40 -0700797 struct tnode *newBinNode =
798 tnode_new(left->key, tn->pos + tn->bits, 1);
799
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700800 if (!newBinNode) {
801 *err = -ENOMEM;
Robert Olsson2f368952005-07-05 15:02:40 -0700802 break;
803 }
804 put_child(t, tn, i/2, (struct node *)newBinNode);
805 }
806 }
807
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700808 if (*err) {
Robert Olsson2f368952005-07-05 15:02:40 -0700809 int size = tnode_child_length(tn);
810 int j;
811
Olof Johansson91b9a272005-08-09 20:24:39 -0700812 for (j = 0; j < size; j++)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700813 if (tn->child[j])
Robert Olsson2f368952005-07-05 15:02:40 -0700814 tnode_free((struct tnode *)tn->child[j]);
815
816 tnode_free(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700817
Robert Olsson2f368952005-07-05 15:02:40 -0700818 *err = -ENOMEM;
819 return oldtnode;
820 }
Robert Olsson19baf832005-06-21 12:43:18 -0700821
Olof Johansson91b9a272005-08-09 20:24:39 -0700822 for (i = 0; i < olen; i += 2) {
823 struct tnode *newBinNode;
824
Robert Olsson19baf832005-06-21 12:43:18 -0700825 left = tnode_get_child(oldtnode, i);
826 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700827
Robert Olsson19baf832005-06-21 12:43:18 -0700828 /* At least one of the children is empty */
829 if (left == NULL) {
830 if (right == NULL) /* Both are empty */
831 continue;
832 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700833 continue;
834 }
835
836 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700837 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700838 continue;
839 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700840
Robert Olsson19baf832005-06-21 12:43:18 -0700841 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700842 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
843 put_child(t, tn, i/2, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700844
Olof Johansson91b9a272005-08-09 20:24:39 -0700845 BUG_ON(!newBinNode);
Robert Olsson19baf832005-06-21 12:43:18 -0700846
Olof Johansson91b9a272005-08-09 20:24:39 -0700847 put_child(t, newBinNode, 0, left);
848 put_child(t, newBinNode, 1, right);
849 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700850 }
851 tnode_free(oldtnode);
852 return tn;
853}
854
Olof Johansson91b9a272005-08-09 20:24:39 -0700855static void trie_init(struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -0700856{
Olof Johansson91b9a272005-08-09 20:24:39 -0700857 if (!t)
858 return;
859
860 t->size = 0;
861 t->trie = NULL;
862 t->revision = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700863#ifdef CONFIG_IP_FIB_TRIE_STATS
Olof Johansson91b9a272005-08-09 20:24:39 -0700864 memset(&t->stats, 0, sizeof(struct trie_use_stats));
Robert Olsson19baf832005-06-21 12:43:18 -0700865#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700866}
867
868static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
869{
870 struct hlist_node *node;
871 struct leaf_info *li;
872
Olof Johansson91b9a272005-08-09 20:24:39 -0700873 hlist_for_each_entry(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700874 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700875 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700876
Robert Olsson19baf832005-06-21 12:43:18 -0700877 return NULL;
878}
879
880static inline struct list_head * get_fa_head(struct leaf *l, int plen)
881{
Robert Olsson19baf832005-06-21 12:43:18 -0700882 struct leaf_info *li = find_leaf_info(&l->list, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700883
Olof Johansson91b9a272005-08-09 20:24:39 -0700884 if (!li)
885 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700886
Olof Johansson91b9a272005-08-09 20:24:39 -0700887 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700888}
889
890static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
891{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700892 struct leaf_info *li = NULL, *last = NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -0700893 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700894
895 write_lock_bh(&fib_lock);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700896
Olof Johansson91b9a272005-08-09 20:24:39 -0700897 if (hlist_empty(head)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700898 hlist_add_head(&new->hlist, head);
Olof Johansson91b9a272005-08-09 20:24:39 -0700899 } else {
900 hlist_for_each_entry(li, node, head, hlist) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700901 if (new->plen > li->plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700902 break;
Olof Johansson91b9a272005-08-09 20:24:39 -0700903
Robert Olsson19baf832005-06-21 12:43:18 -0700904 last = li;
905 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700906 if (last)
Robert Olsson19baf832005-06-21 12:43:18 -0700907 hlist_add_after(&last->hlist, &new->hlist);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700908 else
Robert Olsson19baf832005-06-21 12:43:18 -0700909 hlist_add_before(&new->hlist, &li->hlist);
910 }
911 write_unlock_bh(&fib_lock);
912}
913
914static struct leaf *
915fib_find_node(struct trie *t, u32 key)
916{
917 int pos;
918 struct tnode *tn;
919 struct node *n;
920
921 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700922 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700923
924 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
925 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700926
Robert Olsson19baf832005-06-21 12:43:18 -0700927 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700928
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700929 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700930 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700931 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700932 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700933 break;
934 }
935 /* Case we have found a leaf. Compare prefixes */
936
Olof Johansson91b9a272005-08-09 20:24:39 -0700937 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
938 return (struct leaf *)n;
939
Robert Olsson19baf832005-06-21 12:43:18 -0700940 return NULL;
941}
942
943static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
944{
Olof Johansson91b9a272005-08-09 20:24:39 -0700945 int i;
Robert Olsson19baf832005-06-21 12:43:18 -0700946 int wasfull;
947 t_key cindex, key;
948 struct tnode *tp = NULL;
949
Olof Johansson91b9a272005-08-09 20:24:39 -0700950 BUG_ON(!tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700951
Robert Olsson19baf832005-06-21 12:43:18 -0700952 key = tn->key;
953 i = 0;
954
955 while (tn != NULL && NODE_PARENT(tn) != NULL) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700956 if (i > 10) {
Robert Olsson19baf832005-06-21 12:43:18 -0700957 printk("Rebalance tn=%p \n", tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700958 if (tn)
959 printk("tn->parent=%p \n", NODE_PARENT(tn));
960
Robert Olsson19baf832005-06-21 12:43:18 -0700961 printk("Rebalance tp=%p \n", tp);
Olof Johansson91b9a272005-08-09 20:24:39 -0700962 if (tp)
963 printk("tp->parent=%p \n", NODE_PARENT(tp));
Robert Olsson19baf832005-06-21 12:43:18 -0700964 }
965
Olof Johansson91b9a272005-08-09 20:24:39 -0700966 BUG_ON(i > 12); /* Why is this a bug? -ojn */
Robert Olsson19baf832005-06-21 12:43:18 -0700967 i++;
968
969 tp = NODE_PARENT(tn);
970 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
971 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
972 tn = (struct tnode *) resize (t, (struct tnode *)tn);
973 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700974
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700975 if (!NODE_PARENT(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700976 break;
977
978 tn = NODE_PARENT(tn);
979 }
980 /* Handle last (top) tnode */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700981 if (IS_TNODE(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700982 tn = (struct tnode*) resize(t, (struct tnode *)tn);
983
984 return (struct node*) tn;
985}
986
Robert Olssonf835e472005-06-28 15:00:39 -0700987static struct list_head *
988fib_insert_node(struct trie *t, int *err, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700989{
990 int pos, newpos;
991 struct tnode *tp = NULL, *tn = NULL;
992 struct node *n;
993 struct leaf *l;
994 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700995 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700996 struct leaf_info *li;
997 t_key cindex;
998
999 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001000 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001001
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001002 /* If we point to NULL, stop. Either the tree is empty and we should
1003 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001004 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001005 * If we point to a T_TNODE, check if it matches our key. Note that
1006 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001007 * not be the parent's 'pos'+'bits'!
1008 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001009 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001010 * the index from our key, push the T_TNODE and walk the tree.
1011 *
1012 * If it doesn't, we have to replace it with a new T_TNODE.
1013 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001014 * If we point to a T_LEAF, it might or might not have the same key
1015 * as we do. If it does, just change the value, update the T_LEAF's
1016 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001017 * If it doesn't, we need to replace it with a T_TNODE.
1018 */
1019
1020 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1021 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001022
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001023 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001024
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001025 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001026 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001027 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001028 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1029
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001030 if (n && NODE_PARENT(n) != tn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001031 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1032 BUG();
1033 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001034 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001035 break;
1036 }
1037
1038 /*
1039 * n ----> NULL, LEAF or TNODE
1040 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001041 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001042 */
1043
Olof Johansson91b9a272005-08-09 20:24:39 -07001044 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001045
1046 /* Case 1: n is a leaf. Compare prefixes */
1047
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001048 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001049 struct leaf *l = (struct leaf *) n;
1050
Robert Olsson19baf832005-06-21 12:43:18 -07001051 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001052
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001053 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001054 *err = -ENOMEM;
1055 goto err;
1056 }
Robert Olsson19baf832005-06-21 12:43:18 -07001057
1058 fa_head = &li->falh;
1059 insert_leaf_info(&l->list, li);
1060 goto done;
1061 }
1062 t->size++;
1063 l = leaf_new();
1064
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001065 if (!l) {
Robert Olssonf835e472005-06-28 15:00:39 -07001066 *err = -ENOMEM;
1067 goto err;
1068 }
Robert Olsson19baf832005-06-21 12:43:18 -07001069
1070 l->key = key;
1071 li = leaf_info_new(plen);
1072
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001073 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001074 tnode_free((struct tnode *) l);
1075 *err = -ENOMEM;
1076 goto err;
1077 }
Robert Olsson19baf832005-06-21 12:43:18 -07001078
1079 fa_head = &li->falh;
1080 insert_leaf_info(&l->list, li);
1081
Robert Olsson19baf832005-06-21 12:43:18 -07001082 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001083 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001084
1085 NODE_SET_PARENT(l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001086
Olof Johansson91b9a272005-08-09 20:24:39 -07001087 BUG_ON(!tp);
1088
1089 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1090 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1091 } else {
1092 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001093 /*
1094 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001095 * first tnode need some special handling
1096 */
1097
1098 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001099 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001100 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001101 pos = 0;
1102
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001103 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001104 newpos = tkey_mismatch(key, pos, n->key);
1105 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001106 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001107 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001108 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001109 }
Robert Olsson19baf832005-06-21 12:43:18 -07001110
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001111 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001112 free_leaf_info(li);
1113 tnode_free((struct tnode *) l);
1114 *err = -ENOMEM;
1115 goto err;
Olof Johansson91b9a272005-08-09 20:24:39 -07001116 }
1117
Robert Olsson19baf832005-06-21 12:43:18 -07001118 NODE_SET_PARENT(tn, tp);
1119
Olof Johansson91b9a272005-08-09 20:24:39 -07001120 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001121 put_child(t, tn, missbit, (struct node *)l);
1122 put_child(t, tn, 1-missbit, n);
1123
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001124 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001125 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1126 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001127 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001128 t->trie = (struct node*) tn; /* First tnode */
1129 tp = tn;
1130 }
1131 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001132
1133 if (tp && tp->pos + tp->bits > 32)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001134 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
Robert Olsson19baf832005-06-21 12:43:18 -07001135 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001136
Robert Olsson19baf832005-06-21 12:43:18 -07001137 /* Rebalance the trie */
1138 t->trie = trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001139done:
1140 t->revision++;
Olof Johansson91b9a272005-08-09 20:24:39 -07001141err:
Robert Olsson19baf832005-06-21 12:43:18 -07001142 return fa_head;
1143}
1144
1145static int
1146fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1147 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1148{
1149 struct trie *t = (struct trie *) tb->tb_data;
1150 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001151 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001152 struct fib_info *fi;
1153 int plen = r->rtm_dst_len;
1154 int type = r->rtm_type;
1155 u8 tos = r->rtm_tos;
1156 u32 key, mask;
1157 int err;
1158 struct leaf *l;
1159
1160 if (plen > 32)
1161 return -EINVAL;
1162
1163 key = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001164 if (rta->rta_dst)
Robert Olsson19baf832005-06-21 12:43:18 -07001165 memcpy(&key, rta->rta_dst, 4);
1166
1167 key = ntohl(key);
1168
Olof Johansson91b9a272005-08-09 20:24:39 -07001169 DBG("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001170
Olof Johansson91b9a272005-08-09 20:24:39 -07001171 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001172
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001173 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001174 return -EINVAL;
1175
1176 key = key & mask;
1177
Olof Johansson91b9a272005-08-09 20:24:39 -07001178 fi = fib_create_info(r, rta, nlhdr, &err);
1179
1180 if (!fi)
Robert Olsson19baf832005-06-21 12:43:18 -07001181 goto err;
1182
1183 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001184 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001185
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001186 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001187 fa_head = get_fa_head(l, plen);
1188 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1189 }
1190
1191 /* Now fa, if non-NULL, points to the first fib alias
1192 * with the same keys [prefix,tos,priority], if such key already
1193 * exists or to the node before which we will insert new one.
1194 *
1195 * If fa is NULL, we will need to allocate a new one and
1196 * insert to the head of f.
1197 *
1198 * If f is NULL, no fib node matched the destination key
1199 * and we need to allocate a new one of those as well.
1200 */
1201
Olof Johansson91b9a272005-08-09 20:24:39 -07001202 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
Robert Olsson19baf832005-06-21 12:43:18 -07001203 struct fib_alias *fa_orig;
1204
1205 err = -EEXIST;
1206 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1207 goto out;
1208
1209 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1210 struct fib_info *fi_drop;
1211 u8 state;
1212
1213 write_lock_bh(&fib_lock);
1214
1215 fi_drop = fa->fa_info;
1216 fa->fa_info = fi;
1217 fa->fa_type = type;
1218 fa->fa_scope = r->rtm_scope;
1219 state = fa->fa_state;
1220 fa->fa_state &= ~FA_S_ACCESSED;
1221
1222 write_unlock_bh(&fib_lock);
1223
1224 fib_release_info(fi_drop);
1225 if (state & FA_S_ACCESSED)
Olof Johansson91b9a272005-08-09 20:24:39 -07001226 rt_cache_flush(-1);
Robert Olsson19baf832005-06-21 12:43:18 -07001227
Olof Johansson91b9a272005-08-09 20:24:39 -07001228 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001229 }
1230 /* Error if we find a perfect match which
1231 * uses the same scope, type, and nexthop
1232 * information.
1233 */
1234 fa_orig = fa;
1235 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1236 if (fa->fa_tos != tos)
1237 break;
1238 if (fa->fa_info->fib_priority != fi->fib_priority)
1239 break;
1240 if (fa->fa_type == type &&
1241 fa->fa_scope == r->rtm_scope &&
1242 fa->fa_info == fi) {
1243 goto out;
1244 }
1245 }
1246 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1247 fa = fa_orig;
1248 }
1249 err = -ENOENT;
Olof Johansson91b9a272005-08-09 20:24:39 -07001250 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001251 goto out;
1252
1253 err = -ENOBUFS;
1254 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1255 if (new_fa == NULL)
1256 goto out;
1257
1258 new_fa->fa_info = fi;
1259 new_fa->fa_tos = tos;
1260 new_fa->fa_type = type;
1261 new_fa->fa_scope = r->rtm_scope;
1262 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001263 /*
1264 * Insert new entry to the list.
1265 */
1266
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001267 if (!fa_head) {
Robert Olssonf835e472005-06-28 15:00:39 -07001268 fa_head = fib_insert_node(t, &err, key, plen);
1269 err = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001270 if (err)
Robert Olssonf835e472005-06-28 15:00:39 -07001271 goto out_free_new_fa;
1272 }
Robert Olsson19baf832005-06-21 12:43:18 -07001273
1274 write_lock_bh(&fib_lock);
1275
Olof Johansson91b9a272005-08-09 20:24:39 -07001276 list_add_tail(&new_fa->fa_list, (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001277
1278 write_unlock_bh(&fib_lock);
1279
1280 rt_cache_flush(-1);
1281 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1282succeeded:
1283 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001284
1285out_free_new_fa:
1286 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001287out:
1288 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001289err:
Robert Olsson19baf832005-06-21 12:43:18 -07001290 return err;
1291}
1292
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001293static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
Patrick McHardy06c74272005-08-23 22:06:09 -07001294 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001295{
Patrick McHardy06c74272005-08-23 22:06:09 -07001296 int err, i;
Robert Olsson19baf832005-06-21 12:43:18 -07001297 t_key mask;
1298 struct leaf_info *li;
1299 struct hlist_head *hhead = &l->list;
1300 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001301
Robert Olsson19baf832005-06-21 12:43:18 -07001302 hlist_for_each_entry(li, node, hhead, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001303 i = li->plen;
1304 mask = ntohl(inet_make_mask(i));
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001305 if (l->key != (key & mask))
Robert Olsson19baf832005-06-21 12:43:18 -07001306 continue;
1307
Patrick McHardy06c74272005-08-23 22:06:09 -07001308 if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001309 *plen = i;
1310#ifdef CONFIG_IP_FIB_TRIE_STATS
1311 t->stats.semantic_match_passed++;
1312#endif
Patrick McHardy06c74272005-08-23 22:06:09 -07001313 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001314 }
1315#ifdef CONFIG_IP_FIB_TRIE_STATS
1316 t->stats.semantic_match_miss++;
1317#endif
1318 }
Patrick McHardy06c74272005-08-23 22:06:09 -07001319 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001320}
1321
1322static int
1323fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1324{
1325 struct trie *t = (struct trie *) tb->tb_data;
1326 int plen, ret = 0;
1327 struct node *n;
1328 struct tnode *pn;
1329 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001330 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001331 int chopped_off;
1332 t_key cindex = 0;
1333 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001334 struct tnode *cn;
1335 t_key node_prefix, key_prefix, pref_mismatch;
1336 int mp;
1337
Robert Olsson19baf832005-06-21 12:43:18 -07001338 n = t->trie;
1339
1340 read_lock(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -07001341
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001342 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001343 goto failed;
1344
1345#ifdef CONFIG_IP_FIB_TRIE_STATS
1346 t->stats.gets++;
1347#endif
1348
1349 /* Just a leaf? */
1350 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001351 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001352 goto found;
1353 goto failed;
1354 }
1355 pn = (struct tnode *) n;
1356 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001357
Olof Johansson91b9a272005-08-09 20:24:39 -07001358 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001359 pos = pn->pos;
1360 bits = pn->bits;
1361
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001362 if (!chopped_off)
Robert Olsson19baf832005-06-21 12:43:18 -07001363 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1364
1365 n = tnode_get_child(pn, cindex);
1366
1367 if (n == NULL) {
1368#ifdef CONFIG_IP_FIB_TRIE_STATS
1369 t->stats.null_node_hit++;
1370#endif
1371 goto backtrace;
1372 }
1373
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001374 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001375 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001376 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001377 else
1378 goto backtrace;
1379 }
1380
1381#define HL_OPTIMIZE
1382#ifdef HL_OPTIMIZE
1383 cn = (struct tnode *)n;
1384
1385 /*
1386 * It's a tnode, and we can do some extra checks here if we
1387 * like, to avoid descending into a dead-end branch.
1388 * This tnode is in the parent's child array at index
1389 * key[p_pos..p_pos+p_bits] but potentially with some bits
1390 * chopped off, so in reality the index may be just a
1391 * subprefix, padded with zero at the end.
1392 * We can also take a look at any skipped bits in this
1393 * tnode - everything up to p_pos is supposed to be ok,
1394 * and the non-chopped bits of the index (se previous
1395 * paragraph) are also guaranteed ok, but the rest is
1396 * considered unknown.
1397 *
1398 * The skipped bits are key[pos+bits..cn->pos].
1399 */
1400
1401 /* If current_prefix_length < pos+bits, we are already doing
1402 * actual prefix matching, which means everything from
1403 * pos+(bits-chopped_off) onward must be zero along some
1404 * branch of this subtree - otherwise there is *no* valid
1405 * prefix present. Here we can only check the skipped
1406 * bits. Remember, since we have already indexed into the
1407 * parent's child array, we know that the bits we chopped of
1408 * *are* zero.
1409 */
1410
1411 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1412
1413 if (current_prefix_length < pos+bits) {
1414 if (tkey_extract_bits(cn->key, current_prefix_length,
1415 cn->pos - current_prefix_length) != 0 ||
1416 !(cn->child[0]))
1417 goto backtrace;
1418 }
1419
1420 /*
1421 * If chopped_off=0, the index is fully validated and we
1422 * only need to look at the skipped bits for this, the new,
1423 * tnode. What we actually want to do is to find out if
1424 * these skipped bits match our key perfectly, or if we will
1425 * have to count on finding a matching prefix further down,
1426 * because if we do, we would like to have some way of
1427 * verifying the existence of such a prefix at this point.
1428 */
1429
1430 /* The only thing we can do at this point is to verify that
1431 * any such matching prefix can indeed be a prefix to our
1432 * key, and if the bits in the node we are inspecting that
1433 * do not match our key are not ZERO, this cannot be true.
1434 * Thus, find out where there is a mismatch (before cn->pos)
1435 * and verify that all the mismatching bits are zero in the
1436 * new tnode's key.
1437 */
1438
1439 /* Note: We aren't very concerned about the piece of the key
1440 * that precede pn->pos+pn->bits, since these have already been
1441 * checked. The bits after cn->pos aren't checked since these are
1442 * by definition "unknown" at this point. Thus, what we want to
1443 * see is if we are about to enter the "prefix matching" state,
1444 * and in that case verify that the skipped bits that will prevail
1445 * throughout this subtree are zero, as they have to be if we are
1446 * to find a matching prefix.
1447 */
1448
1449 node_prefix = MASK_PFX(cn->key, cn->pos);
1450 key_prefix = MASK_PFX(key, cn->pos);
1451 pref_mismatch = key_prefix^node_prefix;
1452 mp = 0;
1453
1454 /* In short: If skipped bits in this node do not match the search
1455 * key, enter the "prefix matching" state.directly.
1456 */
1457 if (pref_mismatch) {
1458 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1459 mp++;
1460 pref_mismatch = pref_mismatch <<1;
1461 }
1462 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1463
1464 if (key_prefix != 0)
1465 goto backtrace;
1466
1467 if (current_prefix_length >= cn->pos)
1468 current_prefix_length = mp;
1469 }
1470#endif
1471 pn = (struct tnode *)n; /* Descend */
1472 chopped_off = 0;
1473 continue;
1474
Robert Olsson19baf832005-06-21 12:43:18 -07001475backtrace:
1476 chopped_off++;
1477
1478 /* As zero don't change the child key (cindex) */
Olof Johansson91b9a272005-08-09 20:24:39 -07001479 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001480 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001481
1482 /* Decrease current_... with bits chopped off */
1483 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1484 current_prefix_length = pn->pos + pn->bits - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001485
Robert Olsson19baf832005-06-21 12:43:18 -07001486 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001487 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001488 * chopped off all bits in this tnode walk up to our parent.
1489 */
1490
Olof Johansson91b9a272005-08-09 20:24:39 -07001491 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001492 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001493 } else {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001494 if (NODE_PARENT(pn) == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001495 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001496
Robert Olsson19baf832005-06-21 12:43:18 -07001497 /* Get Child's index */
1498 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1499 pn = NODE_PARENT(pn);
1500 chopped_off = 0;
1501
1502#ifdef CONFIG_IP_FIB_TRIE_STATS
1503 t->stats.backtrack++;
1504#endif
1505 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001506 }
Robert Olsson19baf832005-06-21 12:43:18 -07001507 }
1508failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001509 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001510found:
1511 read_unlock(&fib_lock);
1512 return ret;
1513}
1514
1515static int trie_leaf_remove(struct trie *t, t_key key)
1516{
1517 t_key cindex;
1518 struct tnode *tp = NULL;
1519 struct node *n = t->trie;
1520 struct leaf *l;
1521
Olof Johansson91b9a272005-08-09 20:24:39 -07001522 DBG("entering trie_leaf_remove(%p)\n", n);
Robert Olsson19baf832005-06-21 12:43:18 -07001523
1524 /* Note that in the case skipped bits, those bits are *not* checked!
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001525 * When we finish this, we will have NULL or a T_LEAF, and the
Robert Olsson19baf832005-06-21 12:43:18 -07001526 * T_LEAF may or may not match our key.
1527 */
1528
Olof Johansson91b9a272005-08-09 20:24:39 -07001529 while (n != NULL && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001530 struct tnode *tn = (struct tnode *) n;
1531 check_tnode(tn);
1532 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1533
Olof Johansson91b9a272005-08-09 20:24:39 -07001534 if (n && NODE_PARENT(n) != tn) {
1535 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1536 BUG();
1537 }
1538 }
Robert Olsson19baf832005-06-21 12:43:18 -07001539 l = (struct leaf *) n;
1540
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001541 if (!n || !tkey_equals(l->key, key))
Robert Olsson19baf832005-06-21 12:43:18 -07001542 return 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001543
1544 /*
1545 * Key found.
1546 * Remove the leaf and rebalance the tree
Robert Olsson19baf832005-06-21 12:43:18 -07001547 */
1548
1549 t->revision++;
1550 t->size--;
1551
1552 tp = NODE_PARENT(n);
1553 tnode_free((struct tnode *) n);
1554
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001555 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001556 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1557 put_child(t, (struct tnode *)tp, cindex, NULL);
1558 t->trie = trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001559 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001560 t->trie = NULL;
1561
1562 return 1;
1563}
1564
1565static int
1566fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
Olof Johansson91b9a272005-08-09 20:24:39 -07001567 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
Robert Olsson19baf832005-06-21 12:43:18 -07001568{
1569 struct trie *t = (struct trie *) tb->tb_data;
1570 u32 key, mask;
1571 int plen = r->rtm_dst_len;
1572 u8 tos = r->rtm_tos;
1573 struct fib_alias *fa, *fa_to_delete;
1574 struct list_head *fa_head;
1575 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001576 int kill_li = 0;
1577 struct leaf_info *li;
1578
Robert Olsson19baf832005-06-21 12:43:18 -07001579
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001580 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001581 return -EINVAL;
1582
1583 key = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001584 if (rta->rta_dst)
Robert Olsson19baf832005-06-21 12:43:18 -07001585 memcpy(&key, rta->rta_dst, 4);
1586
1587 key = ntohl(key);
Olof Johansson91b9a272005-08-09 20:24:39 -07001588 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001589
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001590 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001591 return -EINVAL;
1592
1593 key = key & mask;
1594 l = fib_find_node(t, key);
1595
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001596 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001597 return -ESRCH;
1598
1599 fa_head = get_fa_head(l, plen);
1600 fa = fib_find_alias(fa_head, tos, 0);
1601
1602 if (!fa)
1603 return -ESRCH;
1604
Olof Johansson91b9a272005-08-09 20:24:39 -07001605 DBG("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001606
1607 fa_to_delete = NULL;
1608 fa_head = fa->fa_list.prev;
1609 list_for_each_entry(fa, fa_head, fa_list) {
1610 struct fib_info *fi = fa->fa_info;
1611
1612 if (fa->fa_tos != tos)
1613 break;
1614
1615 if ((!r->rtm_type ||
1616 fa->fa_type == r->rtm_type) &&
1617 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1618 fa->fa_scope == r->rtm_scope) &&
1619 (!r->rtm_protocol ||
1620 fi->fib_protocol == r->rtm_protocol) &&
1621 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1622 fa_to_delete = fa;
1623 break;
1624 }
1625 }
1626
Olof Johansson91b9a272005-08-09 20:24:39 -07001627 if (!fa_to_delete)
1628 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001629
Olof Johansson91b9a272005-08-09 20:24:39 -07001630 fa = fa_to_delete;
1631 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
Robert Olsson19baf832005-06-21 12:43:18 -07001632
Olof Johansson91b9a272005-08-09 20:24:39 -07001633 l = fib_find_node(t, key);
1634 li = find_leaf_info(&l->list, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001635
Olof Johansson91b9a272005-08-09 20:24:39 -07001636 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001637
Olof Johansson91b9a272005-08-09 20:24:39 -07001638 list_del(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001639
Olof Johansson91b9a272005-08-09 20:24:39 -07001640 if (list_empty(fa_head)) {
1641 hlist_del(&li->hlist);
1642 kill_li = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001643 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001644 write_unlock_bh(&fib_lock);
1645
1646 if (kill_li)
1647 free_leaf_info(li);
1648
1649 if (hlist_empty(&l->list))
1650 trie_leaf_remove(t, key);
1651
1652 if (fa->fa_state & FA_S_ACCESSED)
1653 rt_cache_flush(-1);
1654
1655 fn_free_alias(fa);
1656 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001657}
1658
1659static int trie_flush_list(struct trie *t, struct list_head *head)
1660{
1661 struct fib_alias *fa, *fa_node;
1662 int found = 0;
1663
1664 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1665 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001666
Olof Johansson91b9a272005-08-09 20:24:39 -07001667 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001668 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001669 list_del(&fa->fa_list);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001670 write_unlock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001671
1672 fn_free_alias(fa);
1673 found++;
1674 }
1675 }
1676 return found;
1677}
1678
1679static int trie_flush_leaf(struct trie *t, struct leaf *l)
1680{
1681 int found = 0;
1682 struct hlist_head *lih = &l->list;
1683 struct hlist_node *node, *tmp;
1684 struct leaf_info *li = NULL;
1685
1686 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001687 found += trie_flush_list(t, &li->falh);
1688
1689 if (list_empty(&li->falh)) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001690 write_lock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001691 hlist_del(&li->hlist);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001692 write_unlock_bh(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001693
1694 free_leaf_info(li);
1695 }
1696 }
1697 return found;
1698}
1699
1700static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1701{
1702 struct node *c = (struct node *) thisleaf;
1703 struct tnode *p;
1704 int idx;
1705
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001706 if (c == NULL) {
1707 if (t->trie == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001708 return NULL;
1709
1710 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1711 return (struct leaf *) t->trie;
1712
1713 p = (struct tnode*) t->trie; /* Start */
Olof Johansson91b9a272005-08-09 20:24:39 -07001714 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001715 p = (struct tnode *) NODE_PARENT(c);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001716
Robert Olsson19baf832005-06-21 12:43:18 -07001717 while (p) {
1718 int pos, last;
1719
1720 /* Find the next child of the parent */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001721 if (c)
1722 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1723 else
Robert Olsson19baf832005-06-21 12:43:18 -07001724 pos = 0;
1725
1726 last = 1 << p->bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001727 for (idx = pos; idx < last ; idx++) {
1728 if (!p->child[idx])
1729 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001730
Olof Johansson91b9a272005-08-09 20:24:39 -07001731 /* Decend if tnode */
1732 while (IS_TNODE(p->child[idx])) {
1733 p = (struct tnode*) p->child[idx];
1734 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001735
Olof Johansson91b9a272005-08-09 20:24:39 -07001736 /* Rightmost non-NULL branch */
1737 if (p && IS_TNODE(p))
1738 while (p->child[idx] == NULL && idx < (1 << p->bits)) idx++;
Robert Olsson19baf832005-06-21 12:43:18 -07001739
Olof Johansson91b9a272005-08-09 20:24:39 -07001740 /* Done with this tnode? */
1741 if (idx >= (1 << p->bits) || p->child[idx] == NULL)
1742 goto up;
Robert Olsson19baf832005-06-21 12:43:18 -07001743 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001744 return (struct leaf*) p->child[idx];
Robert Olsson19baf832005-06-21 12:43:18 -07001745 }
1746up:
1747 /* No more children go up one step */
Olof Johansson91b9a272005-08-09 20:24:39 -07001748 c = (struct node *) p;
Robert Olsson19baf832005-06-21 12:43:18 -07001749 p = (struct tnode *) NODE_PARENT(p);
1750 }
1751 return NULL; /* Ready. Root of trie */
1752}
1753
1754static int fn_trie_flush(struct fib_table *tb)
1755{
1756 struct trie *t = (struct trie *) tb->tb_data;
1757 struct leaf *ll = NULL, *l = NULL;
1758 int found = 0, h;
1759
1760 t->revision++;
1761
Olof Johansson91b9a272005-08-09 20:24:39 -07001762 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001763 found += trie_flush_leaf(t, l);
1764
1765 if (ll && hlist_empty(&ll->list))
1766 trie_leaf_remove(t, ll->key);
1767 ll = l;
1768 }
1769
1770 if (ll && hlist_empty(&ll->list))
1771 trie_leaf_remove(t, ll->key);
1772
Olof Johansson91b9a272005-08-09 20:24:39 -07001773 DBG("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001774 return found;
1775}
1776
Olof Johansson91b9a272005-08-09 20:24:39 -07001777static int trie_last_dflt = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001778
1779static void
1780fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1781{
1782 struct trie *t = (struct trie *) tb->tb_data;
1783 int order, last_idx;
1784 struct fib_info *fi = NULL;
1785 struct fib_info *last_resort;
1786 struct fib_alias *fa = NULL;
1787 struct list_head *fa_head;
1788 struct leaf *l;
1789
1790 last_idx = -1;
1791 last_resort = NULL;
1792 order = -1;
1793
1794 read_lock(&fib_lock);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001795
Robert Olsson19baf832005-06-21 12:43:18 -07001796 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001797 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001798 goto out;
1799
1800 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001801 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001802 goto out;
1803
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001804 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001805 goto out;
1806
1807 list_for_each_entry(fa, fa_head, fa_list) {
1808 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001809
Robert Olsson19baf832005-06-21 12:43:18 -07001810 if (fa->fa_scope != res->scope ||
1811 fa->fa_type != RTN_UNICAST)
1812 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001813
Robert Olsson19baf832005-06-21 12:43:18 -07001814 if (next_fi->fib_priority > res->fi->fib_priority)
1815 break;
1816 if (!next_fi->fib_nh[0].nh_gw ||
1817 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1818 continue;
1819 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001820
Robert Olsson19baf832005-06-21 12:43:18 -07001821 if (fi == NULL) {
1822 if (next_fi != res->fi)
1823 break;
1824 } else if (!fib_detect_death(fi, order, &last_resort,
1825 &last_idx, &trie_last_dflt)) {
1826 if (res->fi)
1827 fib_info_put(res->fi);
1828 res->fi = fi;
1829 atomic_inc(&fi->fib_clntref);
1830 trie_last_dflt = order;
1831 goto out;
1832 }
1833 fi = next_fi;
1834 order++;
1835 }
1836 if (order <= 0 || fi == NULL) {
1837 trie_last_dflt = -1;
1838 goto out;
1839 }
1840
1841 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1842 if (res->fi)
1843 fib_info_put(res->fi);
1844 res->fi = fi;
1845 atomic_inc(&fi->fib_clntref);
1846 trie_last_dflt = order;
1847 goto out;
1848 }
1849 if (last_idx >= 0) {
1850 if (res->fi)
1851 fib_info_put(res->fi);
1852 res->fi = last_resort;
1853 if (last_resort)
1854 atomic_inc(&last_resort->fib_clntref);
1855 }
1856 trie_last_dflt = last_idx;
1857 out:;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001858 read_unlock(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07001859}
1860
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001861static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001862 struct sk_buff *skb, struct netlink_callback *cb)
1863{
1864 int i, s_i;
1865 struct fib_alias *fa;
1866
Olof Johansson91b9a272005-08-09 20:24:39 -07001867 u32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001868
Olof Johansson91b9a272005-08-09 20:24:39 -07001869 s_i = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001870 i = 0;
1871
1872 list_for_each_entry(fa, fah, fa_list) {
1873 if (i < s_i) {
1874 i++;
1875 continue;
1876 }
1877 if (fa->fa_info->fib_nh == NULL) {
1878 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1879 i++;
1880 continue;
1881 }
1882 if (fa->fa_info == NULL) {
1883 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1884 i++;
1885 continue;
1886 }
1887
1888 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1889 cb->nlh->nlmsg_seq,
1890 RTM_NEWROUTE,
1891 tb->tb_id,
1892 fa->fa_type,
1893 fa->fa_scope,
1894 &xkey,
1895 plen,
1896 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001897 fa->fa_info, 0) < 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001898 cb->args[3] = i;
1899 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001900 }
Robert Olsson19baf832005-06-21 12:43:18 -07001901 i++;
1902 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001903 cb->args[3] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001904 return skb->len;
1905}
1906
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001907static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
Robert Olsson19baf832005-06-21 12:43:18 -07001908 struct netlink_callback *cb)
1909{
1910 int h, s_h;
1911 struct list_head *fa_head;
1912 struct leaf *l = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001913
Olof Johansson91b9a272005-08-09 20:24:39 -07001914 s_h = cb->args[2];
Robert Olsson19baf832005-06-21 12:43:18 -07001915
Olof Johansson91b9a272005-08-09 20:24:39 -07001916 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001917 if (h < s_h)
1918 continue;
1919 if (h > s_h)
1920 memset(&cb->args[3], 0,
1921 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1922
1923 fa_head = get_fa_head(l, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001924
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001925 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001926 continue;
1927
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001928 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001929 continue;
1930
1931 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001932 cb->args[2] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001933 return -1;
1934 }
1935 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001936 cb->args[2] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001937 return skb->len;
1938}
1939
1940static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1941{
1942 int m, s_m;
1943 struct trie *t = (struct trie *) tb->tb_data;
1944
1945 s_m = cb->args[1];
1946
1947 read_lock(&fib_lock);
Olof Johansson91b9a272005-08-09 20:24:39 -07001948 for (m = 0; m <= 32; m++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001949 if (m < s_m)
1950 continue;
1951 if (m > s_m)
1952 memset(&cb->args[2], 0,
Olof Johansson91b9a272005-08-09 20:24:39 -07001953 sizeof(cb->args) - 2*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001954
1955 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1956 cb->args[1] = m;
1957 goto out;
1958 }
1959 }
1960 read_unlock(&fib_lock);
1961 cb->args[1] = m;
1962 return skb->len;
Olof Johansson91b9a272005-08-09 20:24:39 -07001963out:
Robert Olsson19baf832005-06-21 12:43:18 -07001964 read_unlock(&fib_lock);
1965 return -1;
1966}
1967
1968/* Fix more generic FIB names for init later */
1969
1970#ifdef CONFIG_IP_MULTIPLE_TABLES
1971struct fib_table * fib_hash_init(int id)
1972#else
1973struct fib_table * __init fib_hash_init(int id)
1974#endif
1975{
1976 struct fib_table *tb;
1977 struct trie *t;
1978
1979 if (fn_alias_kmem == NULL)
1980 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1981 sizeof(struct fib_alias),
1982 0, SLAB_HWCACHE_ALIGN,
1983 NULL, NULL);
1984
1985 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1986 GFP_KERNEL);
1987 if (tb == NULL)
1988 return NULL;
1989
1990 tb->tb_id = id;
1991 tb->tb_lookup = fn_trie_lookup;
1992 tb->tb_insert = fn_trie_insert;
1993 tb->tb_delete = fn_trie_delete;
1994 tb->tb_flush = fn_trie_flush;
1995 tb->tb_select_default = fn_trie_select_default;
1996 tb->tb_dump = fn_trie_dump;
1997 memset(tb->tb_data, 0, sizeof(struct trie));
1998
1999 t = (struct trie *) tb->tb_data;
2000
2001 trie_init(t);
2002
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002003 if (id == RT_TABLE_LOCAL)
Olof Johansson91b9a272005-08-09 20:24:39 -07002004 trie_local = t;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002005 else if (id == RT_TABLE_MAIN)
Olof Johansson91b9a272005-08-09 20:24:39 -07002006 trie_main = t;
Robert Olsson19baf832005-06-21 12:43:18 -07002007
2008 if (id == RT_TABLE_LOCAL)
2009 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
2010
2011 return tb;
2012}
2013
2014/* Trie dump functions */
2015
2016static void putspace_seq(struct seq_file *seq, int n)
2017{
Olof Johansson91b9a272005-08-09 20:24:39 -07002018 while (n--)
2019 seq_printf(seq, " ");
Robert Olsson19baf832005-06-21 12:43:18 -07002020}
2021
2022static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
2023{
2024 while (bits--)
2025 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
2026}
2027
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002028static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
Robert Olsson19baf832005-06-21 12:43:18 -07002029 int pend, int cindex, int bits)
2030{
2031 putspace_seq(seq, indent);
2032 if (IS_LEAF(n))
2033 seq_printf(seq, "|");
2034 else
2035 seq_printf(seq, "+");
2036 if (bits) {
2037 seq_printf(seq, "%d/", cindex);
2038 printbin_seq(seq, cindex, bits);
2039 seq_printf(seq, ": ");
Olof Johansson91b9a272005-08-09 20:24:39 -07002040 } else
Robert Olsson19baf832005-06-21 12:43:18 -07002041 seq_printf(seq, "<root>: ");
2042 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
2043
Robert Olsson19baf832005-06-21 12:43:18 -07002044 if (IS_LEAF(n)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07002045 struct leaf *l = (struct leaf *)n;
Robert Olsson19baf832005-06-21 12:43:18 -07002046 struct fib_alias *fa;
2047 int i;
Olof Johansson91b9a272005-08-09 20:24:39 -07002048
2049 seq_printf(seq, "key=%d.%d.%d.%d\n",
2050 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
2051
2052 for (i = 32; i >= 0; i--)
2053 if (find_leaf_info(&l->list, i)) {
Robert Olsson19baf832005-06-21 12:43:18 -07002054 struct list_head *fa_head = get_fa_head(l, i);
Olof Johansson91b9a272005-08-09 20:24:39 -07002055
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002056 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07002057 continue;
2058
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002059 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07002060 continue;
2061
2062 putspace_seq(seq, indent+2);
2063 seq_printf(seq, "{/%d...dumping}\n", i);
2064
Robert Olsson19baf832005-06-21 12:43:18 -07002065 list_for_each_entry(fa, fa_head, fa_list) {
2066 putspace_seq(seq, indent+2);
Robert Olsson19baf832005-06-21 12:43:18 -07002067 if (fa->fa_info == NULL) {
2068 seq_printf(seq, "Error fa_info=NULL\n");
2069 continue;
2070 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002071 if (fa->fa_info->fib_nh == NULL) {
2072 seq_printf(seq, "Error _fib_nh=NULL\n");
2073 continue;
2074 }
Robert Olsson19baf832005-06-21 12:43:18 -07002075
2076 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2077 fa->fa_type,
2078 fa->fa_scope,
2079 fa->fa_tos);
2080 }
2081 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002082 } else {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002083 struct tnode *tn = (struct tnode *)n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002084 int plen = ((struct tnode *)n)->pos;
2085 t_key prf = MASK_PFX(n->key, plen);
2086
2087 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
2088 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
2089
Robert Olsson19baf832005-06-21 12:43:18 -07002090 putspace_seq(seq, indent); seq_printf(seq, "| ");
Olof Johansson91b9a272005-08-09 20:24:39 -07002091 seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
Robert Olsson19baf832005-06-21 12:43:18 -07002092 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2093 seq_printf(seq, "}\n");
2094 putspace_seq(seq, indent); seq_printf(seq, "| ");
2095 seq_printf(seq, "{pos=%d", tn->pos);
2096 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2097 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2098 putspace_seq(seq, indent); seq_printf(seq, "| ");
2099 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2100 }
2101}
2102
2103static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2104{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002105 struct node *n = t->trie;
Olof Johansson91b9a272005-08-09 20:24:39 -07002106 int cindex = 0;
2107 int indent = 1;
2108 int pend = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002109 int depth = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07002110 struct tnode *tn;
Robert Olsson19baf832005-06-21 12:43:18 -07002111
2112 read_lock(&fib_lock);
2113
2114 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
Robert Olsson19baf832005-06-21 12:43:18 -07002115
Olof Johansson91b9a272005-08-09 20:24:39 -07002116 if (!n) {
2117 seq_printf(seq, "------ trie is empty\n");
Robert Olsson19baf832005-06-21 12:43:18 -07002118
Olof Johansson91b9a272005-08-09 20:24:39 -07002119 read_unlock(&fib_lock);
2120 return;
Robert Olsson19baf832005-06-21 12:43:18 -07002121 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002122
2123 printnode_seq(seq, indent, n, pend, cindex, 0);
2124
2125 if (!IS_TNODE(n)) {
2126 read_unlock(&fib_lock);
2127 return;
2128 }
2129
2130 tn = (struct tnode *)n;
2131 pend = tn->pos+tn->bits;
2132 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2133 indent += 3;
2134 depth++;
2135
2136 while (tn && cindex < (1 << tn->bits)) {
2137 if (tn->child[cindex]) {
2138 /* Got a child */
2139
2140 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2141 if (IS_LEAF(tn->child[cindex])) {
2142 cindex++;
2143 } else {
2144 /*
2145 * New tnode. Decend one level
2146 */
2147
2148 depth++;
2149 tn = (struct tnode *)tn->child[cindex];
2150 pend = tn->pos + tn->bits;
2151 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2152 indent += 3;
2153 cindex = 0;
2154 }
2155 } else
2156 cindex++;
2157
2158 /*
2159 * Test if we are done
2160 */
2161
2162 while (cindex >= (1 << tn->bits)) {
2163 /*
2164 * Move upwards and test for root
2165 * pop off all traversed nodes
2166 */
2167
2168 if (NODE_PARENT(tn) == NULL) {
2169 tn = NULL;
2170 break;
2171 }
2172
2173 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2174 cindex++;
2175 tn = NODE_PARENT(tn);
2176 pend = tn->pos + tn->bits;
2177 indent -= 3;
2178 depth--;
2179 }
2180 }
Robert Olsson19baf832005-06-21 12:43:18 -07002181
2182 read_unlock(&fib_lock);
2183}
2184
2185static struct trie_stat *trie_stat_new(void)
2186{
Olof Johansson91b9a272005-08-09 20:24:39 -07002187 struct trie_stat *s;
Robert Olsson19baf832005-06-21 12:43:18 -07002188 int i;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002189
Olof Johansson91b9a272005-08-09 20:24:39 -07002190 s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2191 if (!s)
2192 return NULL;
2193
2194 s->totdepth = 0;
2195 s->maxdepth = 0;
2196 s->tnodes = 0;
2197 s->leaves = 0;
2198 s->nullpointers = 0;
2199
2200 for (i = 0; i < MAX_CHILDS; i++)
2201 s->nodesizes[i] = 0;
2202
Robert Olsson19baf832005-06-21 12:43:18 -07002203 return s;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002204}
Robert Olsson19baf832005-06-21 12:43:18 -07002205
2206static struct trie_stat *trie_collect_stats(struct trie *t)
2207{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002208 struct node *n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07002209 struct trie_stat *s = trie_stat_new();
2210 int cindex = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002211 int pend = 0;
2212 int depth = 0;
2213
Olof Johansson91b9a272005-08-09 20:24:39 -07002214 if (!s)
2215 return NULL;
2216 if (!n)
2217 return s;
Robert Olsson19baf832005-06-21 12:43:18 -07002218
Olof Johansson91b9a272005-08-09 20:24:39 -07002219 read_lock(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07002220
Olof Johansson91b9a272005-08-09 20:24:39 -07002221 if (IS_TNODE(n)) {
2222 struct tnode *tn = (struct tnode *)n;
2223 pend = tn->pos+tn->bits;
2224 s->nodesizes[tn->bits]++;
2225 depth++;
Robert Olsson19baf832005-06-21 12:43:18 -07002226
Olof Johansson91b9a272005-08-09 20:24:39 -07002227 while (tn && cindex < (1 << tn->bits)) {
2228 if (tn->child[cindex]) {
2229 /* Got a child */
Robert Olsson19baf832005-06-21 12:43:18 -07002230
Olof Johansson91b9a272005-08-09 20:24:39 -07002231 if (IS_LEAF(tn->child[cindex])) {
2232 cindex++;
2233
2234 /* stats */
2235 if (depth > s->maxdepth)
2236 s->maxdepth = depth;
2237 s->totdepth += depth;
2238 s->leaves++;
2239 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07002240 /*
Olof Johansson91b9a272005-08-09 20:24:39 -07002241 * New tnode. Decend one level
Robert Olsson19baf832005-06-21 12:43:18 -07002242 */
Robert Olsson19baf832005-06-21 12:43:18 -07002243
Olof Johansson91b9a272005-08-09 20:24:39 -07002244 s->tnodes++;
2245 s->nodesizes[tn->bits]++;
2246 depth++;
Robert Olsson19baf832005-06-21 12:43:18 -07002247
Olof Johansson91b9a272005-08-09 20:24:39 -07002248 n = tn->child[cindex];
2249 tn = (struct tnode *)n;
2250 pend = tn->pos+tn->bits;
2251
2252 cindex = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002253 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002254 } else {
2255 cindex++;
2256 s->nullpointers++;
Robert Olsson19baf832005-06-21 12:43:18 -07002257 }
Olof Johansson91b9a272005-08-09 20:24:39 -07002258
2259 /*
2260 * Test if we are done
2261 */
2262
2263 while (cindex >= (1 << tn->bits)) {
2264 /*
2265 * Move upwards and test for root
2266 * pop off all traversed nodes
2267 */
2268
2269 if (NODE_PARENT(tn) == NULL) {
2270 tn = NULL;
2271 n = NULL;
2272 break;
2273 }
2274
2275 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2276 tn = NODE_PARENT(tn);
2277 cindex++;
2278 n = (struct node *)tn;
2279 pend = tn->pos+tn->bits;
2280 depth--;
2281 }
Robert Olsson19baf832005-06-21 12:43:18 -07002282 }
2283 }
2284
Olof Johansson91b9a272005-08-09 20:24:39 -07002285 read_unlock(&fib_lock);
Robert Olsson19baf832005-06-21 12:43:18 -07002286 return s;
2287}
2288
2289#ifdef CONFIG_PROC_FS
2290
2291static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2292{
2293 return NULL;
2294}
2295
2296static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2297{
2298 return NULL;
2299}
2300
2301static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2302{
Olof Johansson91b9a272005-08-09 20:24:39 -07002303 if (!ip_fib_main_table)
2304 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002305
Olof Johansson91b9a272005-08-09 20:24:39 -07002306 if (*pos)
2307 return fib_triestat_get_next(seq);
2308 else
2309 return SEQ_START_TOKEN;
Robert Olsson19baf832005-06-21 12:43:18 -07002310}
2311
2312static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2313{
2314 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002315 if (v == SEQ_START_TOKEN)
2316 return fib_triestat_get_first(seq);
2317 else
2318 return fib_triestat_get_next(seq);
Robert Olsson19baf832005-06-21 12:43:18 -07002319}
2320
2321static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2322{
2323
2324}
2325
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002326/*
Robert Olsson19baf832005-06-21 12:43:18 -07002327 * This outputs /proc/net/fib_triestats
2328 *
2329 * It always works in backward compatibility mode.
2330 * The format of the file is not supposed to be changed.
2331 */
2332
2333static void collect_and_show(struct trie *t, struct seq_file *seq)
2334{
2335 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2336 int i, max, pointers;
Olof Johansson91b9a272005-08-09 20:24:39 -07002337 struct trie_stat *stat;
Robert Olsson19baf832005-06-21 12:43:18 -07002338 int avdepth;
2339
2340 stat = trie_collect_stats(t);
2341
Olof Johansson91b9a272005-08-09 20:24:39 -07002342 bytes = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002343 seq_printf(seq, "trie=%p\n", t);
2344
2345 if (stat) {
2346 if (stat->leaves)
Olof Johansson91b9a272005-08-09 20:24:39 -07002347 avdepth = stat->totdepth*100 / stat->leaves;
Robert Olsson19baf832005-06-21 12:43:18 -07002348 else
Olof Johansson91b9a272005-08-09 20:24:39 -07002349 avdepth = 0;
2350 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
Robert Olsson19baf832005-06-21 12:43:18 -07002351 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
Olof Johansson91b9a272005-08-09 20:24:39 -07002352
Robert Olsson19baf832005-06-21 12:43:18 -07002353 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2354 bytes += sizeof(struct leaf) * stat->leaves;
2355 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2356 bytes += sizeof(struct tnode) * stat->tnodes;
2357
2358 max = MAX_CHILDS-1;
2359
2360 while (max >= 0 && stat->nodesizes[max] == 0)
2361 max--;
2362 pointers = 0;
2363
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002364 for (i = 1; i <= max; i++)
Robert Olsson19baf832005-06-21 12:43:18 -07002365 if (stat->nodesizes[i] != 0) {
2366 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2367 pointers += (1<<i) * stat->nodesizes[i];
2368 }
2369 seq_printf(seq, "\n");
2370 seq_printf(seq, "Pointers: %d\n", pointers);
2371 bytes += sizeof(struct node *) * pointers;
2372 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2373 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2374
2375 kfree(stat);
2376 }
2377
2378#ifdef CONFIG_IP_FIB_TRIE_STATS
2379 seq_printf(seq, "Counters:\n---------\n");
2380 seq_printf(seq,"gets = %d\n", t->stats.gets);
2381 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2382 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2383 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2384 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
Robert Olsson2f368952005-07-05 15:02:40 -07002385 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002386#ifdef CLEAR_STATS
2387 memset(&(t->stats), 0, sizeof(t->stats));
2388#endif
2389#endif /* CONFIG_IP_FIB_TRIE_STATS */
2390}
2391
2392static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2393{
2394 char bf[128];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002395
Robert Olsson19baf832005-06-21 12:43:18 -07002396 if (v == SEQ_START_TOKEN) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002397 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
Robert Olsson19baf832005-06-21 12:43:18 -07002398 sizeof(struct leaf), sizeof(struct tnode));
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002399 if (trie_local)
Robert Olsson19baf832005-06-21 12:43:18 -07002400 collect_and_show(trie_local, seq);
2401
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002402 if (trie_main)
Robert Olsson19baf832005-06-21 12:43:18 -07002403 collect_and_show(trie_main, seq);
Olof Johansson91b9a272005-08-09 20:24:39 -07002404 } else {
2405 snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
2406
Robert Olsson19baf832005-06-21 12:43:18 -07002407 seq_printf(seq, "%-127s\n", bf);
2408 }
2409 return 0;
2410}
2411
2412static struct seq_operations fib_triestat_seq_ops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002413 .start = fib_triestat_seq_start,
2414 .next = fib_triestat_seq_next,
2415 .stop = fib_triestat_seq_stop,
2416 .show = fib_triestat_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002417};
2418
2419static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2420{
2421 struct seq_file *seq;
2422 int rc = -ENOMEM;
2423
2424 rc = seq_open(file, &fib_triestat_seq_ops);
2425 if (rc)
2426 goto out_kfree;
2427
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002428 seq = file->private_data;
Robert Olsson19baf832005-06-21 12:43:18 -07002429out:
2430 return rc;
2431out_kfree:
2432 goto out;
2433}
2434
2435static struct file_operations fib_triestat_seq_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002436 .owner = THIS_MODULE,
2437 .open = fib_triestat_seq_open,
2438 .read = seq_read,
2439 .llseek = seq_lseek,
2440 .release = seq_release_private,
Robert Olsson19baf832005-06-21 12:43:18 -07002441};
2442
2443int __init fib_stat_proc_init(void)
2444{
2445 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2446 return -ENOMEM;
2447 return 0;
2448}
2449
2450void __init fib_stat_proc_exit(void)
2451{
2452 proc_net_remove("fib_triestat");
2453}
2454
2455static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2456{
2457 return NULL;
2458}
2459
2460static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2461{
2462 return NULL;
2463}
2464
2465static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2466{
Olof Johansson91b9a272005-08-09 20:24:39 -07002467 if (!ip_fib_main_table)
2468 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002469
Olof Johansson91b9a272005-08-09 20:24:39 -07002470 if (*pos)
2471 return fib_trie_get_next(seq);
2472 else
2473 return SEQ_START_TOKEN;
Robert Olsson19baf832005-06-21 12:43:18 -07002474}
2475
2476static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2477{
2478 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002479 if (v == SEQ_START_TOKEN)
2480 return fib_trie_get_first(seq);
2481 else
2482 return fib_trie_get_next(seq);
2483
Robert Olsson19baf832005-06-21 12:43:18 -07002484}
2485
2486static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2487{
Robert Olsson19baf832005-06-21 12:43:18 -07002488}
2489
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002490/*
Robert Olsson19baf832005-06-21 12:43:18 -07002491 * This outputs /proc/net/fib_trie.
2492 *
2493 * It always works in backward compatibility mode.
2494 * The format of the file is not supposed to be changed.
2495 */
2496
2497static int fib_trie_seq_show(struct seq_file *seq, void *v)
2498{
2499 char bf[128];
2500
2501 if (v == SEQ_START_TOKEN) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002502 if (trie_local)
Robert Olsson19baf832005-06-21 12:43:18 -07002503 trie_dump_seq(seq, trie_local);
2504
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002505 if (trie_main)
Robert Olsson19baf832005-06-21 12:43:18 -07002506 trie_dump_seq(seq, trie_main);
Olof Johansson91b9a272005-08-09 20:24:39 -07002507 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07002508 snprintf(bf, sizeof(bf),
2509 "*\t%08X\t%08X", 200, 400);
2510 seq_printf(seq, "%-127s\n", bf);
2511 }
2512
2513 return 0;
2514}
2515
2516static struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002517 .start = fib_trie_seq_start,
2518 .next = fib_trie_seq_next,
2519 .stop = fib_trie_seq_stop,
2520 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002521};
2522
2523static int fib_trie_seq_open(struct inode *inode, struct file *file)
2524{
2525 struct seq_file *seq;
2526 int rc = -ENOMEM;
2527
2528 rc = seq_open(file, &fib_trie_seq_ops);
2529 if (rc)
2530 goto out_kfree;
2531
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002532 seq = file->private_data;
Robert Olsson19baf832005-06-21 12:43:18 -07002533out:
2534 return rc;
2535out_kfree:
2536 goto out;
2537}
2538
2539static struct file_operations fib_trie_seq_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002540 .owner = THIS_MODULE,
2541 .open = fib_trie_seq_open,
2542 .read = seq_read,
2543 .llseek = seq_lseek,
2544 .release= seq_release_private,
Robert Olsson19baf832005-06-21 12:43:18 -07002545};
2546
2547int __init fib_proc_init(void)
2548{
2549 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2550 return -ENOMEM;
2551 return 0;
2552}
2553
2554void __init fib_proc_exit(void)
2555{
2556 proc_net_remove("fib_trie");
2557}
2558
2559#endif /* CONFIG_PROC_FS */