blob: 9038b914b4b14df4076d56abb1100d024bbb9039 [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
44 */
45
Robert Olssonf835e472005-06-28 15:00:39 -070046#define VERSION "0.324"
Robert Olsson19baf832005-06-21 12:43:18 -070047
48#include <linux/config.h>
49#include <asm/uaccess.h>
50#include <asm/system.h>
51#include <asm/bitops.h>
52#include <linux/types.h>
53#include <linux/kernel.h>
54#include <linux/sched.h>
55#include <linux/mm.h>
56#include <linux/string.h>
57#include <linux/socket.h>
58#include <linux/sockios.h>
59#include <linux/errno.h>
60#include <linux/in.h>
61#include <linux/inet.h>
62#include <linux/netdevice.h>
63#include <linux/if_arp.h>
64#include <linux/proc_fs.h>
65#include <linux/skbuff.h>
66#include <linux/netlink.h>
67#include <linux/init.h>
68#include <linux/list.h>
69#include <net/ip.h>
70#include <net/protocol.h>
71#include <net/route.h>
72#include <net/tcp.h>
73#include <net/sock.h>
74#include <net/ip_fib.h>
75#include "fib_lookup.h"
76
77#undef CONFIG_IP_FIB_TRIE_STATS
78#define MAX_CHILDS 16384
79
80#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81#define KEYLENGTH (8*sizeof(t_key))
82#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
84
85static DEFINE_RWLOCK(fib_lock);
86
87typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
92#define NODE_PARENT(_node) \
93((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
94#define NODE_SET_PARENT(_node, _ptr) \
95((_node)->_parent = (((unsigned long)(_ptr)) | \
96 ((_node)->_parent & NODE_TYPE_MASK)))
97#define NODE_INIT_PARENT(_node, _type) \
98((_node)->_parent = (_type))
99#define NODE_TYPE(_node) \
100((_node)->_parent & NODE_TYPE_MASK)
101
102#define IS_TNODE(n) (!(n->_parent & T_LEAF))
103#define IS_LEAF(n) (n->_parent & T_LEAF)
104
105struct node {
106 t_key key;
107 unsigned long _parent;
108};
109
110struct leaf {
111 t_key key;
112 unsigned long _parent;
113 struct hlist_head list;
114};
115
116struct leaf_info {
117 struct hlist_node hlist;
118 int plen;
119 struct list_head falh;
120};
121
122struct tnode {
123 t_key key;
124 unsigned long _parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
130};
131
132#ifdef CONFIG_IP_FIB_TRIE_STATS
133struct trie_use_stats {
134 unsigned int gets;
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
148 unsigned int nodesizes[MAX_CHILDS];
149};
150
151struct trie {
152 struct node *trie;
153#ifdef CONFIG_IP_FIB_TRIE_STATS
154 struct trie_use_stats stats;
155#endif
156 int size;
157 unsigned int revision;
158};
159
160static int trie_debug = 0;
161
162static int tnode_full(struct tnode *tn, struct node *n);
163static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165static int tnode_child_length(struct tnode *tn);
166static struct node *resize(struct trie *t, struct tnode *tn);
167static struct tnode *inflate(struct trie *t, struct tnode *tn);
168static struct tnode *halve(struct trie *t, struct tnode *tn);
169static void tnode_free(struct tnode *tn);
170static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
172extern int fib_detect_death(struct fib_info *fi, int order,
173 struct fib_info **last_resort, int *last_idx, int *dflt);
174
175extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
176 struct nlmsghdr *n, struct netlink_skb_parms *req);
177
178static kmem_cache_t *fn_alias_kmem;
179static struct trie *trie_local = NULL, *trie_main = NULL;
180
181static void trie_bug(char *err)
182{
183 printk("Trie Bug: %s\n", err);
184 BUG();
185}
186
187static inline struct node *tnode_get_child(struct tnode *tn, int i)
188{
189 if (i >= 1<<tn->bits)
190 trie_bug("tnode_get_child");
191
192 return tn->child[i];
193}
194
195static inline int tnode_child_length(struct tnode *tn)
196{
197 return 1<<tn->bits;
198}
199
200/*
201 _________________________________________________________________
202 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
203 ----------------------------------------------------------------
204 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
205
206 _________________________________________________________________
207 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
208 -----------------------------------------------------------------
209 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
210
211 tp->pos = 7
212 tp->bits = 3
213 n->pos = 15
214 n->bits=4
215 KEYLENGTH=32
216*/
217
218static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
219{
220 if (offset < KEYLENGTH)
221 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
222 else
223 return 0;
224}
225
226static inline int tkey_equals(t_key a, t_key b)
227{
228 return a == b;
229}
230
231static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
232{
233 if (bits == 0 || offset >= KEYLENGTH)
234 return 1;
235 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
236 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
237}
238
239static inline int tkey_mismatch(t_key a, int offset, t_key b)
240{
241 t_key diff = a ^ b;
242 int i = offset;
243
244 if(!diff)
245 return 0;
246 while((diff << i) >> (KEYLENGTH-1) == 0)
247 i++;
248 return i;
249}
250
251/* Candiate for fib_semantics */
252
253static void fn_free_alias(struct fib_alias *fa)
254{
255 fib_release_info(fa->fa_info);
256 kmem_cache_free(fn_alias_kmem, fa);
257}
258
259/*
260 To understand this stuff, an understanding of keys and all their bits is
261 necessary. Every node in the trie has a key associated with it, but not
262 all of the bits in that key are significant.
263
264 Consider a node 'n' and its parent 'tp'.
265
266 If n is a leaf, every bit in its key is significant. Its presence is
267 necessitaded by path compression, since during a tree traversal (when
268 searching for a leaf - unless we are doing an insertion) we will completely
269 ignore all skipped bits we encounter. Thus we need to verify, at the end of
270 a potentially successful search, that we have indeed been walking the
271 correct key path.
272
273 Note that we can never "miss" the correct key in the tree if present by
274 following the wrong path. Path compression ensures that segments of the key
275 that are the same for all keys with a given prefix are skipped, but the
276 skipped part *is* identical for each node in the subtrie below the skipped
277 bit! trie_insert() in this implementation takes care of that - note the
278 call to tkey_sub_equals() in trie_insert().
279
280 if n is an internal node - a 'tnode' here, the various parts of its key
281 have many different meanings.
282
283 Example:
284 _________________________________________________________________
285 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
286 -----------------------------------------------------------------
287 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
288
289 _________________________________________________________________
290 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
291 -----------------------------------------------------------------
292 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
293
294 tp->pos = 7
295 tp->bits = 3
296 n->pos = 15
297 n->bits=4
298
299 First, let's just ignore the bits that come before the parent tp, that is
300 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
301 not use them for anything.
302
303 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
304 index into the parent's child array. That is, they will be used to find
305 'n' among tp's children.
306
307 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
308 for the node n.
309
310 All the bits we have seen so far are significant to the node n. The rest
311 of the bits are really not needed or indeed known in n->key.
312
313 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
314 n's child array, and will of course be different for each child.
315
316 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
317 at this point.
318
319*/
320
321static void check_tnode(struct tnode *tn)
322{
323 if(tn && tn->pos+tn->bits > 32) {
324 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
325 }
326}
327
328static int halve_threshold = 25;
329static int inflate_threshold = 50;
330
331static struct leaf *leaf_new(void)
332{
333 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
334 if(l) {
335 NODE_INIT_PARENT(l, T_LEAF);
336 INIT_HLIST_HEAD(&l->list);
337 }
338 return l;
339}
340
341static struct leaf_info *leaf_info_new(int plen)
342{
343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olssonf835e472005-06-28 15:00:39 -0700344 if(li) {
345 li->plen = plen;
346 INIT_LIST_HEAD(&li->falh);
347 }
Robert Olsson19baf832005-06-21 12:43:18 -0700348 return li;
349}
350
351static inline void free_leaf(struct leaf *l)
352{
353 kfree(l);
354}
355
356static inline void free_leaf_info(struct leaf_info *li)
357{
358 kfree(li);
359}
360
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700361static struct tnode *tnode_alloc(unsigned int size)
362{
363 if (size <= PAGE_SIZE) {
364 return kmalloc(size, GFP_KERNEL);
365 } else {
366 return (struct tnode *)
367 __get_free_pages(GFP_KERNEL, get_order(size));
368 }
369}
370
371static void __tnode_free(struct tnode *tn)
372{
373 unsigned int size = sizeof(struct tnode) +
374 (1<<tn->bits) * sizeof(struct node *);
375
376 if (size <= PAGE_SIZE)
377 kfree(tn);
378 else
379 free_pages((unsigned long)tn, get_order(size));
380}
381
Robert Olsson19baf832005-06-21 12:43:18 -0700382static struct tnode* tnode_new(t_key key, int pos, int bits)
383{
384 int nchildren = 1<<bits;
385 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700386 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700387
388 if(tn) {
389 memset(tn, 0, sz);
390 NODE_INIT_PARENT(tn, T_TNODE);
391 tn->pos = pos;
392 tn->bits = bits;
393 tn->key = key;
394 tn->full_children = 0;
395 tn->empty_children = 1<<bits;
396 }
397 if(trie_debug > 0)
398 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
399 (unsigned int) (sizeof(struct node) * 1<<bits));
400 return tn;
401}
402
403static void tnode_free(struct tnode *tn)
404{
405 if(!tn) {
406 trie_bug("tnode_free\n");
407 }
408 if(IS_LEAF(tn)) {
409 free_leaf((struct leaf *)tn);
410 if(trie_debug > 0 )
411 printk("FL %p \n", tn);
412 }
413 else if(IS_TNODE(tn)) {
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700414 __tnode_free(tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700415 if(trie_debug > 0 )
416 printk("FT %p \n", tn);
417 }
418 else {
419 trie_bug("tnode_free\n");
420 }
421}
422
423/*
424 * Check whether a tnode 'n' is "full", i.e. it is an internal node
425 * and no bits are skipped. See discussion in dyntree paper p. 6
426 */
427
428static inline int tnode_full(struct tnode *tn, struct node *n)
429{
430 if(n == NULL || IS_LEAF(n))
431 return 0;
432
433 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
434}
435
436static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
437{
438 tnode_put_child_reorg(tn, i, n, -1);
439}
440
441 /*
442 * Add a child at position i overwriting the old value.
443 * Update the value of full_children and empty_children.
444 */
445
446static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
447{
448 struct node *chi;
449 int isfull;
450
451 if(i >= 1<<tn->bits) {
452 printk("bits=%d, i=%d\n", tn->bits, i);
453 trie_bug("tnode_put_child_reorg bits");
454 }
455 write_lock_bh(&fib_lock);
456 chi = tn->child[i];
457
458 /* update emptyChildren */
459 if (n == NULL && chi != NULL)
460 tn->empty_children++;
461 else if (n != NULL && chi == NULL)
462 tn->empty_children--;
463
464 /* update fullChildren */
465 if (wasfull == -1)
466 wasfull = tnode_full(tn, chi);
467
468 isfull = tnode_full(tn, n);
469 if (wasfull && !isfull)
470 tn->full_children--;
471
472 else if (!wasfull && isfull)
473 tn->full_children++;
474 if(n)
475 NODE_SET_PARENT(n, tn);
476
477 tn->child[i] = n;
478 write_unlock_bh(&fib_lock);
479}
480
481static struct node *resize(struct trie *t, struct tnode *tn)
482{
483 int i;
484
485 if (!tn)
486 return NULL;
487
488 if(trie_debug)
489 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
490 tn, inflate_threshold, halve_threshold);
491
492 /* No children */
493 if (tn->empty_children == tnode_child_length(tn)) {
494 tnode_free(tn);
495 return NULL;
496 }
497 /* One child */
498 if (tn->empty_children == tnode_child_length(tn) - 1)
499 for (i = 0; i < tnode_child_length(tn); i++) {
500
501 write_lock_bh(&fib_lock);
502 if (tn->child[i] != NULL) {
503
504 /* compress one level */
505 struct node *n = tn->child[i];
506 if(n)
507 NODE_INIT_PARENT(n, NODE_TYPE(n));
508
509 write_unlock_bh(&fib_lock);
510 tnode_free(tn);
511 return n;
512 }
513 write_unlock_bh(&fib_lock);
514 }
515 /*
516 * Double as long as the resulting node has a number of
517 * nonempty nodes that are above the threshold.
518 */
519
520 /*
521 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
522 * the Helsinki University of Technology and Matti Tikkanen of Nokia
523 * Telecommunications, page 6:
524 * "A node is doubled if the ratio of non-empty children to all
525 * children in the *doubled* node is at least 'high'."
526 *
527 * 'high' in this instance is the variable 'inflate_threshold'. It
528 * is expressed as a percentage, so we multiply it with
529 * tnode_child_length() and instead of multiplying by 2 (since the
530 * child array will be doubled by inflate()) and multiplying
531 * the left-hand side by 100 (to handle the percentage thing) we
532 * multiply the left-hand side by 50.
533 *
534 * The left-hand side may look a bit weird: tnode_child_length(tn)
535 * - tn->empty_children is of course the number of non-null children
536 * in the current node. tn->full_children is the number of "full"
537 * children, that is non-null tnodes with a skip value of 0.
538 * All of those will be doubled in the resulting inflated tnode, so
539 * we just count them one extra time here.
540 *
541 * A clearer way to write this would be:
542 *
543 * to_be_doubled = tn->full_children;
544 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
545 * tn->full_children;
546 *
547 * new_child_length = tnode_child_length(tn) * 2;
548 *
549 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
550 * new_child_length;
551 * if (new_fill_factor >= inflate_threshold)
552 *
553 * ...and so on, tho it would mess up the while() loop.
554 *
555 * anyway,
556 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
557 * inflate_threshold
558 *
559 * avoid a division:
560 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
561 * inflate_threshold * new_child_length
562 *
563 * expand not_to_be_doubled and to_be_doubled, and shorten:
564 * 100 * (tnode_child_length(tn) - tn->empty_children +
565 * tn->full_children ) >= inflate_threshold * new_child_length
566 *
567 * expand new_child_length:
568 * 100 * (tnode_child_length(tn) - tn->empty_children +
569 * tn->full_children ) >=
570 * inflate_threshold * tnode_child_length(tn) * 2
571 *
572 * shorten again:
573 * 50 * (tn->full_children + tnode_child_length(tn) -
574 * tn->empty_children ) >= inflate_threshold *
575 * tnode_child_length(tn)
576 *
577 */
578
579 check_tnode(tn);
580
581 while ((tn->full_children > 0 &&
582 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
583 inflate_threshold * tnode_child_length(tn))) {
584
585 tn = inflate(t, tn);
586 }
587
588 check_tnode(tn);
589
590 /*
591 * Halve as long as the number of empty children in this
592 * node is above threshold.
593 */
594 while (tn->bits > 1 &&
595 100 * (tnode_child_length(tn) - tn->empty_children) <
596 halve_threshold * tnode_child_length(tn))
597
598 tn = halve(t, tn);
599
600 /* Only one child remains */
601
602 if (tn->empty_children == tnode_child_length(tn) - 1)
603 for (i = 0; i < tnode_child_length(tn); i++) {
604
605 write_lock_bh(&fib_lock);
606 if (tn->child[i] != NULL) {
607 /* compress one level */
608 struct node *n = tn->child[i];
609
610 if(n)
611 NODE_INIT_PARENT(n, NODE_TYPE(n));
612
613 write_unlock_bh(&fib_lock);
614 tnode_free(tn);
615 return n;
616 }
617 write_unlock_bh(&fib_lock);
618 }
619
620 return (struct node *) tn;
621}
622
623static struct tnode *inflate(struct trie *t, struct tnode *tn)
624{
625 struct tnode *inode;
626 struct tnode *oldtnode = tn;
627 int olen = tnode_child_length(tn);
628 int i;
629
630 if(trie_debug)
631 printk("In inflate\n");
632
633 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
634
635 if (!tn)
636 trie_bug("tnode_new failed");
637
638 for(i = 0; i < olen; i++) {
639 struct node *node = tnode_get_child(oldtnode, i);
640
641 /* An empty child */
642 if (node == NULL)
643 continue;
644
645 /* A leaf or an internal node with skipped bits */
646
647 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
648 tn->pos + tn->bits - 1) {
649 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1,
650 1) == 0)
651 put_child(t, tn, 2*i, node);
652 else
653 put_child(t, tn, 2*i+1, node);
654 continue;
655 }
656
657 /* An internal node with two children */
658 inode = (struct tnode *) node;
659
660 if (inode->bits == 1) {
661 put_child(t, tn, 2*i, inode->child[0]);
662 put_child(t, tn, 2*i+1, inode->child[1]);
663
664 tnode_free(inode);
665 }
666
667 /* An internal node with more than two children */
668 else {
669 struct tnode *left, *right;
670 int size, j;
671
672 /* We will replace this node 'inode' with two new
673 * ones, 'left' and 'right', each with half of the
674 * original children. The two new nodes will have
675 * a position one bit further down the key and this
676 * means that the "significant" part of their keys
677 * (see the discussion near the top of this file)
678 * will differ by one bit, which will be "0" in
679 * left's key and "1" in right's key. Since we are
680 * moving the key position by one step, the bit that
681 * we are moving away from - the bit at position
682 * (inode->pos) - is the one that will differ between
683 * left and right. So... we synthesize that bit in the
684 * two new keys.
685 * The mask 'm' below will be a single "one" bit at
686 * the position (inode->pos)
687 */
688
689 t_key m = TKEY_GET_MASK(inode->pos, 1);
690
691 /* Use the old key, but set the new significant
692 * bit to zero.
693 */
694 left = tnode_new(inode->key&(~m), inode->pos + 1,
695 inode->bits - 1);
696
697 if(!left)
698 trie_bug("tnode_new failed");
699
700
701 /* Use the old key, but set the new significant
702 * bit to one.
703 */
704 right = tnode_new(inode->key|m, inode->pos + 1,
705 inode->bits - 1);
706
707 if(!right)
708 trie_bug("tnode_new failed");
709
710 size = tnode_child_length(left);
711 for(j = 0; j < size; j++) {
712 put_child(t, left, j, inode->child[j]);
713 put_child(t, right, j, inode->child[j + size]);
714 }
715 put_child(t, tn, 2*i, resize(t, left));
716 put_child(t, tn, 2*i+1, resize(t, right));
717
718 tnode_free(inode);
719 }
720 }
721 tnode_free(oldtnode);
722 return tn;
723}
724
725static struct tnode *halve(struct trie *t, struct tnode *tn)
726{
727 struct tnode *oldtnode = tn;
728 struct node *left, *right;
729 int i;
730 int olen = tnode_child_length(tn);
731
732 if(trie_debug) printk("In halve\n");
733
734 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
735
736 if(!tn)
737 trie_bug("tnode_new failed");
738
739 for(i = 0; i < olen; i += 2) {
740 left = tnode_get_child(oldtnode, i);
741 right = tnode_get_child(oldtnode, i+1);
742
743 /* At least one of the children is empty */
744 if (left == NULL) {
745 if (right == NULL) /* Both are empty */
746 continue;
747 put_child(t, tn, i/2, right);
748 } else if (right == NULL)
749 put_child(t, tn, i/2, left);
750
751 /* Two nonempty children */
752 else {
753 struct tnode *newBinNode =
754 tnode_new(left->key, tn->pos + tn->bits, 1);
755
756 if(!newBinNode)
757 trie_bug("tnode_new failed");
758
759 put_child(t, newBinNode, 0, left);
760 put_child(t, newBinNode, 1, right);
761 put_child(t, tn, i/2, resize(t, newBinNode));
762 }
763 }
764 tnode_free(oldtnode);
765 return tn;
766}
767
768static void *trie_init(struct trie *t)
769{
770 if(t) {
771 t->size = 0;
772 t->trie = NULL;
773 t->revision = 0;
774#ifdef CONFIG_IP_FIB_TRIE_STATS
775 memset(&t->stats, 0, sizeof(struct trie_use_stats));
776#endif
777 }
778 return t;
779}
780
781static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
782{
783 struct hlist_node *node;
784 struct leaf_info *li;
785
786 hlist_for_each_entry(li, node, head, hlist) {
787
788 if ( li->plen == plen )
789 return li;
790 }
791 return NULL;
792}
793
794static inline struct list_head * get_fa_head(struct leaf *l, int plen)
795{
796 struct list_head *fa_head=NULL;
797 struct leaf_info *li = find_leaf_info(&l->list, plen);
798
799 if(li)
800 fa_head = &li->falh;
801
802 return fa_head;
803}
804
805static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
806{
807 struct leaf_info *li=NULL, *last=NULL;
808 struct hlist_node *node, *tmp;
809
810 write_lock_bh(&fib_lock);
811
812 if(hlist_empty(head))
813 hlist_add_head(&new->hlist, head);
814 else {
815 hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
816
817 if (new->plen > li->plen)
818 break;
819
820 last = li;
821 }
822 if(last)
823 hlist_add_after(&last->hlist, &new->hlist);
824 else
825 hlist_add_before(&new->hlist, &li->hlist);
826 }
827 write_unlock_bh(&fib_lock);
828}
829
830static struct leaf *
831fib_find_node(struct trie *t, u32 key)
832{
833 int pos;
834 struct tnode *tn;
835 struct node *n;
836
837 pos = 0;
838 n=t->trie;
839
840 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
841 tn = (struct tnode *) n;
842
843 check_tnode(tn);
844
845 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
846 pos=tn->pos + tn->bits;
847 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
848 }
849 else
850 break;
851 }
852 /* Case we have found a leaf. Compare prefixes */
853
854 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
855 struct leaf *l = (struct leaf *) n;
856 return l;
857 }
858 return NULL;
859}
860
861static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
862{
863 int i = 0;
864 int wasfull;
865 t_key cindex, key;
866 struct tnode *tp = NULL;
867
868 if(!tn)
869 BUG();
870
871 key = tn->key;
872 i = 0;
873
874 while (tn != NULL && NODE_PARENT(tn) != NULL) {
875
876 if( i > 10 ) {
877 printk("Rebalance tn=%p \n", tn);
878 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
879
880 printk("Rebalance tp=%p \n", tp);
881 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
882 }
883
884 if( i > 12 ) BUG();
885 i++;
886
887 tp = NODE_PARENT(tn);
888 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
889 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
890 tn = (struct tnode *) resize (t, (struct tnode *)tn);
891 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
892
893 if(!NODE_PARENT(tn))
894 break;
895
896 tn = NODE_PARENT(tn);
897 }
898 /* Handle last (top) tnode */
899 if (IS_TNODE(tn))
900 tn = (struct tnode*) resize(t, (struct tnode *)tn);
901
902 return (struct node*) tn;
903}
904
Robert Olssonf835e472005-06-28 15:00:39 -0700905static struct list_head *
906fib_insert_node(struct trie *t, int *err, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700907{
908 int pos, newpos;
909 struct tnode *tp = NULL, *tn = NULL;
910 struct node *n;
911 struct leaf *l;
912 int missbit;
913 struct list_head *fa_head=NULL;
914 struct leaf_info *li;
915 t_key cindex;
916
917 pos = 0;
918 n=t->trie;
919
920 /* If we point to NULL, stop. Either the tree is empty and we should
921 * just put a new leaf in if, or we have reached an empty child slot,
922 * and we should just put our new leaf in that.
923 * If we point to a T_TNODE, check if it matches our key. Note that
924 * a T_TNODE might be skipping any number of bits - its 'pos' need
925 * not be the parent's 'pos'+'bits'!
926 *
927 * If it does match the current key, get pos/bits from it, extract
928 * the index from our key, push the T_TNODE and walk the tree.
929 *
930 * If it doesn't, we have to replace it with a new T_TNODE.
931 *
932 * If we point to a T_LEAF, it might or might not have the same key
933 * as we do. If it does, just change the value, update the T_LEAF's
934 * value, and return it.
935 * If it doesn't, we need to replace it with a T_TNODE.
936 */
937
938 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
939 tn = (struct tnode *) n;
940
941 check_tnode(tn);
942
943 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
944 tp = tn;
945 pos=tn->pos + tn->bits;
946 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
947
948 if(n && NODE_PARENT(n) != tn) {
949 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
950 BUG();
951 }
952 }
953 else
954 break;
955 }
956
957 /*
958 * n ----> NULL, LEAF or TNODE
959 *
960 * tp is n's (parent) ----> NULL or TNODE
961 */
962
963 if(tp && IS_LEAF(tp))
964 BUG();
965
Robert Olsson19baf832005-06-21 12:43:18 -0700966
967 /* Case 1: n is a leaf. Compare prefixes */
968
969 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
970 struct leaf *l = ( struct leaf *) n;
971
972 li = leaf_info_new(plen);
973
Robert Olssonf835e472005-06-28 15:00:39 -0700974 if(! li) {
975 *err = -ENOMEM;
976 goto err;
977 }
Robert Olsson19baf832005-06-21 12:43:18 -0700978
979 fa_head = &li->falh;
980 insert_leaf_info(&l->list, li);
981 goto done;
982 }
983 t->size++;
984 l = leaf_new();
985
Robert Olssonf835e472005-06-28 15:00:39 -0700986 if(! l) {
987 *err = -ENOMEM;
988 goto err;
989 }
Robert Olsson19baf832005-06-21 12:43:18 -0700990
991 l->key = key;
992 li = leaf_info_new(plen);
993
Robert Olssonf835e472005-06-28 15:00:39 -0700994 if(! li) {
995 tnode_free((struct tnode *) l);
996 *err = -ENOMEM;
997 goto err;
998 }
Robert Olsson19baf832005-06-21 12:43:18 -0700999
1000 fa_head = &li->falh;
1001 insert_leaf_info(&l->list, li);
1002
1003 /* Case 2: n is NULL, and will just insert a new leaf */
1004 if (t->trie && n == NULL) {
1005
1006 NODE_SET_PARENT(l, tp);
1007
1008 if (!tp)
1009 BUG();
1010
1011 else {
1012 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1013 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1014 }
1015 }
1016 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
1017 else {
1018 /*
1019 * Add a new tnode here
1020 * first tnode need some special handling
1021 */
1022
1023 if (tp)
1024 pos=tp->pos+tp->bits;
1025 else
1026 pos=0;
1027 if(n) {
1028 newpos = tkey_mismatch(key, pos, n->key);
1029 tn = tnode_new(n->key, newpos, 1);
1030 }
1031 else {
1032 newpos = 0;
1033 tn = tnode_new(key, newpos, 1); /* First tnode */
1034 }
Robert Olsson19baf832005-06-21 12:43:18 -07001035
Robert Olssonf835e472005-06-28 15:00:39 -07001036 if(!tn) {
1037 free_leaf_info(li);
1038 tnode_free((struct tnode *) l);
1039 *err = -ENOMEM;
1040 goto err;
1041 }
1042
Robert Olsson19baf832005-06-21 12:43:18 -07001043 NODE_SET_PARENT(tn, tp);
1044
1045 missbit=tkey_extract_bits(key, newpos, 1);
1046 put_child(t, tn, missbit, (struct node *)l);
1047 put_child(t, tn, 1-missbit, n);
1048
1049 if(tp) {
1050 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1051 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1052 }
1053 else {
1054 t->trie = (struct node*) tn; /* First tnode */
1055 tp = tn;
1056 }
1057 }
1058 if(tp && tp->pos+tp->bits > 32) {
1059 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1060 tp, tp->pos, tp->bits, key, plen);
1061 }
1062 /* Rebalance the trie */
1063 t->trie = trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001064done:
1065 t->revision++;
1066err:;
Robert Olsson19baf832005-06-21 12:43:18 -07001067 return fa_head;
1068}
1069
1070static int
1071fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1072 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1073{
1074 struct trie *t = (struct trie *) tb->tb_data;
1075 struct fib_alias *fa, *new_fa;
1076 struct list_head *fa_head=NULL;
1077 struct fib_info *fi;
1078 int plen = r->rtm_dst_len;
1079 int type = r->rtm_type;
1080 u8 tos = r->rtm_tos;
1081 u32 key, mask;
1082 int err;
1083 struct leaf *l;
1084
1085 if (plen > 32)
1086 return -EINVAL;
1087
1088 key = 0;
1089 if (rta->rta_dst)
1090 memcpy(&key, rta->rta_dst, 4);
1091
1092 key = ntohl(key);
1093
1094 if(trie_debug)
1095 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1096
1097 mask = ntohl( inet_make_mask(plen) );
1098
1099 if(key & ~mask)
1100 return -EINVAL;
1101
1102 key = key & mask;
1103
1104 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
1105 goto err;
1106
1107 l = fib_find_node(t, key);
1108 fa = NULL;
1109
1110 if(l) {
1111 fa_head = get_fa_head(l, plen);
1112 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1113 }
1114
1115 /* Now fa, if non-NULL, points to the first fib alias
1116 * with the same keys [prefix,tos,priority], if such key already
1117 * exists or to the node before which we will insert new one.
1118 *
1119 * If fa is NULL, we will need to allocate a new one and
1120 * insert to the head of f.
1121 *
1122 * If f is NULL, no fib node matched the destination key
1123 * and we need to allocate a new one of those as well.
1124 */
1125
1126 if (fa &&
1127 fa->fa_info->fib_priority == fi->fib_priority) {
1128 struct fib_alias *fa_orig;
1129
1130 err = -EEXIST;
1131 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1132 goto out;
1133
1134 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1135 struct fib_info *fi_drop;
1136 u8 state;
1137
1138 write_lock_bh(&fib_lock);
1139
1140 fi_drop = fa->fa_info;
1141 fa->fa_info = fi;
1142 fa->fa_type = type;
1143 fa->fa_scope = r->rtm_scope;
1144 state = fa->fa_state;
1145 fa->fa_state &= ~FA_S_ACCESSED;
1146
1147 write_unlock_bh(&fib_lock);
1148
1149 fib_release_info(fi_drop);
1150 if (state & FA_S_ACCESSED)
1151 rt_cache_flush(-1);
1152
1153 goto succeeded;
1154 }
1155 /* Error if we find a perfect match which
1156 * uses the same scope, type, and nexthop
1157 * information.
1158 */
1159 fa_orig = fa;
1160 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1161 if (fa->fa_tos != tos)
1162 break;
1163 if (fa->fa_info->fib_priority != fi->fib_priority)
1164 break;
1165 if (fa->fa_type == type &&
1166 fa->fa_scope == r->rtm_scope &&
1167 fa->fa_info == fi) {
1168 goto out;
1169 }
1170 }
1171 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1172 fa = fa_orig;
1173 }
1174 err = -ENOENT;
1175 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
1176 goto out;
1177
1178 err = -ENOBUFS;
1179 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1180 if (new_fa == NULL)
1181 goto out;
1182
1183 new_fa->fa_info = fi;
1184 new_fa->fa_tos = tos;
1185 new_fa->fa_type = type;
1186 new_fa->fa_scope = r->rtm_scope;
1187 new_fa->fa_state = 0;
1188#if 0
1189 new_fa->dst = NULL;
1190#endif
1191 /*
1192 * Insert new entry to the list.
1193 */
1194
Robert Olssonf835e472005-06-28 15:00:39 -07001195 if(!fa_head) {
1196 fa_head = fib_insert_node(t, &err, key, plen);
1197 err = 0;
1198 if(err)
1199 goto out_free_new_fa;
1200 }
Robert Olsson19baf832005-06-21 12:43:18 -07001201
1202 write_lock_bh(&fib_lock);
1203
1204 list_add_tail(&new_fa->fa_list,
1205 (fa ? &fa->fa_list : fa_head));
1206
1207 write_unlock_bh(&fib_lock);
1208
1209 rt_cache_flush(-1);
1210 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1211succeeded:
1212 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001213
1214out_free_new_fa:
1215 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001216out:
1217 fib_release_info(fi);
1218err:;
1219 return err;
1220}
1221
1222static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
1223 struct fib_result *res, int *err)
1224{
1225 int i;
1226 t_key mask;
1227 struct leaf_info *li;
1228 struct hlist_head *hhead = &l->list;
1229 struct hlist_node *node;
1230
1231 hlist_for_each_entry(li, node, hhead, hlist) {
1232
1233 i = li->plen;
1234 mask = ntohl(inet_make_mask(i));
1235 if (l->key != (key & mask))
1236 continue;
1237
1238 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
1239 *plen = i;
1240#ifdef CONFIG_IP_FIB_TRIE_STATS
1241 t->stats.semantic_match_passed++;
1242#endif
1243 return 1;
1244 }
1245#ifdef CONFIG_IP_FIB_TRIE_STATS
1246 t->stats.semantic_match_miss++;
1247#endif
1248 }
1249 return 0;
1250}
1251
1252static int
1253fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1254{
1255 struct trie *t = (struct trie *) tb->tb_data;
1256 int plen, ret = 0;
1257 struct node *n;
1258 struct tnode *pn;
1259 int pos, bits;
1260 t_key key=ntohl(flp->fl4_dst);
1261 int chopped_off;
1262 t_key cindex = 0;
1263 int current_prefix_length = KEYLENGTH;
1264 n = t->trie;
1265
1266 read_lock(&fib_lock);
1267 if(!n)
1268 goto failed;
1269
1270#ifdef CONFIG_IP_FIB_TRIE_STATS
1271 t->stats.gets++;
1272#endif
1273
1274 /* Just a leaf? */
1275 if (IS_LEAF(n)) {
1276 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) )
1277 goto found;
1278 goto failed;
1279 }
1280 pn = (struct tnode *) n;
1281 chopped_off = 0;
1282
1283 while (pn) {
1284
1285 pos = pn->pos;
1286 bits = pn->bits;
1287
1288 if(!chopped_off)
1289 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1290
1291 n = tnode_get_child(pn, cindex);
1292
1293 if (n == NULL) {
1294#ifdef CONFIG_IP_FIB_TRIE_STATS
1295 t->stats.null_node_hit++;
1296#endif
1297 goto backtrace;
1298 }
1299
1300 if (IS_TNODE(n)) {
1301#define HL_OPTIMIZE
1302#ifdef HL_OPTIMIZE
1303 struct tnode *cn = (struct tnode *)n;
1304 t_key node_prefix, key_prefix, pref_mismatch;
1305 int mp;
1306
1307 /*
1308 * It's a tnode, and we can do some extra checks here if we
1309 * like, to avoid descending into a dead-end branch.
1310 * This tnode is in the parent's child array at index
1311 * key[p_pos..p_pos+p_bits] but potentially with some bits
1312 * chopped off, so in reality the index may be just a
1313 * subprefix, padded with zero at the end.
1314 * We can also take a look at any skipped bits in this
1315 * tnode - everything up to p_pos is supposed to be ok,
1316 * and the non-chopped bits of the index (se previous
1317 * paragraph) are also guaranteed ok, but the rest is
1318 * considered unknown.
1319 *
1320 * The skipped bits are key[pos+bits..cn->pos].
1321 */
1322
1323 /* If current_prefix_length < pos+bits, we are already doing
1324 * actual prefix matching, which means everything from
1325 * pos+(bits-chopped_off) onward must be zero along some
1326 * branch of this subtree - otherwise there is *no* valid
1327 * prefix present. Here we can only check the skipped
1328 * bits. Remember, since we have already indexed into the
1329 * parent's child array, we know that the bits we chopped of
1330 * *are* zero.
1331 */
1332
1333 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1334
1335 if (current_prefix_length < pos+bits) {
1336 if (tkey_extract_bits(cn->key, current_prefix_length,
1337 cn->pos - current_prefix_length) != 0 ||
1338 !(cn->child[0]))
1339 goto backtrace;
1340 }
1341
1342 /*
1343 * If chopped_off=0, the index is fully validated and we
1344 * only need to look at the skipped bits for this, the new,
1345 * tnode. What we actually want to do is to find out if
1346 * these skipped bits match our key perfectly, or if we will
1347 * have to count on finding a matching prefix further down,
1348 * because if we do, we would like to have some way of
1349 * verifying the existence of such a prefix at this point.
1350 */
1351
1352 /* The only thing we can do at this point is to verify that
1353 * any such matching prefix can indeed be a prefix to our
1354 * key, and if the bits in the node we are inspecting that
1355 * do not match our key are not ZERO, this cannot be true.
1356 * Thus, find out where there is a mismatch (before cn->pos)
1357 * and verify that all the mismatching bits are zero in the
1358 * new tnode's key.
1359 */
1360
1361 /* Note: We aren't very concerned about the piece of the key
1362 * that precede pn->pos+pn->bits, since these have already been
1363 * checked. The bits after cn->pos aren't checked since these are
1364 * by definition "unknown" at this point. Thus, what we want to
1365 * see is if we are about to enter the "prefix matching" state,
1366 * and in that case verify that the skipped bits that will prevail
1367 * throughout this subtree are zero, as they have to be if we are
1368 * to find a matching prefix.
1369 */
1370
1371 node_prefix = MASK_PFX(cn->key, cn->pos);
1372 key_prefix = MASK_PFX(key, cn->pos);
1373 pref_mismatch = key_prefix^node_prefix;
1374 mp = 0;
1375
1376 /* In short: If skipped bits in this node do not match the search
1377 * key, enter the "prefix matching" state.directly.
1378 */
1379 if (pref_mismatch) {
1380 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1381 mp++;
1382 pref_mismatch = pref_mismatch <<1;
1383 }
1384 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1385
1386 if (key_prefix != 0)
1387 goto backtrace;
1388
1389 if (current_prefix_length >= cn->pos)
1390 current_prefix_length=mp;
1391 }
1392#endif
1393 pn = (struct tnode *)n; /* Descend */
1394 chopped_off = 0;
1395 continue;
1396 }
1397 if (IS_LEAF(n)) {
1398 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1399 goto found;
1400 }
1401backtrace:
1402 chopped_off++;
1403
1404 /* As zero don't change the child key (cindex) */
1405 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
1406 chopped_off++;
1407 }
1408
1409 /* Decrease current_... with bits chopped off */
1410 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1411 current_prefix_length = pn->pos + pn->bits - chopped_off;
1412
1413 /*
1414 * Either we do the actual chop off according or if we have
1415 * chopped off all bits in this tnode walk up to our parent.
1416 */
1417
1418 if(chopped_off <= pn->bits)
1419 cindex &= ~(1 << (chopped_off-1));
1420 else {
1421 if( NODE_PARENT(pn) == NULL)
1422 goto failed;
1423
1424 /* Get Child's index */
1425 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1426 pn = NODE_PARENT(pn);
1427 chopped_off = 0;
1428
1429#ifdef CONFIG_IP_FIB_TRIE_STATS
1430 t->stats.backtrack++;
1431#endif
1432 goto backtrace;
1433 }
1434 }
1435failed:
1436 ret = 1;
1437found:
1438 read_unlock(&fib_lock);
1439 return ret;
1440}
1441
1442static int trie_leaf_remove(struct trie *t, t_key key)
1443{
1444 t_key cindex;
1445 struct tnode *tp = NULL;
1446 struct node *n = t->trie;
1447 struct leaf *l;
1448
1449 if(trie_debug)
1450 printk("entering trie_leaf_remove(%p)\n", n);
1451
1452 /* Note that in the case skipped bits, those bits are *not* checked!
1453 * When we finish this, we will have NULL or a T_LEAF, and the
1454 * T_LEAF may or may not match our key.
1455 */
1456
1457 while (n != NULL && IS_TNODE(n)) {
1458 struct tnode *tn = (struct tnode *) n;
1459 check_tnode(tn);
1460 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1461
1462 if(n && NODE_PARENT(n) != tn) {
1463 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1464 BUG();
1465 }
1466 }
1467 l = (struct leaf *) n;
1468
1469 if(!n || !tkey_equals(l->key, key))
1470 return 0;
1471
1472 /*
1473 * Key found.
1474 * Remove the leaf and rebalance the tree
1475 */
1476
1477 t->revision++;
1478 t->size--;
1479
1480 tp = NODE_PARENT(n);
1481 tnode_free((struct tnode *) n);
1482
1483 if(tp) {
1484 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1485 put_child(t, (struct tnode *)tp, cindex, NULL);
1486 t->trie = trie_rebalance(t, tp);
1487 }
1488 else
1489 t->trie = NULL;
1490
1491 return 1;
1492}
1493
1494static int
1495fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1496 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1497{
1498 struct trie *t = (struct trie *) tb->tb_data;
1499 u32 key, mask;
1500 int plen = r->rtm_dst_len;
1501 u8 tos = r->rtm_tos;
1502 struct fib_alias *fa, *fa_to_delete;
1503 struct list_head *fa_head;
1504 struct leaf *l;
1505
1506 if (plen > 32)
1507 return -EINVAL;
1508
1509 key = 0;
1510 if (rta->rta_dst)
1511 memcpy(&key, rta->rta_dst, 4);
1512
1513 key = ntohl(key);
1514 mask = ntohl( inet_make_mask(plen) );
1515
1516 if(key & ~mask)
1517 return -EINVAL;
1518
1519 key = key & mask;
1520 l = fib_find_node(t, key);
1521
1522 if(!l)
1523 return -ESRCH;
1524
1525 fa_head = get_fa_head(l, plen);
1526 fa = fib_find_alias(fa_head, tos, 0);
1527
1528 if (!fa)
1529 return -ESRCH;
1530
1531 if (trie_debug)
1532 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1533
1534 fa_to_delete = NULL;
1535 fa_head = fa->fa_list.prev;
1536 list_for_each_entry(fa, fa_head, fa_list) {
1537 struct fib_info *fi = fa->fa_info;
1538
1539 if (fa->fa_tos != tos)
1540 break;
1541
1542 if ((!r->rtm_type ||
1543 fa->fa_type == r->rtm_type) &&
1544 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1545 fa->fa_scope == r->rtm_scope) &&
1546 (!r->rtm_protocol ||
1547 fi->fib_protocol == r->rtm_protocol) &&
1548 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1549 fa_to_delete = fa;
1550 break;
1551 }
1552 }
1553
1554 if (fa_to_delete) {
1555 int kill_li = 0;
1556 struct leaf_info *li;
1557
1558 fa = fa_to_delete;
1559 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1560
1561 l = fib_find_node(t, key);
1562 li = find_leaf_info(&l->list, plen);
1563
1564 write_lock_bh(&fib_lock);
1565
1566 list_del(&fa->fa_list);
1567
1568 if(list_empty(fa_head)) {
1569 hlist_del(&li->hlist);
1570 kill_li = 1;
1571 }
1572 write_unlock_bh(&fib_lock);
1573
1574 if(kill_li)
1575 free_leaf_info(li);
1576
1577 if(hlist_empty(&l->list))
1578 trie_leaf_remove(t, key);
1579
1580 if (fa->fa_state & FA_S_ACCESSED)
1581 rt_cache_flush(-1);
1582
1583 fn_free_alias(fa);
1584 return 0;
1585 }
1586 return -ESRCH;
1587}
1588
1589static int trie_flush_list(struct trie *t, struct list_head *head)
1590{
1591 struct fib_alias *fa, *fa_node;
1592 int found = 0;
1593
1594 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1595 struct fib_info *fi = fa->fa_info;
1596
1597 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1598
1599 write_lock_bh(&fib_lock);
1600 list_del(&fa->fa_list);
1601 write_unlock_bh(&fib_lock);
1602
1603 fn_free_alias(fa);
1604 found++;
1605 }
1606 }
1607 return found;
1608}
1609
1610static int trie_flush_leaf(struct trie *t, struct leaf *l)
1611{
1612 int found = 0;
1613 struct hlist_head *lih = &l->list;
1614 struct hlist_node *node, *tmp;
1615 struct leaf_info *li = NULL;
1616
1617 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1618
1619 found += trie_flush_list(t, &li->falh);
1620
1621 if (list_empty(&li->falh)) {
1622
1623 write_lock_bh(&fib_lock);
1624 hlist_del(&li->hlist);
1625 write_unlock_bh(&fib_lock);
1626
1627 free_leaf_info(li);
1628 }
1629 }
1630 return found;
1631}
1632
1633static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1634{
1635 struct node *c = (struct node *) thisleaf;
1636 struct tnode *p;
1637 int idx;
1638
1639 if(c == NULL) {
1640 if(t->trie == NULL)
1641 return NULL;
1642
1643 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1644 return (struct leaf *) t->trie;
1645
1646 p = (struct tnode*) t->trie; /* Start */
1647 }
1648 else
1649 p = (struct tnode *) NODE_PARENT(c);
1650 while (p) {
1651 int pos, last;
1652
1653 /* Find the next child of the parent */
1654 if(c)
1655 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1656 else
1657 pos = 0;
1658
1659 last = 1 << p->bits;
1660 for(idx = pos; idx < last ; idx++) {
1661 if( p->child[idx]) {
1662
1663 /* Decend if tnode */
1664
1665 while (IS_TNODE(p->child[idx])) {
1666 p = (struct tnode*) p->child[idx];
1667 idx = 0;
1668
1669 /* Rightmost non-NULL branch */
1670 if( p && IS_TNODE(p) )
1671 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++;
1672
1673 /* Done with this tnode? */
1674 if( idx >= (1 << p->bits) || p->child[idx] == NULL )
1675 goto up;
1676 }
1677 return (struct leaf*) p->child[idx];
1678 }
1679 }
1680up:
1681 /* No more children go up one step */
1682 c = (struct node*) p;
1683 p = (struct tnode *) NODE_PARENT(p);
1684 }
1685 return NULL; /* Ready. Root of trie */
1686}
1687
1688static int fn_trie_flush(struct fib_table *tb)
1689{
1690 struct trie *t = (struct trie *) tb->tb_data;
1691 struct leaf *ll = NULL, *l = NULL;
1692 int found = 0, h;
1693
1694 t->revision++;
1695
1696 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1697 found += trie_flush_leaf(t, l);
1698
1699 if (ll && hlist_empty(&ll->list))
1700 trie_leaf_remove(t, ll->key);
1701 ll = l;
1702 }
1703
1704 if (ll && hlist_empty(&ll->list))
1705 trie_leaf_remove(t, ll->key);
1706
1707 if(trie_debug)
1708 printk("trie_flush found=%d\n", found);
1709 return found;
1710}
1711
1712static int trie_last_dflt=-1;
1713
1714static void
1715fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1716{
1717 struct trie *t = (struct trie *) tb->tb_data;
1718 int order, last_idx;
1719 struct fib_info *fi = NULL;
1720 struct fib_info *last_resort;
1721 struct fib_alias *fa = NULL;
1722 struct list_head *fa_head;
1723 struct leaf *l;
1724
1725 last_idx = -1;
1726 last_resort = NULL;
1727 order = -1;
1728
1729 read_lock(&fib_lock);
1730
1731 l = fib_find_node(t, 0);
1732 if(!l)
1733 goto out;
1734
1735 fa_head = get_fa_head(l, 0);
1736 if(!fa_head)
1737 goto out;
1738
1739 if (list_empty(fa_head))
1740 goto out;
1741
1742 list_for_each_entry(fa, fa_head, fa_list) {
1743 struct fib_info *next_fi = fa->fa_info;
1744
1745 if (fa->fa_scope != res->scope ||
1746 fa->fa_type != RTN_UNICAST)
1747 continue;
1748
1749 if (next_fi->fib_priority > res->fi->fib_priority)
1750 break;
1751 if (!next_fi->fib_nh[0].nh_gw ||
1752 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1753 continue;
1754 fa->fa_state |= FA_S_ACCESSED;
1755
1756 if (fi == NULL) {
1757 if (next_fi != res->fi)
1758 break;
1759 } else if (!fib_detect_death(fi, order, &last_resort,
1760 &last_idx, &trie_last_dflt)) {
1761 if (res->fi)
1762 fib_info_put(res->fi);
1763 res->fi = fi;
1764 atomic_inc(&fi->fib_clntref);
1765 trie_last_dflt = order;
1766 goto out;
1767 }
1768 fi = next_fi;
1769 order++;
1770 }
1771 if (order <= 0 || fi == NULL) {
1772 trie_last_dflt = -1;
1773 goto out;
1774 }
1775
1776 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1777 if (res->fi)
1778 fib_info_put(res->fi);
1779 res->fi = fi;
1780 atomic_inc(&fi->fib_clntref);
1781 trie_last_dflt = order;
1782 goto out;
1783 }
1784 if (last_idx >= 0) {
1785 if (res->fi)
1786 fib_info_put(res->fi);
1787 res->fi = last_resort;
1788 if (last_resort)
1789 atomic_inc(&last_resort->fib_clntref);
1790 }
1791 trie_last_dflt = last_idx;
1792 out:;
1793 read_unlock(&fib_lock);
1794}
1795
1796static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1797 struct sk_buff *skb, struct netlink_callback *cb)
1798{
1799 int i, s_i;
1800 struct fib_alias *fa;
1801
1802 u32 xkey=htonl(key);
1803
1804 s_i=cb->args[3];
1805 i = 0;
1806
1807 list_for_each_entry(fa, fah, fa_list) {
1808 if (i < s_i) {
1809 i++;
1810 continue;
1811 }
1812 if (fa->fa_info->fib_nh == NULL) {
1813 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1814 i++;
1815 continue;
1816 }
1817 if (fa->fa_info == NULL) {
1818 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1819 i++;
1820 continue;
1821 }
1822
1823 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1824 cb->nlh->nlmsg_seq,
1825 RTM_NEWROUTE,
1826 tb->tb_id,
1827 fa->fa_type,
1828 fa->fa_scope,
1829 &xkey,
1830 plen,
1831 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001832 fa->fa_info, 0) < 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001833 cb->args[3] = i;
1834 return -1;
1835 }
1836 i++;
1837 }
1838 cb->args[3]=i;
1839 return skb->len;
1840}
1841
1842static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1843 struct netlink_callback *cb)
1844{
1845 int h, s_h;
1846 struct list_head *fa_head;
1847 struct leaf *l = NULL;
1848 s_h=cb->args[2];
1849
1850 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1851
1852 if (h < s_h)
1853 continue;
1854 if (h > s_h)
1855 memset(&cb->args[3], 0,
1856 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1857
1858 fa_head = get_fa_head(l, plen);
1859
1860 if(!fa_head)
1861 continue;
1862
1863 if(list_empty(fa_head))
1864 continue;
1865
1866 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1867 cb->args[2]=h;
1868 return -1;
1869 }
1870 }
1871 cb->args[2]=h;
1872 return skb->len;
1873}
1874
1875static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1876{
1877 int m, s_m;
1878 struct trie *t = (struct trie *) tb->tb_data;
1879
1880 s_m = cb->args[1];
1881
1882 read_lock(&fib_lock);
1883 for (m=0; m<=32; m++) {
1884
1885 if (m < s_m)
1886 continue;
1887 if (m > s_m)
1888 memset(&cb->args[2], 0,
1889 sizeof(cb->args) - 2*sizeof(cb->args[0]));
1890
1891 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1892 cb->args[1] = m;
1893 goto out;
1894 }
1895 }
1896 read_unlock(&fib_lock);
1897 cb->args[1] = m;
1898 return skb->len;
1899 out:
1900 read_unlock(&fib_lock);
1901 return -1;
1902}
1903
1904/* Fix more generic FIB names for init later */
1905
1906#ifdef CONFIG_IP_MULTIPLE_TABLES
1907struct fib_table * fib_hash_init(int id)
1908#else
1909struct fib_table * __init fib_hash_init(int id)
1910#endif
1911{
1912 struct fib_table *tb;
1913 struct trie *t;
1914
1915 if (fn_alias_kmem == NULL)
1916 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1917 sizeof(struct fib_alias),
1918 0, SLAB_HWCACHE_ALIGN,
1919 NULL, NULL);
1920
1921 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1922 GFP_KERNEL);
1923 if (tb == NULL)
1924 return NULL;
1925
1926 tb->tb_id = id;
1927 tb->tb_lookup = fn_trie_lookup;
1928 tb->tb_insert = fn_trie_insert;
1929 tb->tb_delete = fn_trie_delete;
1930 tb->tb_flush = fn_trie_flush;
1931 tb->tb_select_default = fn_trie_select_default;
1932 tb->tb_dump = fn_trie_dump;
1933 memset(tb->tb_data, 0, sizeof(struct trie));
1934
1935 t = (struct trie *) tb->tb_data;
1936
1937 trie_init(t);
1938
1939 if (id == RT_TABLE_LOCAL)
1940 trie_local=t;
1941 else if (id == RT_TABLE_MAIN)
1942 trie_main=t;
1943
1944 if (id == RT_TABLE_LOCAL)
1945 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
1946
1947 return tb;
1948}
1949
1950/* Trie dump functions */
1951
1952static void putspace_seq(struct seq_file *seq, int n)
1953{
1954 while (n--) seq_printf(seq, " ");
1955}
1956
1957static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
1958{
1959 while (bits--)
1960 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
1961}
1962
1963static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
1964 int pend, int cindex, int bits)
1965{
1966 putspace_seq(seq, indent);
1967 if (IS_LEAF(n))
1968 seq_printf(seq, "|");
1969 else
1970 seq_printf(seq, "+");
1971 if (bits) {
1972 seq_printf(seq, "%d/", cindex);
1973 printbin_seq(seq, cindex, bits);
1974 seq_printf(seq, ": ");
1975 }
1976 else
1977 seq_printf(seq, "<root>: ");
1978 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
1979
1980 if (IS_LEAF(n))
1981 seq_printf(seq, "key=%d.%d.%d.%d\n",
1982 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
1983 else {
1984 int plen=((struct tnode *)n)->pos;
1985 t_key prf=MASK_PFX(n->key, plen);
1986 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
1987 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
1988 }
1989 if (IS_LEAF(n)) {
1990 struct leaf *l=(struct leaf *)n;
1991 struct fib_alias *fa;
1992 int i;
1993 for (i=32; i>=0; i--)
1994 if(find_leaf_info(&l->list, i)) {
1995
1996 struct list_head *fa_head = get_fa_head(l, i);
1997
1998 if(!fa_head)
1999 continue;
2000
2001 if(list_empty(fa_head))
2002 continue;
2003
2004 putspace_seq(seq, indent+2);
2005 seq_printf(seq, "{/%d...dumping}\n", i);
2006
2007
2008 list_for_each_entry(fa, fa_head, fa_list) {
2009 putspace_seq(seq, indent+2);
2010 if (fa->fa_info->fib_nh == NULL) {
2011 seq_printf(seq, "Error _fib_nh=NULL\n");
2012 continue;
2013 }
2014 if (fa->fa_info == NULL) {
2015 seq_printf(seq, "Error fa_info=NULL\n");
2016 continue;
2017 }
2018
2019 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2020 fa->fa_type,
2021 fa->fa_scope,
2022 fa->fa_tos);
2023 }
2024 }
2025 }
2026 else if (IS_TNODE(n)) {
2027 struct tnode *tn=(struct tnode *)n;
2028 putspace_seq(seq, indent); seq_printf(seq, "| ");
2029 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
2030 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2031 seq_printf(seq, "}\n");
2032 putspace_seq(seq, indent); seq_printf(seq, "| ");
2033 seq_printf(seq, "{pos=%d", tn->pos);
2034 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2035 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2036 putspace_seq(seq, indent); seq_printf(seq, "| ");
2037 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2038 }
2039}
2040
2041static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2042{
2043 struct node *n=t->trie;
2044 int cindex=0;
2045 int indent=1;
2046 int pend=0;
2047 int depth = 0;
2048
2049 read_lock(&fib_lock);
2050
2051 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2052 if (n) {
2053 printnode_seq(seq, indent, n, pend, cindex, 0);
2054 if (IS_TNODE(n)) {
2055 struct tnode *tn=(struct tnode *)n;
2056 pend = tn->pos+tn->bits;
2057 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2058 indent += 3;
2059 depth++;
2060
2061 while (tn && cindex < (1 << tn->bits)) {
2062 if (tn->child[cindex]) {
2063
2064 /* Got a child */
2065
2066 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2067 if (IS_LEAF(tn->child[cindex])) {
2068 cindex++;
2069
2070 }
2071 else {
2072 /*
2073 * New tnode. Decend one level
2074 */
2075
2076 depth++;
2077 n=tn->child[cindex];
2078 tn=(struct tnode *)n;
2079 pend=tn->pos+tn->bits;
2080 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2081 indent+=3;
2082 cindex=0;
2083 }
2084 }
2085 else
2086 cindex++;
2087
2088 /*
2089 * Test if we are done
2090 */
2091
2092 while (cindex >= (1 << tn->bits)) {
2093
2094 /*
2095 * Move upwards and test for root
2096 * pop off all traversed nodes
2097 */
2098
2099 if (NODE_PARENT(tn) == NULL) {
2100 tn = NULL;
2101 n = NULL;
2102 break;
2103 }
2104 else {
2105 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2106 tn = NODE_PARENT(tn);
2107 cindex++;
2108 n=(struct node *)tn;
2109 pend=tn->pos+tn->bits;
2110 indent-=3;
2111 depth--;
2112 }
2113 }
2114 }
2115 }
2116 else n = NULL;
2117 }
2118 else seq_printf(seq, "------ trie is empty\n");
2119
2120 read_unlock(&fib_lock);
2121}
2122
2123static struct trie_stat *trie_stat_new(void)
2124{
2125 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2126 int i;
2127
2128 if(s) {
2129 s->totdepth = 0;
2130 s->maxdepth = 0;
2131 s->tnodes = 0;
2132 s->leaves = 0;
2133 s->nullpointers = 0;
2134
2135 for(i=0; i< MAX_CHILDS; i++)
2136 s->nodesizes[i] = 0;
2137 }
2138 return s;
2139}
2140
2141static struct trie_stat *trie_collect_stats(struct trie *t)
2142{
2143 struct node *n=t->trie;
2144 struct trie_stat *s = trie_stat_new();
2145 int cindex = 0;
2146 int indent = 1;
2147 int pend = 0;
2148 int depth = 0;
2149
2150 read_lock(&fib_lock);
2151
2152 if (s) {
2153 if (n) {
2154 if (IS_TNODE(n)) {
2155 struct tnode *tn = (struct tnode *)n;
2156 pend=tn->pos+tn->bits;
2157 indent += 3;
2158 s->nodesizes[tn->bits]++;
2159 depth++;
2160
2161 while (tn && cindex < (1 << tn->bits)) {
2162 if (tn->child[cindex]) {
2163 /* Got a child */
2164
2165 if (IS_LEAF(tn->child[cindex])) {
2166 cindex++;
2167
2168 /* stats */
2169 if (depth > s->maxdepth)
2170 s->maxdepth = depth;
2171 s->totdepth += depth;
2172 s->leaves++;
2173 }
2174
2175 else {
2176 /*
2177 * New tnode. Decend one level
2178 */
2179
2180 s->tnodes++;
2181 s->nodesizes[tn->bits]++;
2182 depth++;
2183
2184 n = tn->child[cindex];
2185 tn = (struct tnode *)n;
2186 pend = tn->pos+tn->bits;
2187
2188 indent += 3;
2189 cindex = 0;
2190 }
2191 }
2192 else {
2193 cindex++;
2194 s->nullpointers++;
2195 }
2196
2197 /*
2198 * Test if we are done
2199 */
2200
2201 while (cindex >= (1 << tn->bits)) {
2202
2203 /*
2204 * Move upwards and test for root
2205 * pop off all traversed nodes
2206 */
2207
2208
2209 if (NODE_PARENT(tn) == NULL) {
2210 tn = NULL;
2211 n = NULL;
2212 break;
2213 }
2214 else {
2215 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2216 tn = NODE_PARENT(tn);
2217 cindex++;
2218 n = (struct node *)tn;
2219 pend=tn->pos+tn->bits;
2220 indent -= 3;
2221 depth--;
2222 }
2223 }
2224 }
2225 }
2226 else n = NULL;
2227 }
2228 }
2229
2230 read_unlock(&fib_lock);
2231 return s;
2232}
2233
2234#ifdef CONFIG_PROC_FS
2235
2236static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2237{
2238 return NULL;
2239}
2240
2241static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2242{
2243 return NULL;
2244}
2245
2246static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2247{
2248 void *v = NULL;
2249
2250 if (ip_fib_main_table)
2251 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
2252 return v;
2253}
2254
2255static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2256{
2257 ++*pos;
2258 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
2259}
2260
2261static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2262{
2263
2264}
2265
2266/*
2267 * This outputs /proc/net/fib_triestats
2268 *
2269 * It always works in backward compatibility mode.
2270 * The format of the file is not supposed to be changed.
2271 */
2272
2273static void collect_and_show(struct trie *t, struct seq_file *seq)
2274{
2275 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2276 int i, max, pointers;
2277 struct trie_stat *stat;
2278 int avdepth;
2279
2280 stat = trie_collect_stats(t);
2281
2282 bytes=0;
2283 seq_printf(seq, "trie=%p\n", t);
2284
2285 if (stat) {
2286 if (stat->leaves)
2287 avdepth=stat->totdepth*100 / stat->leaves;
2288 else
2289 avdepth=0;
2290 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2291 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2292
2293 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2294 bytes += sizeof(struct leaf) * stat->leaves;
2295 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2296 bytes += sizeof(struct tnode) * stat->tnodes;
2297
2298 max = MAX_CHILDS-1;
2299
2300 while (max >= 0 && stat->nodesizes[max] == 0)
2301 max--;
2302 pointers = 0;
2303
2304 for (i = 1; i <= max; i++)
2305 if (stat->nodesizes[i] != 0) {
2306 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2307 pointers += (1<<i) * stat->nodesizes[i];
2308 }
2309 seq_printf(seq, "\n");
2310 seq_printf(seq, "Pointers: %d\n", pointers);
2311 bytes += sizeof(struct node *) * pointers;
2312 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2313 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2314
2315 kfree(stat);
2316 }
2317
2318#ifdef CONFIG_IP_FIB_TRIE_STATS
2319 seq_printf(seq, "Counters:\n---------\n");
2320 seq_printf(seq,"gets = %d\n", t->stats.gets);
2321 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2322 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2323 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2324 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2325#ifdef CLEAR_STATS
2326 memset(&(t->stats), 0, sizeof(t->stats));
2327#endif
2328#endif /* CONFIG_IP_FIB_TRIE_STATS */
2329}
2330
2331static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2332{
2333 char bf[128];
2334
2335 if (v == SEQ_START_TOKEN) {
2336 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2337 sizeof(struct leaf), sizeof(struct tnode));
2338 if (trie_local)
2339 collect_and_show(trie_local, seq);
2340
2341 if (trie_main)
2342 collect_and_show(trie_main, seq);
2343 }
2344 else {
2345 snprintf(bf, sizeof(bf),
2346 "*\t%08X\t%08X", 200, 400);
2347
2348 seq_printf(seq, "%-127s\n", bf);
2349 }
2350 return 0;
2351}
2352
2353static struct seq_operations fib_triestat_seq_ops = {
2354 .start = fib_triestat_seq_start,
2355 .next = fib_triestat_seq_next,
2356 .stop = fib_triestat_seq_stop,
2357 .show = fib_triestat_seq_show,
2358};
2359
2360static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2361{
2362 struct seq_file *seq;
2363 int rc = -ENOMEM;
2364
2365 rc = seq_open(file, &fib_triestat_seq_ops);
2366 if (rc)
2367 goto out_kfree;
2368
2369 seq = file->private_data;
2370out:
2371 return rc;
2372out_kfree:
2373 goto out;
2374}
2375
2376static struct file_operations fib_triestat_seq_fops = {
2377 .owner = THIS_MODULE,
2378 .open = fib_triestat_seq_open,
2379 .read = seq_read,
2380 .llseek = seq_lseek,
2381 .release = seq_release_private,
2382};
2383
2384int __init fib_stat_proc_init(void)
2385{
2386 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2387 return -ENOMEM;
2388 return 0;
2389}
2390
2391void __init fib_stat_proc_exit(void)
2392{
2393 proc_net_remove("fib_triestat");
2394}
2395
2396static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2397{
2398 return NULL;
2399}
2400
2401static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2402{
2403 return NULL;
2404}
2405
2406static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2407{
2408 void *v = NULL;
2409
2410 if (ip_fib_main_table)
2411 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
2412 return v;
2413}
2414
2415static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2416{
2417 ++*pos;
2418 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
2419}
2420
2421static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2422{
2423
2424}
2425
2426/*
2427 * This outputs /proc/net/fib_trie.
2428 *
2429 * It always works in backward compatibility mode.
2430 * The format of the file is not supposed to be changed.
2431 */
2432
2433static int fib_trie_seq_show(struct seq_file *seq, void *v)
2434{
2435 char bf[128];
2436
2437 if (v == SEQ_START_TOKEN) {
2438 if (trie_local)
2439 trie_dump_seq(seq, trie_local);
2440
2441 if (trie_main)
2442 trie_dump_seq(seq, trie_main);
2443 }
2444
2445 else {
2446 snprintf(bf, sizeof(bf),
2447 "*\t%08X\t%08X", 200, 400);
2448 seq_printf(seq, "%-127s\n", bf);
2449 }
2450
2451 return 0;
2452}
2453
2454static struct seq_operations fib_trie_seq_ops = {
2455 .start = fib_trie_seq_start,
2456 .next = fib_trie_seq_next,
2457 .stop = fib_trie_seq_stop,
2458 .show = fib_trie_seq_show,
2459};
2460
2461static int fib_trie_seq_open(struct inode *inode, struct file *file)
2462{
2463 struct seq_file *seq;
2464 int rc = -ENOMEM;
2465
2466 rc = seq_open(file, &fib_trie_seq_ops);
2467 if (rc)
2468 goto out_kfree;
2469
2470 seq = file->private_data;
2471out:
2472 return rc;
2473out_kfree:
2474 goto out;
2475}
2476
2477static struct file_operations fib_trie_seq_fops = {
2478 .owner = THIS_MODULE,
2479 .open = fib_trie_seq_open,
2480 .read = seq_read,
2481 .llseek = seq_lseek,
2482 .release = seq_release_private,
2483};
2484
2485int __init fib_proc_init(void)
2486{
2487 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2488 return -ENOMEM;
2489 return 0;
2490}
2491
2492void __init fib_proc_exit(void)
2493{
2494 proc_net_remove("fib_trie");
2495}
2496
2497#endif /* CONFIG_PROC_FS */