blob: 1cb65305e1029b9bd47de0f041a469f098cb550f [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
12 *
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
16 *
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
26 *
27 *
28 * Code from fib_hash has been reused which includes the following header:
29 *
30 *
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
34 *
35 * IPv4 FIB: lookup engine and maintenance routines.
36 *
37 *
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
39 *
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080044 *
45 * Substantial contributions to this work comes from:
46 *
47 * David S. Miller, <davem@davemloft.net>
48 * Stephen Hemminger <shemminger@osdl.org>
49 * Paul E. McKenney <paulmck@us.ibm.com>
50 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070051 */
52
Robert Olsson550e29b2006-04-04 12:53:35 -070053#define VERSION "0.407"
Robert Olsson19baf832005-06-21 12:43:18 -070054
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <asm/uaccess.h>
56#include <asm/system.h>
57#include <asm/bitops.h>
58#include <linux/types.h>
59#include <linux/kernel.h>
60#include <linux/sched.h>
61#include <linux/mm.h>
62#include <linux/string.h>
63#include <linux/socket.h>
64#include <linux/sockios.h>
65#include <linux/errno.h>
66#include <linux/in.h>
67#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080068#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070069#include <linux/netdevice.h>
70#include <linux/if_arp.h>
71#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070072#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070073#include <linux/skbuff.h>
74#include <linux/netlink.h>
75#include <linux/init.h>
76#include <linux/list.h>
77#include <net/ip.h>
78#include <net/protocol.h>
79#include <net/route.h>
80#include <net/tcp.h>
81#include <net/sock.h>
82#include <net/ip_fib.h>
83#include "fib_lookup.h"
84
85#undef CONFIG_IP_FIB_TRIE_STATS
Robert Olsson06ef9212006-03-20 21:35:01 -080086#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070087
Robert Olsson19baf832005-06-21 12:43:18 -070088#define KEYLENGTH (8*sizeof(t_key))
89#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
90#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
91
Robert Olsson19baf832005-06-21 12:43:18 -070092typedef unsigned int t_key;
93
94#define T_TNODE 0
95#define T_LEAF 1
96#define NODE_TYPE_MASK 0x1UL
Olof Johansson91b9a272005-08-09 20:24:39 -070097#define NODE_PARENT(node) \
Robert Olsson2373ce12005-08-25 13:01:29 -070098 ((struct tnode *)rcu_dereference(((node)->parent & ~NODE_TYPE_MASK)))
99
100#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
101
102#define NODE_SET_PARENT(node, ptr) \
103 rcu_assign_pointer((node)->parent, \
104 ((unsigned long)(ptr)) | NODE_TYPE(node))
Robert Olsson19baf832005-06-21 12:43:18 -0700105
Olof Johansson91b9a272005-08-09 20:24:39 -0700106#define IS_TNODE(n) (!(n->parent & T_LEAF))
107#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -0700108
109struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -0700110 t_key key;
111 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700112};
113
114struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700115 t_key key;
116 unsigned long parent;
Robert Olsson19baf832005-06-21 12:43:18 -0700117 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700118 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700119};
120
121struct leaf_info {
122 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700123 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700124 int plen;
125 struct list_head falh;
126};
127
128struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700129 t_key key;
130 unsigned long parent;
131 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
132 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
133 unsigned short full_children; /* KEYLENGTH bits needed */
134 unsigned short empty_children; /* KEYLENGTH bits needed */
Robert Olsson2373ce12005-08-25 13:01:29 -0700135 struct rcu_head rcu;
Olof Johansson91b9a272005-08-09 20:24:39 -0700136 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700137};
138
139#ifdef CONFIG_IP_FIB_TRIE_STATS
140struct trie_use_stats {
141 unsigned int gets;
142 unsigned int backtrack;
143 unsigned int semantic_match_passed;
144 unsigned int semantic_match_miss;
145 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700146 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700147};
148#endif
149
150struct trie_stat {
151 unsigned int totdepth;
152 unsigned int maxdepth;
153 unsigned int tnodes;
154 unsigned int leaves;
155 unsigned int nullpointers;
Robert Olsson06ef9212006-03-20 21:35:01 -0800156 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700157};
Robert Olsson19baf832005-06-21 12:43:18 -0700158
159struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700160 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700161#ifdef CONFIG_IP_FIB_TRIE_STATS
162 struct trie_use_stats stats;
163#endif
Olof Johansson91b9a272005-08-09 20:24:39 -0700164 int size;
Robert Olsson19baf832005-06-21 12:43:18 -0700165 unsigned int revision;
166};
167
Robert Olsson19baf832005-06-21 12:43:18 -0700168static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
169static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700170static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700171static struct tnode *inflate(struct trie *t, struct tnode *tn);
172static struct tnode *halve(struct trie *t, struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700173static void tnode_free(struct tnode *tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700174
Eric Dumazetba899662005-08-26 12:05:31 -0700175static kmem_cache_t *fn_alias_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700176static struct trie *trie_local = NULL, *trie_main = NULL;
177
Robert Olsson2373ce12005-08-25 13:01:29 -0700178
179/* rcu_read_lock needs to be hold by caller from readside */
180
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700181static inline struct node *tnode_get_child(struct tnode *tn, int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700182{
Olof Johansson91b9a272005-08-09 20:24:39 -0700183 BUG_ON(i >= 1 << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700184
Robert Olsson2373ce12005-08-25 13:01:29 -0700185 return rcu_dereference(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700186}
187
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700188static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700189{
Olof Johansson91b9a272005-08-09 20:24:39 -0700190 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700191}
192
Robert Olsson19baf832005-06-21 12:43:18 -0700193static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
194{
Olof Johansson91b9a272005-08-09 20:24:39 -0700195 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700196 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700197 else
Robert Olsson19baf832005-06-21 12:43:18 -0700198 return 0;
199}
200
201static inline int tkey_equals(t_key a, t_key b)
202{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700203 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700204}
205
206static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
207{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700208 if (bits == 0 || offset >= KEYLENGTH)
209 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700210 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
211 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700212}
Robert Olsson19baf832005-06-21 12:43:18 -0700213
214static inline int tkey_mismatch(t_key a, int offset, t_key b)
215{
216 t_key diff = a ^ b;
217 int i = offset;
218
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700219 if (!diff)
220 return 0;
221 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700222 i++;
223 return i;
224}
225
Robert Olsson19baf832005-06-21 12:43:18 -0700226/*
227 To understand this stuff, an understanding of keys and all their bits is
228 necessary. Every node in the trie has a key associated with it, but not
229 all of the bits in that key are significant.
230
231 Consider a node 'n' and its parent 'tp'.
232
233 If n is a leaf, every bit in its key is significant. Its presence is
Robert Olsson772cb712005-09-19 15:31:18 -0700234 necessitated by path compression, since during a tree traversal (when
Robert Olsson19baf832005-06-21 12:43:18 -0700235 searching for a leaf - unless we are doing an insertion) we will completely
236 ignore all skipped bits we encounter. Thus we need to verify, at the end of
237 a potentially successful search, that we have indeed been walking the
238 correct key path.
239
240 Note that we can never "miss" the correct key in the tree if present by
241 following the wrong path. Path compression ensures that segments of the key
242 that are the same for all keys with a given prefix are skipped, but the
243 skipped part *is* identical for each node in the subtrie below the skipped
244 bit! trie_insert() in this implementation takes care of that - note the
245 call to tkey_sub_equals() in trie_insert().
246
247 if n is an internal node - a 'tnode' here, the various parts of its key
248 have many different meanings.
249
250 Example:
251 _________________________________________________________________
252 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
253 -----------------------------------------------------------------
254 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
255
256 _________________________________________________________________
257 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
258 -----------------------------------------------------------------
259 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
260
261 tp->pos = 7
262 tp->bits = 3
263 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700264 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700265
266 First, let's just ignore the bits that come before the parent tp, that is
267 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
268 not use them for anything.
269
270 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
271 index into the parent's child array. That is, they will be used to find
272 'n' among tp's children.
273
274 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
275 for the node n.
276
277 All the bits we have seen so far are significant to the node n. The rest
278 of the bits are really not needed or indeed known in n->key.
279
280 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
281 n's child array, and will of course be different for each child.
282
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700283
Robert Olsson19baf832005-06-21 12:43:18 -0700284 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
285 at this point.
286
287*/
288
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700289static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700290{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700291 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700292}
293
294static int halve_threshold = 25;
295static int inflate_threshold = 50;
Robert Olssone6308be2005-10-04 13:01:58 -0700296static int halve_threshold_root = 15;
297static int inflate_threshold_root = 25;
Robert Olsson19baf832005-06-21 12:43:18 -0700298
Robert Olsson2373ce12005-08-25 13:01:29 -0700299
300static void __alias_free_mem(struct rcu_head *head)
301{
302 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
303 kmem_cache_free(fn_alias_kmem, fa);
304}
305
306static inline void alias_free_mem_rcu(struct fib_alias *fa)
307{
308 call_rcu(&fa->rcu, __alias_free_mem);
309}
310
311static void __leaf_free_rcu(struct rcu_head *head)
312{
313 kfree(container_of(head, struct leaf, rcu));
314}
315
Robert Olsson2373ce12005-08-25 13:01:29 -0700316static void __leaf_info_free_rcu(struct rcu_head *head)
317{
318 kfree(container_of(head, struct leaf_info, rcu));
319}
320
321static inline void free_leaf_info(struct leaf_info *leaf)
322{
323 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
324}
325
326static struct tnode *tnode_alloc(unsigned int size)
327{
328 struct page *pages;
329
330 if (size <= PAGE_SIZE)
331 return kcalloc(size, 1, GFP_KERNEL);
332
333 pages = alloc_pages(GFP_KERNEL|__GFP_ZERO, get_order(size));
334 if (!pages)
335 return NULL;
336
337 return page_address(pages);
338}
339
340static void __tnode_free_rcu(struct rcu_head *head)
341{
342 struct tnode *tn = container_of(head, struct tnode, rcu);
343 unsigned int size = sizeof(struct tnode) +
344 (1 << tn->bits) * sizeof(struct node *);
345
346 if (size <= PAGE_SIZE)
347 kfree(tn);
348 else
349 free_pages((unsigned long)tn, get_order(size));
350}
351
352static inline void tnode_free(struct tnode *tn)
353{
Robert Olsson550e29b2006-04-04 12:53:35 -0700354 if(IS_LEAF(tn)) {
355 struct leaf *l = (struct leaf *) tn;
356 call_rcu_bh(&l->rcu, __leaf_free_rcu);
357 }
358 else
359 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700360}
361
Robert Olsson19baf832005-06-21 12:43:18 -0700362static struct leaf *leaf_new(void)
363{
364 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700365 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700366 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700367 INIT_HLIST_HEAD(&l->list);
368 }
369 return l;
370}
371
372static struct leaf_info *leaf_info_new(int plen)
373{
374 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700375 if (li) {
376 li->plen = plen;
377 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700378 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700379 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700380}
381
Robert Olsson19baf832005-06-21 12:43:18 -0700382static struct tnode* tnode_new(t_key key, int pos, int bits)
383{
384 int nchildren = 1<<bits;
385 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700386 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700387
Olof Johansson91b9a272005-08-09 20:24:39 -0700388 if (tn) {
Robert Olsson19baf832005-06-21 12:43:18 -0700389 memset(tn, 0, sz);
Robert Olsson2373ce12005-08-25 13:01:29 -0700390 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700391 tn->pos = pos;
392 tn->bits = bits;
393 tn->key = key;
394 tn->full_children = 0;
395 tn->empty_children = 1<<bits;
396 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700397
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700398 pr_debug("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
399 (unsigned int) (sizeof(struct node) * 1<<bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700400 return tn;
401}
402
Robert Olsson19baf832005-06-21 12:43:18 -0700403/*
404 * Check whether a tnode 'n' is "full", i.e. it is an internal node
405 * and no bits are skipped. See discussion in dyntree paper p. 6
406 */
407
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700408static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700409{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700410 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700411 return 0;
412
413 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
414}
415
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700416static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700417{
418 tnode_put_child_reorg(tn, i, n, -1);
419}
420
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700421 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700422 * Add a child at position i overwriting the old value.
423 * Update the value of full_children and empty_children.
424 */
425
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700426static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700427{
Robert Olsson2373ce12005-08-25 13:01:29 -0700428 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700429 int isfull;
430
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700431 BUG_ON(i >= 1<<tn->bits);
432
Robert Olsson19baf832005-06-21 12:43:18 -0700433
434 /* update emptyChildren */
435 if (n == NULL && chi != NULL)
436 tn->empty_children++;
437 else if (n != NULL && chi == NULL)
438 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700439
Robert Olsson19baf832005-06-21 12:43:18 -0700440 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700441 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700442 wasfull = tnode_full(tn, chi);
443
444 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700445 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700446 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700447 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700448 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700449
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700450 if (n)
451 NODE_SET_PARENT(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700452
Robert Olsson2373ce12005-08-25 13:01:29 -0700453 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700454}
455
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700456static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700457{
458 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700459 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700460 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700461 int inflate_threshold_use;
462 int halve_threshold_use;
Robert Olsson19baf832005-06-21 12:43:18 -0700463
464 if (!tn)
465 return NULL;
466
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700467 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
468 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700469
470 /* No children */
471 if (tn->empty_children == tnode_child_length(tn)) {
472 tnode_free(tn);
473 return NULL;
474 }
475 /* One child */
476 if (tn->empty_children == tnode_child_length(tn) - 1)
477 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700478 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700479
Olof Johansson91b9a272005-08-09 20:24:39 -0700480 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700481 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700482 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700483
484 /* compress one level */
Robert Olsson2373ce12005-08-25 13:01:29 -0700485 NODE_SET_PARENT(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700486 tnode_free(tn);
487 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700488 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700489 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700490 * Double as long as the resulting node has a number of
491 * nonempty nodes that are above the threshold.
492 */
493
494 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700495 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
496 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700497 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700498 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700499 * children in the *doubled* node is at least 'high'."
500 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700501 * 'high' in this instance is the variable 'inflate_threshold'. It
502 * is expressed as a percentage, so we multiply it with
503 * tnode_child_length() and instead of multiplying by 2 (since the
504 * child array will be doubled by inflate()) and multiplying
505 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700506 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700507 *
508 * The left-hand side may look a bit weird: tnode_child_length(tn)
509 * - tn->empty_children is of course the number of non-null children
510 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700511 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700512 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700513 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700514 *
Robert Olsson19baf832005-06-21 12:43:18 -0700515 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700516 *
Robert Olsson19baf832005-06-21 12:43:18 -0700517 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700518 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700519 * tn->full_children;
520 *
521 * new_child_length = tnode_child_length(tn) * 2;
522 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700523 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700524 * new_child_length;
525 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700526 *
527 * ...and so on, tho it would mess up the while () loop.
528 *
Robert Olsson19baf832005-06-21 12:43:18 -0700529 * anyway,
530 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
531 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700532 *
Robert Olsson19baf832005-06-21 12:43:18 -0700533 * avoid a division:
534 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
535 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 *
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700538 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700539 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700540 *
Robert Olsson19baf832005-06-21 12:43:18 -0700541 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700542 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700543 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700545 *
Robert Olsson19baf832005-06-21 12:43:18 -0700546 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700547 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700548 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700549 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700550 *
Robert Olsson19baf832005-06-21 12:43:18 -0700551 */
552
553 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700554
Robert Olssone6308be2005-10-04 13:01:58 -0700555 /* Keep root node larger */
556
557 if(!tn->parent)
558 inflate_threshold_use = inflate_threshold_root;
559 else
560 inflate_threshold_use = inflate_threshold;
561
Robert Olsson2f368952005-07-05 15:02:40 -0700562 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700563 while ((tn->full_children > 0 &&
564 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
Robert Olssone6308be2005-10-04 13:01:58 -0700565 inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700566
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700567 old_tn = tn;
568 tn = inflate(t, tn);
569 if (IS_ERR(tn)) {
570 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700571#ifdef CONFIG_IP_FIB_TRIE_STATS
572 t->stats.resize_node_skipped++;
573#endif
574 break;
575 }
Robert Olsson19baf832005-06-21 12:43:18 -0700576 }
577
578 check_tnode(tn);
579
580 /*
581 * Halve as long as the number of empty children in this
582 * node is above threshold.
583 */
Robert Olsson2f368952005-07-05 15:02:40 -0700584
Robert Olssone6308be2005-10-04 13:01:58 -0700585
586 /* Keep root node larger */
587
588 if(!tn->parent)
589 halve_threshold_use = halve_threshold_root;
590 else
591 halve_threshold_use = halve_threshold;
592
Robert Olsson2f368952005-07-05 15:02:40 -0700593 err = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700594 while (tn->bits > 1 &&
595 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700596 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700597
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700598 old_tn = tn;
599 tn = halve(t, tn);
600 if (IS_ERR(tn)) {
601 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700602#ifdef CONFIG_IP_FIB_TRIE_STATS
603 t->stats.resize_node_skipped++;
604#endif
605 break;
606 }
607 }
608
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700609
Robert Olsson19baf832005-06-21 12:43:18 -0700610 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700611 if (tn->empty_children == tnode_child_length(tn) - 1)
612 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700613 struct node *n;
614
Olof Johansson91b9a272005-08-09 20:24:39 -0700615 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700616 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700617 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700618
619 /* compress one level */
620
Robert Olsson2373ce12005-08-25 13:01:29 -0700621 NODE_SET_PARENT(n, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700622 tnode_free(tn);
623 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700624 }
625
626 return (struct node *) tn;
627}
628
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700629static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700630{
631 struct tnode *inode;
632 struct tnode *oldtnode = tn;
633 int olen = tnode_child_length(tn);
634 int i;
635
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700636 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700637
638 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
639
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700640 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700641 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700642
643 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700644 * Preallocate and store tnodes before the actual work so we
645 * don't get into an inconsistent state if memory allocation
646 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700647 * of tnode is ignored.
648 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700649
650 for (i = 0; i < olen; i++) {
Robert Olsson2f368952005-07-05 15:02:40 -0700651 struct tnode *inode = (struct tnode *) tnode_get_child(oldtnode, i);
652
653 if (inode &&
654 IS_TNODE(inode) &&
655 inode->pos == oldtnode->pos + oldtnode->bits &&
656 inode->bits > 1) {
657 struct tnode *left, *right;
Robert Olsson2f368952005-07-05 15:02:40 -0700658 t_key m = TKEY_GET_MASK(inode->pos, 1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700659
Robert Olsson2f368952005-07-05 15:02:40 -0700660 left = tnode_new(inode->key&(~m), inode->pos + 1,
661 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700662 if (!left)
663 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700664
Robert Olsson2f368952005-07-05 15:02:40 -0700665 right = tnode_new(inode->key|m, inode->pos + 1,
666 inode->bits - 1);
667
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700668 if (!right) {
669 tnode_free(left);
670 goto nomem;
671 }
Robert Olsson2f368952005-07-05 15:02:40 -0700672
673 put_child(t, tn, 2*i, (struct node *) left);
674 put_child(t, tn, 2*i+1, (struct node *) right);
675 }
676 }
677
Olof Johansson91b9a272005-08-09 20:24:39 -0700678 for (i = 0; i < olen; i++) {
Robert Olsson19baf832005-06-21 12:43:18 -0700679 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700680 struct tnode *left, *right;
681 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700682
Robert Olsson19baf832005-06-21 12:43:18 -0700683 /* An empty child */
684 if (node == NULL)
685 continue;
686
687 /* A leaf or an internal node with skipped bits */
688
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700689 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700690 tn->pos + tn->bits - 1) {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700691 if (tkey_extract_bits(node->key, oldtnode->pos + oldtnode->bits,
Robert Olsson19baf832005-06-21 12:43:18 -0700692 1) == 0)
693 put_child(t, tn, 2*i, node);
694 else
695 put_child(t, tn, 2*i+1, node);
696 continue;
697 }
698
699 /* An internal node with two children */
700 inode = (struct tnode *) node;
701
702 if (inode->bits == 1) {
703 put_child(t, tn, 2*i, inode->child[0]);
704 put_child(t, tn, 2*i+1, inode->child[1]);
705
706 tnode_free(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700707 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700708 }
709
Olof Johansson91b9a272005-08-09 20:24:39 -0700710 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700711
Olof Johansson91b9a272005-08-09 20:24:39 -0700712 /* We will replace this node 'inode' with two new
713 * ones, 'left' and 'right', each with half of the
714 * original children. The two new nodes will have
715 * a position one bit further down the key and this
716 * means that the "significant" part of their keys
717 * (see the discussion near the top of this file)
718 * will differ by one bit, which will be "0" in
719 * left's key and "1" in right's key. Since we are
720 * moving the key position by one step, the bit that
721 * we are moving away from - the bit at position
722 * (inode->pos) - is the one that will differ between
723 * left and right. So... we synthesize that bit in the
724 * two new keys.
725 * The mask 'm' below will be a single "one" bit at
726 * the position (inode->pos)
727 */
Robert Olsson19baf832005-06-21 12:43:18 -0700728
Olof Johansson91b9a272005-08-09 20:24:39 -0700729 /* Use the old key, but set the new significant
730 * bit to zero.
731 */
Robert Olsson19baf832005-06-21 12:43:18 -0700732
Olof Johansson91b9a272005-08-09 20:24:39 -0700733 left = (struct tnode *) tnode_get_child(tn, 2*i);
734 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700735
Olof Johansson91b9a272005-08-09 20:24:39 -0700736 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700737
Olof Johansson91b9a272005-08-09 20:24:39 -0700738 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
739 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700740
Olof Johansson91b9a272005-08-09 20:24:39 -0700741 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700742
Olof Johansson91b9a272005-08-09 20:24:39 -0700743 size = tnode_child_length(left);
744 for (j = 0; j < size; j++) {
745 put_child(t, left, j, inode->child[j]);
746 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700747 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700748 put_child(t, tn, 2*i, resize(t, left));
749 put_child(t, tn, 2*i+1, resize(t, right));
750
751 tnode_free(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700752 }
753 tnode_free(oldtnode);
754 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700755nomem:
756 {
757 int size = tnode_child_length(tn);
758 int j;
759
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700760 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700761 if (tn->child[j])
762 tnode_free((struct tnode *)tn->child[j]);
763
764 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700765
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700766 return ERR_PTR(-ENOMEM);
767 }
Robert Olsson19baf832005-06-21 12:43:18 -0700768}
769
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700770static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700771{
772 struct tnode *oldtnode = tn;
773 struct node *left, *right;
774 int i;
775 int olen = tnode_child_length(tn);
776
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700777 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700778
779 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700780
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700781 if (!tn)
782 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700783
784 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700785 * Preallocate and store tnodes before the actual work so we
786 * don't get into an inconsistent state if memory allocation
787 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700788 * of tnode is ignored.
789 */
790
Olof Johansson91b9a272005-08-09 20:24:39 -0700791 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700792 left = tnode_get_child(oldtnode, i);
793 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700794
Robert Olsson2f368952005-07-05 15:02:40 -0700795 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700796 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700797 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700798
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700799 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700800
801 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700802 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700803
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700804 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700805 }
Robert Olsson2f368952005-07-05 15:02:40 -0700806
Robert Olsson2f368952005-07-05 15:02:40 -0700807 }
Robert Olsson19baf832005-06-21 12:43:18 -0700808
Olof Johansson91b9a272005-08-09 20:24:39 -0700809 for (i = 0; i < olen; i += 2) {
810 struct tnode *newBinNode;
811
Robert Olsson19baf832005-06-21 12:43:18 -0700812 left = tnode_get_child(oldtnode, i);
813 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700814
Robert Olsson19baf832005-06-21 12:43:18 -0700815 /* At least one of the children is empty */
816 if (left == NULL) {
817 if (right == NULL) /* Both are empty */
818 continue;
819 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700820 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700821 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700822
823 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700824 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700825 continue;
826 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700827
Robert Olsson19baf832005-06-21 12:43:18 -0700828 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700829 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
830 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700831 put_child(t, newBinNode, 0, left);
832 put_child(t, newBinNode, 1, right);
833 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700834 }
835 tnode_free(oldtnode);
836 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700837nomem:
838 {
839 int size = tnode_child_length(tn);
840 int j;
841
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700842 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700843 if (tn->child[j])
844 tnode_free((struct tnode *)tn->child[j]);
845
846 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700847
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700848 return ERR_PTR(-ENOMEM);
849 }
Robert Olsson19baf832005-06-21 12:43:18 -0700850}
851
Olof Johansson91b9a272005-08-09 20:24:39 -0700852static void trie_init(struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -0700853{
Olof Johansson91b9a272005-08-09 20:24:39 -0700854 if (!t)
855 return;
856
857 t->size = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700858 rcu_assign_pointer(t->trie, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700859 t->revision = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700860#ifdef CONFIG_IP_FIB_TRIE_STATS
Olof Johansson91b9a272005-08-09 20:24:39 -0700861 memset(&t->stats, 0, sizeof(struct trie_use_stats));
Robert Olsson19baf832005-06-21 12:43:18 -0700862#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700863}
864
Robert Olsson772cb712005-09-19 15:31:18 -0700865/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700866 via get_fa_head and dump */
867
Robert Olsson772cb712005-09-19 15:31:18 -0700868static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700869{
Robert Olsson772cb712005-09-19 15:31:18 -0700870 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700871 struct hlist_node *node;
872 struct leaf_info *li;
873
Robert Olsson2373ce12005-08-25 13:01:29 -0700874 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700875 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700876 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700877
Robert Olsson19baf832005-06-21 12:43:18 -0700878 return NULL;
879}
880
881static inline struct list_head * get_fa_head(struct leaf *l, int plen)
882{
Robert Olsson772cb712005-09-19 15:31:18 -0700883 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700884
Olof Johansson91b9a272005-08-09 20:24:39 -0700885 if (!li)
886 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700887
Olof Johansson91b9a272005-08-09 20:24:39 -0700888 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700889}
890
891static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
892{
Robert Olsson2373ce12005-08-25 13:01:29 -0700893 struct leaf_info *li = NULL, *last = NULL;
894 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700895
Robert Olsson2373ce12005-08-25 13:01:29 -0700896 if (hlist_empty(head)) {
897 hlist_add_head_rcu(&new->hlist, head);
898 } else {
899 hlist_for_each_entry(li, node, head, hlist) {
900 if (new->plen > li->plen)
901 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700902
Robert Olsson2373ce12005-08-25 13:01:29 -0700903 last = li;
904 }
905 if (last)
906 hlist_add_after_rcu(&last->hlist, &new->hlist);
907 else
908 hlist_add_before_rcu(&new->hlist, &li->hlist);
909 }
Robert Olsson19baf832005-06-21 12:43:18 -0700910}
911
Robert Olsson2373ce12005-08-25 13:01:29 -0700912/* rcu_read_lock needs to be hold by caller from readside */
913
Robert Olsson19baf832005-06-21 12:43:18 -0700914static struct leaf *
915fib_find_node(struct trie *t, u32 key)
916{
917 int pos;
918 struct tnode *tn;
919 struct node *n;
920
921 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700922 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700923
924 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
925 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700926
Robert Olsson19baf832005-06-21 12:43:18 -0700927 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700928
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700929 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700930 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700931 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700932 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700933 break;
934 }
935 /* Case we have found a leaf. Compare prefixes */
936
Olof Johansson91b9a272005-08-09 20:24:39 -0700937 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
938 return (struct leaf *)n;
939
Robert Olsson19baf832005-06-21 12:43:18 -0700940 return NULL;
941}
942
943static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
944{
Robert Olsson19baf832005-06-21 12:43:18 -0700945 int wasfull;
946 t_key cindex, key;
947 struct tnode *tp = NULL;
948
Robert Olsson19baf832005-06-21 12:43:18 -0700949 key = tn->key;
Robert Olsson19baf832005-06-21 12:43:18 -0700950
951 while (tn != NULL && NODE_PARENT(tn) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700952
953 tp = NODE_PARENT(tn);
954 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
955 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
956 tn = (struct tnode *) resize (t, (struct tnode *)tn);
957 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700958
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700959 if (!NODE_PARENT(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700960 break;
961
962 tn = NODE_PARENT(tn);
963 }
964 /* Handle last (top) tnode */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700965 if (IS_TNODE(tn))
Robert Olsson19baf832005-06-21 12:43:18 -0700966 tn = (struct tnode*) resize(t, (struct tnode *)tn);
967
968 return (struct node*) tn;
969}
970
Robert Olsson2373ce12005-08-25 13:01:29 -0700971/* only used from updater-side */
972
Robert Olssonf835e472005-06-28 15:00:39 -0700973static struct list_head *
974fib_insert_node(struct trie *t, int *err, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700975{
976 int pos, newpos;
977 struct tnode *tp = NULL, *tn = NULL;
978 struct node *n;
979 struct leaf *l;
980 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700981 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700982 struct leaf_info *li;
983 t_key cindex;
984
985 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700986 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700987
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700988 /* If we point to NULL, stop. Either the tree is empty and we should
989 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -0700990 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700991 * If we point to a T_TNODE, check if it matches our key. Note that
992 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -0700993 * not be the parent's 'pos'+'bits'!
994 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700995 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -0700996 * the index from our key, push the T_TNODE and walk the tree.
997 *
998 * If it doesn't, we have to replace it with a new T_TNODE.
999 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001000 * If we point to a T_LEAF, it might or might not have the same key
1001 * as we do. If it does, just change the value, update the T_LEAF's
1002 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001003 * If it doesn't, we need to replace it with a T_TNODE.
1004 */
1005
1006 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1007 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001008
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001009 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001010
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001011 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001012 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001013 pos = tn->pos + tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001014 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
1015
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001016 BUG_ON(n && NODE_PARENT(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001017 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001018 break;
1019 }
1020
1021 /*
1022 * n ----> NULL, LEAF or TNODE
1023 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001024 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001025 */
1026
Olof Johansson91b9a272005-08-09 20:24:39 -07001027 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001028
1029 /* Case 1: n is a leaf. Compare prefixes */
1030
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001031 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001032 struct leaf *l = (struct leaf *) n;
1033
Robert Olsson19baf832005-06-21 12:43:18 -07001034 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001035
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001036 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001037 *err = -ENOMEM;
1038 goto err;
1039 }
Robert Olsson19baf832005-06-21 12:43:18 -07001040
1041 fa_head = &li->falh;
1042 insert_leaf_info(&l->list, li);
1043 goto done;
1044 }
1045 t->size++;
1046 l = leaf_new();
1047
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001048 if (!l) {
Robert Olssonf835e472005-06-28 15:00:39 -07001049 *err = -ENOMEM;
1050 goto err;
1051 }
Robert Olsson19baf832005-06-21 12:43:18 -07001052
1053 l->key = key;
1054 li = leaf_info_new(plen);
1055
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001056 if (!li) {
Robert Olssonf835e472005-06-28 15:00:39 -07001057 tnode_free((struct tnode *) l);
1058 *err = -ENOMEM;
1059 goto err;
1060 }
Robert Olsson19baf832005-06-21 12:43:18 -07001061
1062 fa_head = &li->falh;
1063 insert_leaf_info(&l->list, li);
1064
Robert Olsson19baf832005-06-21 12:43:18 -07001065 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001066 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001067
1068 NODE_SET_PARENT(l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001069
Olof Johansson91b9a272005-08-09 20:24:39 -07001070 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1071 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1072 } else {
1073 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001074 /*
1075 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001076 * first tnode need some special handling
1077 */
1078
1079 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001080 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001081 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001082 pos = 0;
1083
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001084 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001085 newpos = tkey_mismatch(key, pos, n->key);
1086 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001087 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001088 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001089 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001090 }
Robert Olsson19baf832005-06-21 12:43:18 -07001091
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001092 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001093 free_leaf_info(li);
1094 tnode_free((struct tnode *) l);
1095 *err = -ENOMEM;
1096 goto err;
Olof Johansson91b9a272005-08-09 20:24:39 -07001097 }
1098
Robert Olsson19baf832005-06-21 12:43:18 -07001099 NODE_SET_PARENT(tn, tp);
1100
Olof Johansson91b9a272005-08-09 20:24:39 -07001101 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001102 put_child(t, tn, missbit, (struct node *)l);
1103 put_child(t, tn, 1-missbit, n);
1104
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001105 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001106 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1107 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001108 } else {
Robert Olsson2373ce12005-08-25 13:01:29 -07001109 rcu_assign_pointer(t->trie, (struct node *)tn); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001110 tp = tn;
1111 }
1112 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001113
1114 if (tp && tp->pos + tp->bits > 32)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001115 printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
Robert Olsson19baf832005-06-21 12:43:18 -07001116 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001117
Robert Olsson19baf832005-06-21 12:43:18 -07001118 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001119
1120 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Robert Olssonf835e472005-06-28 15:00:39 -07001121done:
1122 t->revision++;
Olof Johansson91b9a272005-08-09 20:24:39 -07001123err:
Robert Olsson19baf832005-06-21 12:43:18 -07001124 return fa_head;
1125}
1126
1127static int
1128fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1129 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1130{
1131 struct trie *t = (struct trie *) tb->tb_data;
1132 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001133 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001134 struct fib_info *fi;
1135 int plen = r->rtm_dst_len;
1136 int type = r->rtm_type;
1137 u8 tos = r->rtm_tos;
1138 u32 key, mask;
1139 int err;
1140 struct leaf *l;
1141
1142 if (plen > 32)
1143 return -EINVAL;
1144
1145 key = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001146 if (rta->rta_dst)
Robert Olsson19baf832005-06-21 12:43:18 -07001147 memcpy(&key, rta->rta_dst, 4);
1148
1149 key = ntohl(key);
1150
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001151 pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001152
Olof Johansson91b9a272005-08-09 20:24:39 -07001153 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001154
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001155 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001156 return -EINVAL;
1157
1158 key = key & mask;
1159
Olof Johansson91b9a272005-08-09 20:24:39 -07001160 fi = fib_create_info(r, rta, nlhdr, &err);
1161
1162 if (!fi)
Robert Olsson19baf832005-06-21 12:43:18 -07001163 goto err;
1164
1165 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001166 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001167
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001168 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001169 fa_head = get_fa_head(l, plen);
1170 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1171 }
1172
1173 /* Now fa, if non-NULL, points to the first fib alias
1174 * with the same keys [prefix,tos,priority], if such key already
1175 * exists or to the node before which we will insert new one.
1176 *
1177 * If fa is NULL, we will need to allocate a new one and
1178 * insert to the head of f.
1179 *
1180 * If f is NULL, no fib node matched the destination key
1181 * and we need to allocate a new one of those as well.
1182 */
1183
Olof Johansson91b9a272005-08-09 20:24:39 -07001184 if (fa && fa->fa_info->fib_priority == fi->fib_priority) {
Robert Olsson19baf832005-06-21 12:43:18 -07001185 struct fib_alias *fa_orig;
1186
1187 err = -EEXIST;
1188 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1189 goto out;
1190
1191 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1192 struct fib_info *fi_drop;
1193 u8 state;
1194
Robert Olsson2373ce12005-08-25 13:01:29 -07001195 err = -ENOBUFS;
1196 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1197 if (new_fa == NULL)
1198 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001199
1200 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001201 new_fa->fa_tos = fa->fa_tos;
1202 new_fa->fa_info = fi;
1203 new_fa->fa_type = type;
1204 new_fa->fa_scope = r->rtm_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001205 state = fa->fa_state;
Robert Olsson2373ce12005-08-25 13:01:29 -07001206 new_fa->fa_state &= ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001207
Robert Olsson2373ce12005-08-25 13:01:29 -07001208 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1209 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001210
1211 fib_release_info(fi_drop);
1212 if (state & FA_S_ACCESSED)
Olof Johansson91b9a272005-08-09 20:24:39 -07001213 rt_cache_flush(-1);
Robert Olsson19baf832005-06-21 12:43:18 -07001214
Olof Johansson91b9a272005-08-09 20:24:39 -07001215 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001216 }
1217 /* Error if we find a perfect match which
1218 * uses the same scope, type, and nexthop
1219 * information.
1220 */
1221 fa_orig = fa;
1222 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1223 if (fa->fa_tos != tos)
1224 break;
1225 if (fa->fa_info->fib_priority != fi->fib_priority)
1226 break;
1227 if (fa->fa_type == type &&
1228 fa->fa_scope == r->rtm_scope &&
1229 fa->fa_info == fi) {
1230 goto out;
1231 }
1232 }
1233 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1234 fa = fa_orig;
1235 }
1236 err = -ENOENT;
Olof Johansson91b9a272005-08-09 20:24:39 -07001237 if (!(nlhdr->nlmsg_flags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001238 goto out;
1239
1240 err = -ENOBUFS;
1241 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1242 if (new_fa == NULL)
1243 goto out;
1244
1245 new_fa->fa_info = fi;
1246 new_fa->fa_tos = tos;
1247 new_fa->fa_type = type;
1248 new_fa->fa_scope = r->rtm_scope;
1249 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001250 /*
1251 * Insert new entry to the list.
1252 */
1253
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001254 if (!fa_head) {
Robert Olssonf835e472005-06-28 15:00:39 -07001255 fa_head = fib_insert_node(t, &err, key, plen);
1256 err = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001257 if (err)
Robert Olssonf835e472005-06-28 15:00:39 -07001258 goto out_free_new_fa;
1259 }
Robert Olsson19baf832005-06-21 12:43:18 -07001260
Robert Olsson2373ce12005-08-25 13:01:29 -07001261 list_add_tail_rcu(&new_fa->fa_list,
1262 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001263
1264 rt_cache_flush(-1);
1265 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1266succeeded:
1267 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001268
1269out_free_new_fa:
1270 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001271out:
1272 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001273err:
Robert Olsson19baf832005-06-21 12:43:18 -07001274 return err;
1275}
1276
Robert Olsson2373ce12005-08-25 13:01:29 -07001277
Robert Olsson772cb712005-09-19 15:31:18 -07001278/* should be called with rcu_read_lock */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001279static inline int check_leaf(struct trie *t, struct leaf *l,
1280 t_key key, int *plen, const struct flowi *flp,
Patrick McHardy06c74272005-08-23 22:06:09 -07001281 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001282{
Patrick McHardy06c74272005-08-23 22:06:09 -07001283 int err, i;
Robert Olsson19baf832005-06-21 12:43:18 -07001284 t_key mask;
1285 struct leaf_info *li;
1286 struct hlist_head *hhead = &l->list;
1287 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001288
Robert Olsson2373ce12005-08-25 13:01:29 -07001289 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001290 i = li->plen;
1291 mask = ntohl(inet_make_mask(i));
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001292 if (l->key != (key & mask))
Robert Olsson19baf832005-06-21 12:43:18 -07001293 continue;
1294
Patrick McHardy06c74272005-08-23 22:06:09 -07001295 if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001296 *plen = i;
1297#ifdef CONFIG_IP_FIB_TRIE_STATS
1298 t->stats.semantic_match_passed++;
1299#endif
Patrick McHardy06c74272005-08-23 22:06:09 -07001300 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001301 }
1302#ifdef CONFIG_IP_FIB_TRIE_STATS
1303 t->stats.semantic_match_miss++;
1304#endif
1305 }
Patrick McHardy06c74272005-08-23 22:06:09 -07001306 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001307}
1308
1309static int
1310fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1311{
1312 struct trie *t = (struct trie *) tb->tb_data;
1313 int plen, ret = 0;
1314 struct node *n;
1315 struct tnode *pn;
1316 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001317 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001318 int chopped_off;
1319 t_key cindex = 0;
1320 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001321 struct tnode *cn;
1322 t_key node_prefix, key_prefix, pref_mismatch;
1323 int mp;
1324
Robert Olsson2373ce12005-08-25 13:01:29 -07001325 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001326
Robert Olsson2373ce12005-08-25 13:01:29 -07001327 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001328 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001329 goto failed;
1330
1331#ifdef CONFIG_IP_FIB_TRIE_STATS
1332 t->stats.gets++;
1333#endif
1334
1335 /* Just a leaf? */
1336 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001337 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001338 goto found;
1339 goto failed;
1340 }
1341 pn = (struct tnode *) n;
1342 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001343
Olof Johansson91b9a272005-08-09 20:24:39 -07001344 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001345 pos = pn->pos;
1346 bits = pn->bits;
1347
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001348 if (!chopped_off)
Robert Olsson19baf832005-06-21 12:43:18 -07001349 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1350
1351 n = tnode_get_child(pn, cindex);
1352
1353 if (n == NULL) {
1354#ifdef CONFIG_IP_FIB_TRIE_STATS
1355 t->stats.null_node_hit++;
1356#endif
1357 goto backtrace;
1358 }
1359
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001360 if (IS_LEAF(n)) {
Patrick McHardy06c74272005-08-23 22:06:09 -07001361 if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001362 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001363 else
1364 goto backtrace;
1365 }
1366
1367#define HL_OPTIMIZE
1368#ifdef HL_OPTIMIZE
1369 cn = (struct tnode *)n;
1370
1371 /*
1372 * It's a tnode, and we can do some extra checks here if we
1373 * like, to avoid descending into a dead-end branch.
1374 * This tnode is in the parent's child array at index
1375 * key[p_pos..p_pos+p_bits] but potentially with some bits
1376 * chopped off, so in reality the index may be just a
1377 * subprefix, padded with zero at the end.
1378 * We can also take a look at any skipped bits in this
1379 * tnode - everything up to p_pos is supposed to be ok,
1380 * and the non-chopped bits of the index (se previous
1381 * paragraph) are also guaranteed ok, but the rest is
1382 * considered unknown.
1383 *
1384 * The skipped bits are key[pos+bits..cn->pos].
1385 */
1386
1387 /* If current_prefix_length < pos+bits, we are already doing
1388 * actual prefix matching, which means everything from
1389 * pos+(bits-chopped_off) onward must be zero along some
1390 * branch of this subtree - otherwise there is *no* valid
1391 * prefix present. Here we can only check the skipped
1392 * bits. Remember, since we have already indexed into the
1393 * parent's child array, we know that the bits we chopped of
1394 * *are* zero.
1395 */
1396
1397 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1398
1399 if (current_prefix_length < pos+bits) {
1400 if (tkey_extract_bits(cn->key, current_prefix_length,
1401 cn->pos - current_prefix_length) != 0 ||
1402 !(cn->child[0]))
1403 goto backtrace;
1404 }
1405
1406 /*
1407 * If chopped_off=0, the index is fully validated and we
1408 * only need to look at the skipped bits for this, the new,
1409 * tnode. What we actually want to do is to find out if
1410 * these skipped bits match our key perfectly, or if we will
1411 * have to count on finding a matching prefix further down,
1412 * because if we do, we would like to have some way of
1413 * verifying the existence of such a prefix at this point.
1414 */
1415
1416 /* The only thing we can do at this point is to verify that
1417 * any such matching prefix can indeed be a prefix to our
1418 * key, and if the bits in the node we are inspecting that
1419 * do not match our key are not ZERO, this cannot be true.
1420 * Thus, find out where there is a mismatch (before cn->pos)
1421 * and verify that all the mismatching bits are zero in the
1422 * new tnode's key.
1423 */
1424
1425 /* Note: We aren't very concerned about the piece of the key
1426 * that precede pn->pos+pn->bits, since these have already been
1427 * checked. The bits after cn->pos aren't checked since these are
1428 * by definition "unknown" at this point. Thus, what we want to
1429 * see is if we are about to enter the "prefix matching" state,
1430 * and in that case verify that the skipped bits that will prevail
1431 * throughout this subtree are zero, as they have to be if we are
1432 * to find a matching prefix.
1433 */
1434
1435 node_prefix = MASK_PFX(cn->key, cn->pos);
1436 key_prefix = MASK_PFX(key, cn->pos);
1437 pref_mismatch = key_prefix^node_prefix;
1438 mp = 0;
1439
1440 /* In short: If skipped bits in this node do not match the search
1441 * key, enter the "prefix matching" state.directly.
1442 */
1443 if (pref_mismatch) {
1444 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1445 mp++;
1446 pref_mismatch = pref_mismatch <<1;
1447 }
1448 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1449
1450 if (key_prefix != 0)
1451 goto backtrace;
1452
1453 if (current_prefix_length >= cn->pos)
1454 current_prefix_length = mp;
1455 }
1456#endif
1457 pn = (struct tnode *)n; /* Descend */
1458 chopped_off = 0;
1459 continue;
1460
Robert Olsson19baf832005-06-21 12:43:18 -07001461backtrace:
1462 chopped_off++;
1463
1464 /* As zero don't change the child key (cindex) */
Olof Johansson91b9a272005-08-09 20:24:39 -07001465 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001466 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001467
1468 /* Decrease current_... with bits chopped off */
1469 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1470 current_prefix_length = pn->pos + pn->bits - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001471
Robert Olsson19baf832005-06-21 12:43:18 -07001472 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001473 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001474 * chopped off all bits in this tnode walk up to our parent.
1475 */
1476
Olof Johansson91b9a272005-08-09 20:24:39 -07001477 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001478 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001479 } else {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001480 if (NODE_PARENT(pn) == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001481 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001482
Robert Olsson19baf832005-06-21 12:43:18 -07001483 /* Get Child's index */
1484 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1485 pn = NODE_PARENT(pn);
1486 chopped_off = 0;
1487
1488#ifdef CONFIG_IP_FIB_TRIE_STATS
1489 t->stats.backtrack++;
1490#endif
1491 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001492 }
Robert Olsson19baf832005-06-21 12:43:18 -07001493 }
1494failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001495 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001496found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001497 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001498 return ret;
1499}
1500
Robert Olsson2373ce12005-08-25 13:01:29 -07001501/* only called from updater side */
Robert Olsson19baf832005-06-21 12:43:18 -07001502static int trie_leaf_remove(struct trie *t, t_key key)
1503{
1504 t_key cindex;
1505 struct tnode *tp = NULL;
1506 struct node *n = t->trie;
1507 struct leaf *l;
1508
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001509 pr_debug("entering trie_leaf_remove(%p)\n", n);
Robert Olsson19baf832005-06-21 12:43:18 -07001510
1511 /* Note that in the case skipped bits, those bits are *not* checked!
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001512 * When we finish this, we will have NULL or a T_LEAF, and the
Robert Olsson19baf832005-06-21 12:43:18 -07001513 * T_LEAF may or may not match our key.
1514 */
1515
Olof Johansson91b9a272005-08-09 20:24:39 -07001516 while (n != NULL && IS_TNODE(n)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001517 struct tnode *tn = (struct tnode *) n;
1518 check_tnode(tn);
1519 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1520
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001521 BUG_ON(n && NODE_PARENT(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001522 }
Robert Olsson19baf832005-06-21 12:43:18 -07001523 l = (struct leaf *) n;
1524
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001525 if (!n || !tkey_equals(l->key, key))
Robert Olsson19baf832005-06-21 12:43:18 -07001526 return 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001527
1528 /*
1529 * Key found.
1530 * Remove the leaf and rebalance the tree
Robert Olsson19baf832005-06-21 12:43:18 -07001531 */
1532
1533 t->revision++;
1534 t->size--;
1535
Robert Olsson2373ce12005-08-25 13:01:29 -07001536 preempt_disable();
Robert Olsson19baf832005-06-21 12:43:18 -07001537 tp = NODE_PARENT(n);
1538 tnode_free((struct tnode *) n);
1539
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001540 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001541 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1542 put_child(t, (struct tnode *)tp, cindex, NULL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001543 rcu_assign_pointer(t->trie, trie_rebalance(t, tp));
Olof Johansson91b9a272005-08-09 20:24:39 -07001544 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001545 rcu_assign_pointer(t->trie, NULL);
1546 preempt_enable();
Robert Olsson19baf832005-06-21 12:43:18 -07001547
1548 return 1;
1549}
1550
1551static int
1552fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
Olof Johansson91b9a272005-08-09 20:24:39 -07001553 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
Robert Olsson19baf832005-06-21 12:43:18 -07001554{
1555 struct trie *t = (struct trie *) tb->tb_data;
1556 u32 key, mask;
1557 int plen = r->rtm_dst_len;
1558 u8 tos = r->rtm_tos;
1559 struct fib_alias *fa, *fa_to_delete;
1560 struct list_head *fa_head;
1561 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001562 struct leaf_info *li;
1563
Robert Olsson19baf832005-06-21 12:43:18 -07001564
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001565 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001566 return -EINVAL;
1567
1568 key = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001569 if (rta->rta_dst)
Robert Olsson19baf832005-06-21 12:43:18 -07001570 memcpy(&key, rta->rta_dst, 4);
1571
1572 key = ntohl(key);
Olof Johansson91b9a272005-08-09 20:24:39 -07001573 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001574
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001575 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001576 return -EINVAL;
1577
1578 key = key & mask;
1579 l = fib_find_node(t, key);
1580
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001581 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001582 return -ESRCH;
1583
1584 fa_head = get_fa_head(l, plen);
1585 fa = fib_find_alias(fa_head, tos, 0);
1586
1587 if (!fa)
1588 return -ESRCH;
1589
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001590 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001591
1592 fa_to_delete = NULL;
1593 fa_head = fa->fa_list.prev;
Robert Olsson2373ce12005-08-25 13:01:29 -07001594
Robert Olsson19baf832005-06-21 12:43:18 -07001595 list_for_each_entry(fa, fa_head, fa_list) {
1596 struct fib_info *fi = fa->fa_info;
1597
1598 if (fa->fa_tos != tos)
1599 break;
1600
1601 if ((!r->rtm_type ||
1602 fa->fa_type == r->rtm_type) &&
1603 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1604 fa->fa_scope == r->rtm_scope) &&
1605 (!r->rtm_protocol ||
1606 fi->fib_protocol == r->rtm_protocol) &&
1607 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1608 fa_to_delete = fa;
1609 break;
1610 }
1611 }
1612
Olof Johansson91b9a272005-08-09 20:24:39 -07001613 if (!fa_to_delete)
1614 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001615
Olof Johansson91b9a272005-08-09 20:24:39 -07001616 fa = fa_to_delete;
1617 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
Robert Olsson19baf832005-06-21 12:43:18 -07001618
Olof Johansson91b9a272005-08-09 20:24:39 -07001619 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001620 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001621
Robert Olsson2373ce12005-08-25 13:01:29 -07001622 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001623
Olof Johansson91b9a272005-08-09 20:24:39 -07001624 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001625 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001626 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001627 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001628
1629 if (hlist_empty(&l->list))
1630 trie_leaf_remove(t, key);
1631
1632 if (fa->fa_state & FA_S_ACCESSED)
1633 rt_cache_flush(-1);
1634
Robert Olsson2373ce12005-08-25 13:01:29 -07001635 fib_release_info(fa->fa_info);
1636 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001637 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001638}
1639
1640static int trie_flush_list(struct trie *t, struct list_head *head)
1641{
1642 struct fib_alias *fa, *fa_node;
1643 int found = 0;
1644
1645 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1646 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001647
Robert Olsson2373ce12005-08-25 13:01:29 -07001648 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1649 list_del_rcu(&fa->fa_list);
1650 fib_release_info(fa->fa_info);
1651 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001652 found++;
1653 }
1654 }
1655 return found;
1656}
1657
1658static int trie_flush_leaf(struct trie *t, struct leaf *l)
1659{
1660 int found = 0;
1661 struct hlist_head *lih = &l->list;
1662 struct hlist_node *node, *tmp;
1663 struct leaf_info *li = NULL;
1664
1665 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Robert Olsson19baf832005-06-21 12:43:18 -07001666 found += trie_flush_list(t, &li->falh);
1667
1668 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001669 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001670 free_leaf_info(li);
1671 }
1672 }
1673 return found;
1674}
1675
Robert Olsson2373ce12005-08-25 13:01:29 -07001676/* rcu_read_lock needs to be hold by caller from readside */
1677
Robert Olsson19baf832005-06-21 12:43:18 -07001678static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1679{
1680 struct node *c = (struct node *) thisleaf;
1681 struct tnode *p;
1682 int idx;
Robert Olsson2373ce12005-08-25 13:01:29 -07001683 struct node *trie = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -07001684
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001685 if (c == NULL) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001686 if (trie == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -07001687 return NULL;
1688
Robert Olsson2373ce12005-08-25 13:01:29 -07001689 if (IS_LEAF(trie)) /* trie w. just a leaf */
1690 return (struct leaf *) trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001691
Robert Olsson2373ce12005-08-25 13:01:29 -07001692 p = (struct tnode*) trie; /* Start */
Olof Johansson91b9a272005-08-09 20:24:39 -07001693 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001694 p = (struct tnode *) NODE_PARENT(c);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001695
Robert Olsson19baf832005-06-21 12:43:18 -07001696 while (p) {
1697 int pos, last;
1698
1699 /* Find the next child of the parent */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001700 if (c)
1701 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1702 else
Robert Olsson19baf832005-06-21 12:43:18 -07001703 pos = 0;
1704
1705 last = 1 << p->bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001706 for (idx = pos; idx < last ; idx++) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001707 c = rcu_dereference(p->child[idx]);
1708
1709 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001710 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001711
Olof Johansson91b9a272005-08-09 20:24:39 -07001712 /* Decend if tnode */
Robert Olsson2373ce12005-08-25 13:01:29 -07001713 while (IS_TNODE(c)) {
1714 p = (struct tnode *) c;
1715 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001716
Olof Johansson91b9a272005-08-09 20:24:39 -07001717 /* Rightmost non-NULL branch */
1718 if (p && IS_TNODE(p))
Robert Olsson2373ce12005-08-25 13:01:29 -07001719 while (!(c = rcu_dereference(p->child[idx]))
1720 && idx < (1<<p->bits)) idx++;
Robert Olsson19baf832005-06-21 12:43:18 -07001721
Olof Johansson91b9a272005-08-09 20:24:39 -07001722 /* Done with this tnode? */
Robert Olsson2373ce12005-08-25 13:01:29 -07001723 if (idx >= (1 << p->bits) || !c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001724 goto up;
Robert Olsson19baf832005-06-21 12:43:18 -07001725 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001726 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001727 }
1728up:
1729 /* No more children go up one step */
Olof Johansson91b9a272005-08-09 20:24:39 -07001730 c = (struct node *) p;
Robert Olsson19baf832005-06-21 12:43:18 -07001731 p = (struct tnode *) NODE_PARENT(p);
1732 }
1733 return NULL; /* Ready. Root of trie */
1734}
1735
1736static int fn_trie_flush(struct fib_table *tb)
1737{
1738 struct trie *t = (struct trie *) tb->tb_data;
1739 struct leaf *ll = NULL, *l = NULL;
1740 int found = 0, h;
1741
1742 t->revision++;
1743
Olof Johansson91b9a272005-08-09 20:24:39 -07001744 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001745 found += trie_flush_leaf(t, l);
1746
1747 if (ll && hlist_empty(&ll->list))
1748 trie_leaf_remove(t, ll->key);
1749 ll = l;
1750 }
1751
1752 if (ll && hlist_empty(&ll->list))
1753 trie_leaf_remove(t, ll->key);
1754
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001755 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001756 return found;
1757}
1758
Olof Johansson91b9a272005-08-09 20:24:39 -07001759static int trie_last_dflt = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001760
1761static void
1762fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1763{
1764 struct trie *t = (struct trie *) tb->tb_data;
1765 int order, last_idx;
1766 struct fib_info *fi = NULL;
1767 struct fib_info *last_resort;
1768 struct fib_alias *fa = NULL;
1769 struct list_head *fa_head;
1770 struct leaf *l;
1771
1772 last_idx = -1;
1773 last_resort = NULL;
1774 order = -1;
1775
Robert Olsson2373ce12005-08-25 13:01:29 -07001776 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001777
Robert Olsson19baf832005-06-21 12:43:18 -07001778 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001779 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001780 goto out;
1781
1782 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001783 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001784 goto out;
1785
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001786 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001787 goto out;
1788
Robert Olsson2373ce12005-08-25 13:01:29 -07001789 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001790 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001791
Robert Olsson19baf832005-06-21 12:43:18 -07001792 if (fa->fa_scope != res->scope ||
1793 fa->fa_type != RTN_UNICAST)
1794 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001795
Robert Olsson19baf832005-06-21 12:43:18 -07001796 if (next_fi->fib_priority > res->fi->fib_priority)
1797 break;
1798 if (!next_fi->fib_nh[0].nh_gw ||
1799 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1800 continue;
1801 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001802
Robert Olsson19baf832005-06-21 12:43:18 -07001803 if (fi == NULL) {
1804 if (next_fi != res->fi)
1805 break;
1806 } else if (!fib_detect_death(fi, order, &last_resort,
1807 &last_idx, &trie_last_dflt)) {
1808 if (res->fi)
1809 fib_info_put(res->fi);
1810 res->fi = fi;
1811 atomic_inc(&fi->fib_clntref);
1812 trie_last_dflt = order;
1813 goto out;
1814 }
1815 fi = next_fi;
1816 order++;
1817 }
1818 if (order <= 0 || fi == NULL) {
1819 trie_last_dflt = -1;
1820 goto out;
1821 }
1822
1823 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1824 if (res->fi)
1825 fib_info_put(res->fi);
1826 res->fi = fi;
1827 atomic_inc(&fi->fib_clntref);
1828 trie_last_dflt = order;
1829 goto out;
1830 }
1831 if (last_idx >= 0) {
1832 if (res->fi)
1833 fib_info_put(res->fi);
1834 res->fi = last_resort;
1835 if (last_resort)
1836 atomic_inc(&last_resort->fib_clntref);
1837 }
1838 trie_last_dflt = last_idx;
1839 out:;
Robert Olsson2373ce12005-08-25 13:01:29 -07001840 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001841}
1842
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001843static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001844 struct sk_buff *skb, struct netlink_callback *cb)
1845{
1846 int i, s_i;
1847 struct fib_alias *fa;
1848
Olof Johansson91b9a272005-08-09 20:24:39 -07001849 u32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001850
Olof Johansson91b9a272005-08-09 20:24:39 -07001851 s_i = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001852 i = 0;
1853
Robert Olsson2373ce12005-08-25 13:01:29 -07001854 /* rcu_read_lock is hold by caller */
1855
1856 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001857 if (i < s_i) {
1858 i++;
1859 continue;
1860 }
Stephen Hemminger78c66712005-09-21 00:15:39 -07001861 BUG_ON(!fa->fa_info);
Robert Olsson19baf832005-06-21 12:43:18 -07001862
1863 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1864 cb->nlh->nlmsg_seq,
1865 RTM_NEWROUTE,
1866 tb->tb_id,
1867 fa->fa_type,
1868 fa->fa_scope,
1869 &xkey,
1870 plen,
1871 fa->fa_tos,
David S. Miller90f66912005-06-21 14:43:28 -07001872 fa->fa_info, 0) < 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001873 cb->args[3] = i;
1874 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001875 }
Robert Olsson19baf832005-06-21 12:43:18 -07001876 i++;
1877 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001878 cb->args[3] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001879 return skb->len;
1880}
1881
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001882static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
Robert Olsson19baf832005-06-21 12:43:18 -07001883 struct netlink_callback *cb)
1884{
1885 int h, s_h;
1886 struct list_head *fa_head;
1887 struct leaf *l = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001888
Olof Johansson91b9a272005-08-09 20:24:39 -07001889 s_h = cb->args[2];
Robert Olsson19baf832005-06-21 12:43:18 -07001890
Olof Johansson91b9a272005-08-09 20:24:39 -07001891 for (h = 0; (l = nextleaf(t, l)) != NULL; h++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001892 if (h < s_h)
1893 continue;
1894 if (h > s_h)
1895 memset(&cb->args[3], 0,
1896 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1897
1898 fa_head = get_fa_head(l, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001899
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001900 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001901 continue;
1902
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001903 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001904 continue;
1905
1906 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001907 cb->args[2] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001908 return -1;
1909 }
1910 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001911 cb->args[2] = h;
Robert Olsson19baf832005-06-21 12:43:18 -07001912 return skb->len;
1913}
1914
1915static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1916{
1917 int m, s_m;
1918 struct trie *t = (struct trie *) tb->tb_data;
1919
1920 s_m = cb->args[1];
1921
Robert Olsson2373ce12005-08-25 13:01:29 -07001922 rcu_read_lock();
Olof Johansson91b9a272005-08-09 20:24:39 -07001923 for (m = 0; m <= 32; m++) {
Robert Olsson19baf832005-06-21 12:43:18 -07001924 if (m < s_m)
1925 continue;
1926 if (m > s_m)
1927 memset(&cb->args[2], 0,
Olof Johansson91b9a272005-08-09 20:24:39 -07001928 sizeof(cb->args) - 2*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001929
1930 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1931 cb->args[1] = m;
1932 goto out;
1933 }
1934 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001935 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001936 cb->args[1] = m;
1937 return skb->len;
Olof Johansson91b9a272005-08-09 20:24:39 -07001938out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001939 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001940 return -1;
1941}
1942
1943/* Fix more generic FIB names for init later */
1944
1945#ifdef CONFIG_IP_MULTIPLE_TABLES
1946struct fib_table * fib_hash_init(int id)
1947#else
1948struct fib_table * __init fib_hash_init(int id)
1949#endif
1950{
1951 struct fib_table *tb;
1952 struct trie *t;
1953
1954 if (fn_alias_kmem == NULL)
1955 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1956 sizeof(struct fib_alias),
1957 0, SLAB_HWCACHE_ALIGN,
1958 NULL, NULL);
1959
1960 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1961 GFP_KERNEL);
1962 if (tb == NULL)
1963 return NULL;
1964
1965 tb->tb_id = id;
1966 tb->tb_lookup = fn_trie_lookup;
1967 tb->tb_insert = fn_trie_insert;
1968 tb->tb_delete = fn_trie_delete;
1969 tb->tb_flush = fn_trie_flush;
1970 tb->tb_select_default = fn_trie_select_default;
1971 tb->tb_dump = fn_trie_dump;
1972 memset(tb->tb_data, 0, sizeof(struct trie));
1973
1974 t = (struct trie *) tb->tb_data;
1975
1976 trie_init(t);
1977
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001978 if (id == RT_TABLE_LOCAL)
Olof Johansson91b9a272005-08-09 20:24:39 -07001979 trie_local = t;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001980 else if (id == RT_TABLE_MAIN)
Olof Johansson91b9a272005-08-09 20:24:39 -07001981 trie_main = t;
Robert Olsson19baf832005-06-21 12:43:18 -07001982
1983 if (id == RT_TABLE_LOCAL)
Stephen Hemminger78c66712005-09-21 00:15:39 -07001984 printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07001985
1986 return tb;
1987}
1988
Robert Olsson19baf832005-06-21 12:43:18 -07001989#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001990/* Depth first Trie walk iterator */
1991struct fib_trie_iter {
1992 struct tnode *tnode;
1993 struct trie *trie;
1994 unsigned index;
1995 unsigned depth;
1996};
Robert Olsson19baf832005-06-21 12:43:18 -07001997
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001998static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001999{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002000 struct tnode *tn = iter->tnode;
2001 unsigned cindex = iter->index;
2002 struct tnode *p;
2003
2004 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2005 iter->tnode, iter->index, iter->depth);
2006rescan:
2007 while (cindex < (1<<tn->bits)) {
2008 struct node *n = tnode_get_child(tn, cindex);
2009
2010 if (n) {
2011 if (IS_LEAF(n)) {
2012 iter->tnode = tn;
2013 iter->index = cindex + 1;
2014 } else {
2015 /* push down one level */
2016 iter->tnode = (struct tnode *) n;
2017 iter->index = 0;
2018 ++iter->depth;
2019 }
2020 return n;
2021 }
2022
2023 ++cindex;
2024 }
2025
2026 /* Current node exhausted, pop back up */
2027 p = NODE_PARENT(tn);
2028 if (p) {
2029 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2030 tn = p;
2031 --iter->depth;
2032 goto rescan;
2033 }
2034
2035 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002036 return NULL;
2037}
2038
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002039static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2040 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002041{
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002042 struct node *n ;
2043
2044 if(!t)
2045 return NULL;
2046
2047 n = rcu_dereference(t->trie);
2048
2049 if(!iter)
2050 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002051
2052 if (n && IS_TNODE(n)) {
2053 iter->tnode = (struct tnode *) n;
2054 iter->trie = t;
2055 iter->index = 0;
Robert Olsson1d25cd62005-09-19 15:29:52 -07002056 iter->depth = 1;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002057 return n;
2058 }
Robert Olsson19baf832005-06-21 12:43:18 -07002059 return NULL;
2060}
2061
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002062static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002063{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002064 struct node *n;
2065 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002066
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002067 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002068
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002069 rcu_read_lock();
2070 for (n = fib_trie_get_first(&iter, t); n;
2071 n = fib_trie_get_next(&iter)) {
2072 if (IS_LEAF(n)) {
2073 s->leaves++;
2074 s->totdepth += iter.depth;
2075 if (iter.depth > s->maxdepth)
2076 s->maxdepth = iter.depth;
2077 } else {
2078 const struct tnode *tn = (const struct tnode *) n;
2079 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002080
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002081 s->tnodes++;
Robert Olsson06ef9212006-03-20 21:35:01 -08002082 if(tn->bits < MAX_STAT_DEPTH)
2083 s->nodesizes[tn->bits]++;
2084
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002085 for (i = 0; i < (1<<tn->bits); i++)
2086 if (!tn->child[i])
2087 s->nullpointers++;
2088 }
2089 }
2090 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002091}
2092
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002093/*
Robert Olsson19baf832005-06-21 12:43:18 -07002094 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002095 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002096static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002097{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002098 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002099
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002100 if (stat->leaves)
2101 avdepth = stat->totdepth*100 / stat->leaves;
2102 else
2103 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002104
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002105 seq_printf(seq, "\tAver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2106 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002107
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002108 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Olof Johansson91b9a272005-08-09 20:24:39 -07002109
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002110 bytes = sizeof(struct leaf) * stat->leaves;
2111 seq_printf(seq, "\tInternal nodes: %d\n\t", stat->tnodes);
2112 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002113
Robert Olsson06ef9212006-03-20 21:35:01 -08002114 max = MAX_STAT_DEPTH;
2115 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002116 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002117
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002118 pointers = 0;
2119 for (i = 1; i <= max; i++)
2120 if (stat->nodesizes[i] != 0) {
2121 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2122 pointers += (1<<i) * stat->nodesizes[i];
2123 }
2124 seq_putc(seq, '\n');
2125 seq_printf(seq, "\tPointers: %d\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002126
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002127 bytes += sizeof(struct node *) * pointers;
2128 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2129 seq_printf(seq, "Total size: %d kB\n", (bytes + 1023) / 1024);
Robert Olsson19baf832005-06-21 12:43:18 -07002130
2131#ifdef CONFIG_IP_FIB_TRIE_STATS
2132 seq_printf(seq, "Counters:\n---------\n");
2133 seq_printf(seq,"gets = %d\n", t->stats.gets);
2134 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2135 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2136 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2137 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
Robert Olsson2f368952005-07-05 15:02:40 -07002138 seq_printf(seq,"skipped node resize = %d\n", t->stats.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002139#ifdef CLEAR_STATS
2140 memset(&(t->stats), 0, sizeof(t->stats));
2141#endif
2142#endif /* CONFIG_IP_FIB_TRIE_STATS */
2143}
2144
2145static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2146{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002147 struct trie_stat *stat;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002148
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002149 stat = kmalloc(sizeof(*stat), GFP_KERNEL);
2150 if (!stat)
2151 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002152
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002153 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2154 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002155
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002156 if (trie_local) {
2157 seq_printf(seq, "Local:\n");
2158 trie_collect_stats(trie_local, stat);
2159 trie_show_stats(seq, stat);
Robert Olsson19baf832005-06-21 12:43:18 -07002160 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002161
2162 if (trie_main) {
2163 seq_printf(seq, "Main:\n");
2164 trie_collect_stats(trie_main, stat);
2165 trie_show_stats(seq, stat);
2166 }
2167 kfree(stat);
2168
Robert Olsson19baf832005-06-21 12:43:18 -07002169 return 0;
2170}
2171
Robert Olsson19baf832005-06-21 12:43:18 -07002172static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2173{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002174 return single_open(file, fib_triestat_seq_show, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07002175}
2176
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002177static struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002178 .owner = THIS_MODULE,
2179 .open = fib_triestat_seq_open,
2180 .read = seq_read,
2181 .llseek = seq_lseek,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002182 .release = single_release,
Robert Olsson19baf832005-06-21 12:43:18 -07002183};
2184
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002185static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
2186 loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002187{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002188 loff_t idx = 0;
2189 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -07002190
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002191 for (n = fib_trie_get_first(iter, trie_local);
2192 n; ++idx, n = fib_trie_get_next(iter)) {
2193 if (pos == idx)
2194 return n;
2195 }
Robert Olsson19baf832005-06-21 12:43:18 -07002196
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002197 for (n = fib_trie_get_first(iter, trie_main);
2198 n; ++idx, n = fib_trie_get_next(iter)) {
2199 if (pos == idx)
2200 return n;
2201 }
Robert Olsson19baf832005-06-21 12:43:18 -07002202 return NULL;
2203}
2204
2205static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2206{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002207 rcu_read_lock();
2208 if (*pos == 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07002209 return SEQ_START_TOKEN;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002210 return fib_trie_get_idx(seq->private, *pos - 1);
Robert Olsson19baf832005-06-21 12:43:18 -07002211}
2212
2213static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2214{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002215 struct fib_trie_iter *iter = seq->private;
2216 void *l = v;
2217
Robert Olsson19baf832005-06-21 12:43:18 -07002218 ++*pos;
Olof Johansson91b9a272005-08-09 20:24:39 -07002219 if (v == SEQ_START_TOKEN)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002220 return fib_trie_get_idx(iter, 0);
Olof Johansson91b9a272005-08-09 20:24:39 -07002221
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002222 v = fib_trie_get_next(iter);
2223 BUG_ON(v == l);
2224 if (v)
2225 return v;
2226
2227 /* continue scan in next trie */
2228 if (iter->trie == trie_local)
2229 return fib_trie_get_first(iter, trie_main);
2230
2231 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07002232}
2233
2234static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2235{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002236 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002237}
2238
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002239static void seq_indent(struct seq_file *seq, int n)
2240{
2241 while (n-- > 0) seq_puts(seq, " ");
2242}
Robert Olsson19baf832005-06-21 12:43:18 -07002243
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002244static inline const char *rtn_scope(enum rt_scope_t s)
2245{
2246 static char buf[32];
2247
2248 switch(s) {
2249 case RT_SCOPE_UNIVERSE: return "universe";
2250 case RT_SCOPE_SITE: return "site";
2251 case RT_SCOPE_LINK: return "link";
2252 case RT_SCOPE_HOST: return "host";
2253 case RT_SCOPE_NOWHERE: return "nowhere";
2254 default:
2255 snprintf(buf, sizeof(buf), "scope=%d", s);
2256 return buf;
2257 }
2258}
2259
2260static const char *rtn_type_names[__RTN_MAX] = {
2261 [RTN_UNSPEC] = "UNSPEC",
2262 [RTN_UNICAST] = "UNICAST",
2263 [RTN_LOCAL] = "LOCAL",
2264 [RTN_BROADCAST] = "BROADCAST",
2265 [RTN_ANYCAST] = "ANYCAST",
2266 [RTN_MULTICAST] = "MULTICAST",
2267 [RTN_BLACKHOLE] = "BLACKHOLE",
2268 [RTN_UNREACHABLE] = "UNREACHABLE",
2269 [RTN_PROHIBIT] = "PROHIBIT",
2270 [RTN_THROW] = "THROW",
2271 [RTN_NAT] = "NAT",
2272 [RTN_XRESOLVE] = "XRESOLVE",
2273};
2274
2275static inline const char *rtn_type(unsigned t)
2276{
2277 static char buf[32];
2278
2279 if (t < __RTN_MAX && rtn_type_names[t])
2280 return rtn_type_names[t];
2281 snprintf(buf, sizeof(buf), "type %d", t);
2282 return buf;
2283}
2284
2285/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002286static int fib_trie_seq_show(struct seq_file *seq, void *v)
2287{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002288 const struct fib_trie_iter *iter = seq->private;
2289 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002290
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002291 if (v == SEQ_START_TOKEN)
2292 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002293
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002294 if (IS_TNODE(n)) {
2295 struct tnode *tn = (struct tnode *) n;
2296 t_key prf = ntohl(MASK_PFX(tn->key, tn->pos));
2297
2298 if (!NODE_PARENT(n)) {
2299 if (iter->trie == trie_local)
2300 seq_puts(seq, "<local>:\n");
2301 else
2302 seq_puts(seq, "<main>:\n");
Robert Olsson1d25cd62005-09-19 15:29:52 -07002303 }
2304 seq_indent(seq, iter->depth-1);
2305 seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n",
2306 NIPQUAD(prf), tn->pos, tn->bits, tn->full_children,
2307 tn->empty_children);
2308
Olof Johansson91b9a272005-08-09 20:24:39 -07002309 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002310 struct leaf *l = (struct leaf *) n;
2311 int i;
2312 u32 val = ntohl(l->key);
2313
2314 seq_indent(seq, iter->depth);
2315 seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val));
2316 for (i = 32; i >= 0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002317 struct leaf_info *li = find_leaf_info(l, i);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002318 if (li) {
2319 struct fib_alias *fa;
2320 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2321 seq_indent(seq, iter->depth+1);
2322 seq_printf(seq, " /%d %s %s", i,
2323 rtn_scope(fa->fa_scope),
2324 rtn_type(fa->fa_type));
2325 if (fa->fa_tos)
2326 seq_printf(seq, "tos =%d\n",
2327 fa->fa_tos);
2328 seq_putc(seq, '\n');
2329 }
2330 }
2331 }
Robert Olsson19baf832005-06-21 12:43:18 -07002332 }
2333
2334 return 0;
2335}
2336
2337static struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002338 .start = fib_trie_seq_start,
2339 .next = fib_trie_seq_next,
2340 .stop = fib_trie_seq_stop,
2341 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002342};
2343
2344static int fib_trie_seq_open(struct inode *inode, struct file *file)
2345{
2346 struct seq_file *seq;
2347 int rc = -ENOMEM;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002348 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2349
2350 if (!s)
2351 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07002352
2353 rc = seq_open(file, &fib_trie_seq_ops);
2354 if (rc)
2355 goto out_kfree;
2356
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002357 seq = file->private_data;
2358 seq->private = s;
2359 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002360out:
2361 return rc;
2362out_kfree:
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002363 kfree(s);
Robert Olsson19baf832005-06-21 12:43:18 -07002364 goto out;
2365}
2366
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002367static struct file_operations fib_trie_fops = {
2368 .owner = THIS_MODULE,
2369 .open = fib_trie_seq_open,
2370 .read = seq_read,
2371 .llseek = seq_lseek,
2372 .release = seq_release_private,
2373};
2374
2375static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
2376{
2377 static unsigned type2flags[RTN_MAX + 1] = {
2378 [7] = RTF_REJECT, [8] = RTF_REJECT,
2379 };
2380 unsigned flags = type2flags[type];
2381
2382 if (fi && fi->fib_nh->nh_gw)
2383 flags |= RTF_GATEWAY;
2384 if (mask == 0xFFFFFFFF)
2385 flags |= RTF_HOST;
2386 flags |= RTF_UP;
2387 return flags;
2388}
2389
2390/*
2391 * This outputs /proc/net/route.
2392 * The format of the file is not supposed to be changed
2393 * and needs to be same as fib_hash output to avoid breaking
2394 * legacy utilities
2395 */
2396static int fib_route_seq_show(struct seq_file *seq, void *v)
2397{
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002398 const struct fib_trie_iter *iter = seq->private;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002399 struct leaf *l = v;
2400 int i;
2401 char bf[128];
2402
2403 if (v == SEQ_START_TOKEN) {
2404 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2405 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2406 "\tWindow\tIRTT");
2407 return 0;
2408 }
2409
Patrick McHardyc9e53cb2005-11-20 21:09:00 -08002410 if (iter->trie == trie_local)
2411 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002412 if (IS_TNODE(l))
2413 return 0;
2414
2415 for (i=32; i>=0; i--) {
Robert Olsson772cb712005-09-19 15:31:18 -07002416 struct leaf_info *li = find_leaf_info(l, i);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002417 struct fib_alias *fa;
2418 u32 mask, prefix;
2419
2420 if (!li)
2421 continue;
2422
2423 mask = inet_make_mask(li->plen);
2424 prefix = htonl(l->key);
2425
2426 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002427 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002428 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
2429
2430 if (fa->fa_type == RTN_BROADCAST
2431 || fa->fa_type == RTN_MULTICAST)
2432 continue;
2433
2434 if (fi)
2435 snprintf(bf, sizeof(bf),
2436 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2437 fi->fib_dev ? fi->fib_dev->name : "*",
2438 prefix,
2439 fi->fib_nh->nh_gw, flags, 0, 0,
2440 fi->fib_priority,
2441 mask,
2442 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
2443 fi->fib_window,
2444 fi->fib_rtt >> 3);
2445 else
2446 snprintf(bf, sizeof(bf),
2447 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
2448 prefix, 0, flags, 0, 0, 0,
2449 mask, 0, 0, 0);
2450
2451 seq_printf(seq, "%-127s\n", bf);
2452 }
2453 }
2454
2455 return 0;
2456}
2457
2458static struct seq_operations fib_route_seq_ops = {
2459 .start = fib_trie_seq_start,
2460 .next = fib_trie_seq_next,
2461 .stop = fib_trie_seq_stop,
2462 .show = fib_route_seq_show,
2463};
2464
2465static int fib_route_seq_open(struct inode *inode, struct file *file)
2466{
2467 struct seq_file *seq;
2468 int rc = -ENOMEM;
2469 struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
2470
2471 if (!s)
2472 goto out;
2473
2474 rc = seq_open(file, &fib_route_seq_ops);
2475 if (rc)
2476 goto out_kfree;
2477
2478 seq = file->private_data;
2479 seq->private = s;
2480 memset(s, 0, sizeof(*s));
2481out:
2482 return rc;
2483out_kfree:
2484 kfree(s);
2485 goto out;
2486}
2487
2488static struct file_operations fib_route_fops = {
2489 .owner = THIS_MODULE,
2490 .open = fib_route_seq_open,
2491 .read = seq_read,
2492 .llseek = seq_lseek,
2493 .release = seq_release_private,
Robert Olsson19baf832005-06-21 12:43:18 -07002494};
2495
2496int __init fib_proc_init(void)
2497{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002498 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
2499 goto out1;
2500
2501 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
2502 goto out2;
2503
2504 if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
2505 goto out3;
2506
Robert Olsson19baf832005-06-21 12:43:18 -07002507 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002508
2509out3:
2510 proc_net_remove("fib_triestat");
2511out2:
2512 proc_net_remove("fib_trie");
2513out1:
2514 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002515}
2516
2517void __init fib_proc_exit(void)
2518{
2519 proc_net_remove("fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002520 proc_net_remove("fib_triestat");
2521 proc_net_remove("route");
Robert Olsson19baf832005-06-21 12:43:18 -07002522}
2523
2524#endif /* CONFIG_PROC_FS */