blob: 58ba9f4f2c92eade0c84b5b14c84e2754d0bbccb [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
15 * This work is based on the LPC-trie which is originally descibed in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
20 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Robert Olsson05eee482007-03-19 16:27:37 -070051#define VERSION "0.408"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
54#include <asm/system.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070055#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070056#include <linux/types.h>
57#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070058#include <linux/mm.h>
59#include <linux/string.h>
60#include <linux/socket.h>
61#include <linux/sockios.h>
62#include <linux/errno.h>
63#include <linux/in.h>
64#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080065#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070066#include <linux/netdevice.h>
67#include <linux/if_arp.h>
68#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070069#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070070#include <linux/skbuff.h>
71#include <linux/netlink.h>
72#include <linux/init.h>
73#include <linux/list.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020074#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070075#include <net/ip.h>
76#include <net/protocol.h>
77#include <net/route.h>
78#include <net/tcp.h>
79#include <net/sock.h>
80#include <net/ip_fib.h>
81#include "fib_lookup.h"
82
Robert Olsson06ef9212006-03-20 21:35:01 -080083#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070084
Robert Olsson19baf832005-06-21 12:43:18 -070085#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070086
Robert Olsson19baf832005-06-21 12:43:18 -070087typedef unsigned int t_key;
88
89#define T_TNODE 0
90#define T_LEAF 1
91#define NODE_TYPE_MASK 0x1UL
Robert Olsson2373ce12005-08-25 13:01:29 -070092#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
93
Olof Johansson91b9a272005-08-09 20:24:39 -070094#define IS_TNODE(n) (!(n->parent & T_LEAF))
95#define IS_LEAF(n) (n->parent & T_LEAF)
Robert Olsson19baf832005-06-21 12:43:18 -070096
97struct node {
Olof Johansson91b9a272005-08-09 20:24:39 -070098 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -080099 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700100};
101
102struct leaf {
Olof Johansson91b9a272005-08-09 20:24:39 -0700103 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -0800104 t_key key;
Robert Olsson19baf832005-06-21 12:43:18 -0700105 struct hlist_head list;
Robert Olsson2373ce12005-08-25 13:01:29 -0700106 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700107};
108
109struct leaf_info {
110 struct hlist_node hlist;
Robert Olsson2373ce12005-08-25 13:01:29 -0700111 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700112 int plen;
113 struct list_head falh;
114};
115
116struct tnode {
Olof Johansson91b9a272005-08-09 20:24:39 -0700117 unsigned long parent;
Eric Dumazet8d965442008-01-13 22:31:44 -0800118 t_key key;
Eric Dumazet112d8cf2008-01-12 21:27:41 -0800119 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
120 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
Eric Dumazet8d965442008-01-13 22:31:44 -0800121 unsigned int full_children; /* KEYLENGTH bits needed */
122 unsigned int empty_children; /* KEYLENGTH bits needed */
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700123 union {
124 struct rcu_head rcu;
125 struct work_struct work;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700126 struct tnode *tnode_free;
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700127 };
Olof Johansson91b9a272005-08-09 20:24:39 -0700128 struct node *child[0];
Robert Olsson19baf832005-06-21 12:43:18 -0700129};
130
131#ifdef CONFIG_IP_FIB_TRIE_STATS
132struct trie_use_stats {
133 unsigned int gets;
134 unsigned int backtrack;
135 unsigned int semantic_match_passed;
136 unsigned int semantic_match_miss;
137 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700138 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700139};
140#endif
141
142struct trie_stat {
143 unsigned int totdepth;
144 unsigned int maxdepth;
145 unsigned int tnodes;
146 unsigned int leaves;
147 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800148 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800149 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700150};
Robert Olsson19baf832005-06-21 12:43:18 -0700151
152struct trie {
Olof Johansson91b9a272005-08-09 20:24:39 -0700153 struct node *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700154#ifdef CONFIG_IP_FIB_TRIE_STATS
155 struct trie_use_stats stats;
156#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700157};
158
Robert Olsson19baf832005-06-21 12:43:18 -0700159static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800160static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
161 int wasfull);
Robert Olsson19baf832005-06-21 12:43:18 -0700162static struct node *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700163static struct tnode *inflate(struct trie *t, struct tnode *tn);
164static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700165/* tnodes to free after resize(); protected by RTNL */
166static struct tnode *tnode_free_head;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000167static size_t tnode_free_size;
168
169/*
170 * synchronize_rcu after call_rcu for that many pages; it should be especially
171 * useful before resizing the root node with PREEMPT_NONE configs; the value was
172 * obtained experimentally, aiming to avoid visible slowdown.
173 */
174static const int sync_pages = 128;
Robert Olsson19baf832005-06-21 12:43:18 -0700175
Christoph Lametere18b8902006-12-06 20:33:20 -0800176static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800177static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700178
Stephen Hemminger06801912007-08-10 15:22:13 -0700179static inline struct tnode *node_parent(struct node *node)
180{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800181 return (struct tnode *)(node->parent & ~NODE_TYPE_MASK);
182}
Stephen Hemminger06801912007-08-10 15:22:13 -0700183
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800184static inline struct tnode *node_parent_rcu(struct node *node)
185{
186 struct tnode *ret = node_parent(node);
187
Stephen Hemminger06801912007-08-10 15:22:13 -0700188 return rcu_dereference(ret);
189}
190
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700191/* Same as rcu_assign_pointer
192 * but that macro() assumes that value is a pointer.
193 */
Stephen Hemminger06801912007-08-10 15:22:13 -0700194static inline void node_set_parent(struct node *node, struct tnode *ptr)
195{
Stephen Hemminger6440cc92008-03-22 17:59:58 -0700196 smp_wmb();
197 node->parent = (unsigned long)ptr | NODE_TYPE(node);
Stephen Hemminger06801912007-08-10 15:22:13 -0700198}
Robert Olsson2373ce12005-08-25 13:01:29 -0700199
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800200static inline struct node *tnode_get_child(struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700201{
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800202 BUG_ON(i >= 1U << tn->bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700203
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800204 return tn->child[i];
205}
206
207static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
208{
209 struct node *ret = tnode_get_child(tn, i);
210
211 return rcu_dereference(ret);
Robert Olsson19baf832005-06-21 12:43:18 -0700212}
213
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700214static inline int tnode_child_length(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700215{
Olof Johansson91b9a272005-08-09 20:24:39 -0700216 return 1 << tn->bits;
Robert Olsson19baf832005-06-21 12:43:18 -0700217}
218
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700219static inline t_key mask_pfx(t_key k, unsigned short l)
220{
221 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
222}
223
Robert Olsson19baf832005-06-21 12:43:18 -0700224static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
225{
Olof Johansson91b9a272005-08-09 20:24:39 -0700226 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700227 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700228 else
Robert Olsson19baf832005-06-21 12:43:18 -0700229 return 0;
230}
231
232static inline int tkey_equals(t_key a, t_key b)
233{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700234 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700235}
236
237static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
238{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700239 if (bits == 0 || offset >= KEYLENGTH)
240 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700241 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
242 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700243}
Robert Olsson19baf832005-06-21 12:43:18 -0700244
245static inline int tkey_mismatch(t_key a, int offset, t_key b)
246{
247 t_key diff = a ^ b;
248 int i = offset;
249
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700250 if (!diff)
251 return 0;
252 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700253 i++;
254 return i;
255}
256
Robert Olsson19baf832005-06-21 12:43:18 -0700257/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900258 To understand this stuff, an understanding of keys and all their bits is
259 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700260 all of the bits in that key are significant.
261
262 Consider a node 'n' and its parent 'tp'.
263
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900264 If n is a leaf, every bit in its key is significant. Its presence is
265 necessitated by path compression, since during a tree traversal (when
266 searching for a leaf - unless we are doing an insertion) we will completely
267 ignore all skipped bits we encounter. Thus we need to verify, at the end of
268 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700269 correct key path.
270
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900271 Note that we can never "miss" the correct key in the tree if present by
272 following the wrong path. Path compression ensures that segments of the key
273 that are the same for all keys with a given prefix are skipped, but the
274 skipped part *is* identical for each node in the subtrie below the skipped
275 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700276 call to tkey_sub_equals() in trie_insert().
277
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900278 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700279 have many different meanings.
280
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900281 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700282 _________________________________________________________________
283 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
284 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900285 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700286
287 _________________________________________________________________
288 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
289 -----------------------------------------------------------------
290 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
291
292 tp->pos = 7
293 tp->bits = 3
294 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700295 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700296
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900297 First, let's just ignore the bits that come before the parent tp, that is
298 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700299 not use them for anything.
300
301 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900302 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700303 'n' among tp's children.
304
305 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
306 for the node n.
307
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900308 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700309 of the bits are really not needed or indeed known in n->key.
310
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900311 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700312 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900313
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700314
Robert Olsson19baf832005-06-21 12:43:18 -0700315 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
316 at this point.
317
318*/
319
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700320static inline void check_tnode(const struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700321{
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700322 WARN_ON(tn && tn->pos+tn->bits > 32);
Robert Olsson19baf832005-06-21 12:43:18 -0700323}
324
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800325static const int halve_threshold = 25;
326static const int inflate_threshold = 50;
Jarek Poplawski345aa032009-07-07 19:39:16 -0700327static const int halve_threshold_root = 15;
328static const int inflate_threshold_root = 25;
Robert Olsson19baf832005-06-21 12:43:18 -0700329
Robert Olsson2373ce12005-08-25 13:01:29 -0700330
331static void __alias_free_mem(struct rcu_head *head)
332{
333 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
334 kmem_cache_free(fn_alias_kmem, fa);
335}
336
337static inline void alias_free_mem_rcu(struct fib_alias *fa)
338{
339 call_rcu(&fa->rcu, __alias_free_mem);
340}
341
342static void __leaf_free_rcu(struct rcu_head *head)
343{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800344 struct leaf *l = container_of(head, struct leaf, rcu);
345 kmem_cache_free(trie_leaf_kmem, l);
Robert Olsson2373ce12005-08-25 13:01:29 -0700346}
347
Stephen Hemminger387a5482008-04-10 03:47:34 -0700348static inline void free_leaf(struct leaf *l)
349{
350 call_rcu_bh(&l->rcu, __leaf_free_rcu);
351}
352
Robert Olsson2373ce12005-08-25 13:01:29 -0700353static void __leaf_info_free_rcu(struct rcu_head *head)
354{
355 kfree(container_of(head, struct leaf_info, rcu));
356}
357
358static inline void free_leaf_info(struct leaf_info *leaf)
359{
360 call_rcu(&leaf->rcu, __leaf_info_free_rcu);
361}
362
Eric Dumazet8d965442008-01-13 22:31:44 -0800363static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700364{
Robert Olsson2373ce12005-08-25 13:01:29 -0700365 if (size <= PAGE_SIZE)
Eric Dumazet8d965442008-01-13 22:31:44 -0800366 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700367 else
368 return __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
369}
Robert Olsson2373ce12005-08-25 13:01:29 -0700370
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700371static void __tnode_vfree(struct work_struct *arg)
372{
373 struct tnode *tn = container_of(arg, struct tnode, work);
374 vfree(tn);
Robert Olsson2373ce12005-08-25 13:01:29 -0700375}
376
377static void __tnode_free_rcu(struct rcu_head *head)
378{
379 struct tnode *tn = container_of(head, struct tnode, rcu);
Eric Dumazet8d965442008-01-13 22:31:44 -0800380 size_t size = sizeof(struct tnode) +
381 (sizeof(struct node *) << tn->bits);
Robert Olsson2373ce12005-08-25 13:01:29 -0700382
383 if (size <= PAGE_SIZE)
384 kfree(tn);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700385 else {
386 INIT_WORK(&tn->work, __tnode_vfree);
387 schedule_work(&tn->work);
388 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700389}
390
391static inline void tnode_free(struct tnode *tn)
392{
Stephen Hemminger387a5482008-04-10 03:47:34 -0700393 if (IS_LEAF(tn))
394 free_leaf((struct leaf *) tn);
395 else
Robert Olsson550e29b2006-04-04 12:53:35 -0700396 call_rcu(&tn->rcu, __tnode_free_rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700397}
398
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700399static void tnode_free_safe(struct tnode *tn)
400{
401 BUG_ON(IS_LEAF(tn));
Jarek Poplawski7b855762009-06-18 00:28:51 -0700402 tn->tnode_free = tnode_free_head;
403 tnode_free_head = tn;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000404 tnode_free_size += sizeof(struct tnode) +
405 (sizeof(struct node *) << tn->bits);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700406}
407
408static void tnode_free_flush(void)
409{
410 struct tnode *tn;
411
412 while ((tn = tnode_free_head)) {
413 tnode_free_head = tn->tnode_free;
414 tn->tnode_free = NULL;
415 tnode_free(tn);
416 }
Jarek Poplawskic3059472009-07-14 08:33:08 +0000417
418 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
419 tnode_free_size = 0;
420 synchronize_rcu();
421 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700422}
423
Robert Olsson19baf832005-06-21 12:43:18 -0700424static struct leaf *leaf_new(void)
425{
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800426 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700427 if (l) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700428 l->parent = T_LEAF;
Robert Olsson19baf832005-06-21 12:43:18 -0700429 INIT_HLIST_HEAD(&l->list);
430 }
431 return l;
432}
433
434static struct leaf_info *leaf_info_new(int plen)
435{
436 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700437 if (li) {
438 li->plen = plen;
439 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700440 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700441 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700442}
443
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800444static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700445{
Eric Dumazet8d965442008-01-13 22:31:44 -0800446 size_t sz = sizeof(struct tnode) + (sizeof(struct node *) << bits);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700447 struct tnode *tn = tnode_alloc(sz);
Robert Olsson19baf832005-06-21 12:43:18 -0700448
Olof Johansson91b9a272005-08-09 20:24:39 -0700449 if (tn) {
Robert Olsson2373ce12005-08-25 13:01:29 -0700450 tn->parent = T_TNODE;
Robert Olsson19baf832005-06-21 12:43:18 -0700451 tn->pos = pos;
452 tn->bits = bits;
453 tn->key = key;
454 tn->full_children = 0;
455 tn->empty_children = 1<<bits;
456 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700457
Eric Dumazet8d965442008-01-13 22:31:44 -0800458 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode),
459 (unsigned long) (sizeof(struct node) << bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700460 return tn;
461}
462
Robert Olsson19baf832005-06-21 12:43:18 -0700463/*
464 * Check whether a tnode 'n' is "full", i.e. it is an internal node
465 * and no bits are skipped. See discussion in dyntree paper p. 6
466 */
467
Stephen Hemmignerbb435b82005-08-09 20:25:39 -0700468static inline int tnode_full(const struct tnode *tn, const struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700469{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700470 if (n == NULL || IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700471 return 0;
472
473 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
474}
475
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800476static inline void put_child(struct trie *t, struct tnode *tn, int i,
477 struct node *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700478{
479 tnode_put_child_reorg(tn, i, n, -1);
480}
481
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700482 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700483 * Add a child at position i overwriting the old value.
484 * Update the value of full_children and empty_children.
485 */
486
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800487static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n,
488 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700489{
Robert Olsson2373ce12005-08-25 13:01:29 -0700490 struct node *chi = tn->child[i];
Robert Olsson19baf832005-06-21 12:43:18 -0700491 int isfull;
492
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700493 BUG_ON(i >= 1<<tn->bits);
494
Robert Olsson19baf832005-06-21 12:43:18 -0700495 /* update emptyChildren */
496 if (n == NULL && chi != NULL)
497 tn->empty_children++;
498 else if (n != NULL && chi == NULL)
499 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700500
Robert Olsson19baf832005-06-21 12:43:18 -0700501 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700502 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700503 wasfull = tnode_full(tn, chi);
504
505 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700506 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700507 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700508 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700509 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700510
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700511 if (n)
Stephen Hemminger06801912007-08-10 15:22:13 -0700512 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700513
Robert Olsson2373ce12005-08-25 13:01:29 -0700514 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700515}
516
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700517static struct node *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700518{
519 int i;
Robert Olsson2f368952005-07-05 15:02:40 -0700520 int err = 0;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700521 struct tnode *old_tn;
Robert Olssone6308be2005-10-04 13:01:58 -0700522 int inflate_threshold_use;
523 int halve_threshold_use;
Robert Olsson05eee482007-03-19 16:27:37 -0700524 int max_resize;
Robert Olsson19baf832005-06-21 12:43:18 -0700525
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900526 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700527 return NULL;
528
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700529 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
530 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700531
532 /* No children */
533 if (tn->empty_children == tnode_child_length(tn)) {
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700534 tnode_free_safe(tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700535 return NULL;
536 }
537 /* One child */
538 if (tn->empty_children == tnode_child_length(tn) - 1)
539 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700540 struct node *n;
Robert Olsson19baf832005-06-21 12:43:18 -0700541
Olof Johansson91b9a272005-08-09 20:24:39 -0700542 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700543 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700544 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700545
546 /* compress one level */
Stephen Hemminger06801912007-08-10 15:22:13 -0700547 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700548 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700549 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700550 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700551 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700552 * Double as long as the resulting node has a number of
553 * nonempty nodes that are above the threshold.
554 */
555
556 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700557 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
558 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700559 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700560 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700561 * children in the *doubled* node is at least 'high'."
562 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700563 * 'high' in this instance is the variable 'inflate_threshold'. It
564 * is expressed as a percentage, so we multiply it with
565 * tnode_child_length() and instead of multiplying by 2 (since the
566 * child array will be doubled by inflate()) and multiplying
567 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700568 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700569 *
570 * The left-hand side may look a bit weird: tnode_child_length(tn)
571 * - tn->empty_children is of course the number of non-null children
572 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700573 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700574 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700575 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700576 *
Robert Olsson19baf832005-06-21 12:43:18 -0700577 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700578 *
Robert Olsson19baf832005-06-21 12:43:18 -0700579 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700580 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700581 * tn->full_children;
582 *
583 * new_child_length = tnode_child_length(tn) * 2;
584 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700585 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700586 * new_child_length;
587 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700588 *
589 * ...and so on, tho it would mess up the while () loop.
590 *
Robert Olsson19baf832005-06-21 12:43:18 -0700591 * anyway,
592 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
593 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700594 *
Robert Olsson19baf832005-06-21 12:43:18 -0700595 * avoid a division:
596 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
597 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700598 *
Robert Olsson19baf832005-06-21 12:43:18 -0700599 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700600 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700601 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700602 *
Robert Olsson19baf832005-06-21 12:43:18 -0700603 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700604 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700605 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700606 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700607 *
Robert Olsson19baf832005-06-21 12:43:18 -0700608 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700609 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700610 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700611 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700612 *
Robert Olsson19baf832005-06-21 12:43:18 -0700613 */
614
615 check_tnode(tn);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700616
Robert Olssone6308be2005-10-04 13:01:58 -0700617 /* Keep root node larger */
618
Stephen Hemminger132adf52007-03-08 20:44:43 -0800619 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700620 inflate_threshold_use = inflate_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900621 else
Robert Olssone6308be2005-10-04 13:01:58 -0700622 inflate_threshold_use = inflate_threshold;
623
Robert Olsson2f368952005-07-05 15:02:40 -0700624 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700625 max_resize = 10;
626 while ((tn->full_children > 0 && max_resize-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800627 50 * (tn->full_children + tnode_child_length(tn)
628 - tn->empty_children)
629 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700630
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700631 old_tn = tn;
632 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800633
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700634 if (IS_ERR(tn)) {
635 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700636#ifdef CONFIG_IP_FIB_TRIE_STATS
637 t->stats.resize_node_skipped++;
638#endif
639 break;
640 }
Robert Olsson19baf832005-06-21 12:43:18 -0700641 }
642
Robert Olsson05eee482007-03-19 16:27:37 -0700643 if (max_resize < 0) {
644 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800645 pr_warning("Fix inflate_threshold_root."
646 " Now=%d size=%d bits\n",
647 inflate_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700648 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800649 pr_warning("Fix inflate_threshold."
650 " Now=%d size=%d bits\n",
651 inflate_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700652 }
653
Robert Olsson19baf832005-06-21 12:43:18 -0700654 check_tnode(tn);
655
656 /*
657 * Halve as long as the number of empty children in this
658 * node is above threshold.
659 */
Robert Olsson2f368952005-07-05 15:02:40 -0700660
Robert Olssone6308be2005-10-04 13:01:58 -0700661
662 /* Keep root node larger */
663
Stephen Hemminger132adf52007-03-08 20:44:43 -0800664 if (!tn->parent)
Robert Olssone6308be2005-10-04 13:01:58 -0700665 halve_threshold_use = halve_threshold_root;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900666 else
Robert Olssone6308be2005-10-04 13:01:58 -0700667 halve_threshold_use = halve_threshold;
668
Robert Olsson2f368952005-07-05 15:02:40 -0700669 err = 0;
Robert Olsson05eee482007-03-19 16:27:37 -0700670 max_resize = 10;
671 while (tn->bits > 1 && max_resize-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700672 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700673 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700674
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700675 old_tn = tn;
676 tn = halve(t, tn);
677 if (IS_ERR(tn)) {
678 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700679#ifdef CONFIG_IP_FIB_TRIE_STATS
680 t->stats.resize_node_skipped++;
681#endif
682 break;
683 }
684 }
685
Robert Olsson05eee482007-03-19 16:27:37 -0700686 if (max_resize < 0) {
687 if (!tn->parent)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800688 pr_warning("Fix halve_threshold_root."
689 " Now=%d size=%d bits\n",
690 halve_threshold_root, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700691 else
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800692 pr_warning("Fix halve_threshold."
693 " Now=%d size=%d bits\n",
694 halve_threshold, tn->bits);
Robert Olsson05eee482007-03-19 16:27:37 -0700695 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700696
Robert Olsson19baf832005-06-21 12:43:18 -0700697 /* Only one child remains */
Robert Olsson19baf832005-06-21 12:43:18 -0700698 if (tn->empty_children == tnode_child_length(tn) - 1)
699 for (i = 0; i < tnode_child_length(tn); i++) {
Olof Johansson91b9a272005-08-09 20:24:39 -0700700 struct node *n;
701
Olof Johansson91b9a272005-08-09 20:24:39 -0700702 n = tn->child[i];
Robert Olsson2373ce12005-08-25 13:01:29 -0700703 if (!n)
Olof Johansson91b9a272005-08-09 20:24:39 -0700704 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -0700705
706 /* compress one level */
707
Stephen Hemminger06801912007-08-10 15:22:13 -0700708 node_set_parent(n, NULL);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700709 tnode_free_safe(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -0700710 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700711 }
712
713 return (struct node *) tn;
714}
715
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700716static struct tnode *inflate(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700717{
Robert Olsson19baf832005-06-21 12:43:18 -0700718 struct tnode *oldtnode = tn;
719 int olen = tnode_child_length(tn);
720 int i;
721
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700722 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700723
724 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
725
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700726 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700727 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700728
729 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700730 * Preallocate and store tnodes before the actual work so we
731 * don't get into an inconsistent state if memory allocation
732 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700733 * of tnode is ignored.
734 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700735
736 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800737 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700738
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800739 inode = (struct tnode *) tnode_get_child(oldtnode, i);
Robert Olsson2f368952005-07-05 15:02:40 -0700740 if (inode &&
741 IS_TNODE(inode) &&
742 inode->pos == oldtnode->pos + oldtnode->bits &&
743 inode->bits > 1) {
744 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700745 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700746
Robert Olsson2f368952005-07-05 15:02:40 -0700747 left = tnode_new(inode->key&(~m), inode->pos + 1,
748 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700749 if (!left)
750 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700751
Robert Olsson2f368952005-07-05 15:02:40 -0700752 right = tnode_new(inode->key|m, inode->pos + 1,
753 inode->bits - 1);
754
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900755 if (!right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700756 tnode_free(left);
757 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900758 }
Robert Olsson2f368952005-07-05 15:02:40 -0700759
760 put_child(t, tn, 2*i, (struct node *) left);
761 put_child(t, tn, 2*i+1, (struct node *) right);
762 }
763 }
764
Olof Johansson91b9a272005-08-09 20:24:39 -0700765 for (i = 0; i < olen; i++) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -0800766 struct tnode *inode;
Robert Olsson19baf832005-06-21 12:43:18 -0700767 struct node *node = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700768 struct tnode *left, *right;
769 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700770
Robert Olsson19baf832005-06-21 12:43:18 -0700771 /* An empty child */
772 if (node == NULL)
773 continue;
774
775 /* A leaf or an internal node with skipped bits */
776
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700777 if (IS_LEAF(node) || ((struct tnode *) node)->pos >
Robert Olsson19baf832005-06-21 12:43:18 -0700778 tn->pos + tn->bits - 1) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800779 if (tkey_extract_bits(node->key,
780 oldtnode->pos + oldtnode->bits,
781 1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700782 put_child(t, tn, 2*i, node);
783 else
784 put_child(t, tn, 2*i+1, node);
785 continue;
786 }
787
788 /* An internal node with two children */
789 inode = (struct tnode *) node;
790
791 if (inode->bits == 1) {
792 put_child(t, tn, 2*i, inode->child[0]);
793 put_child(t, tn, 2*i+1, inode->child[1]);
794
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700795 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700796 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700797 }
798
Olof Johansson91b9a272005-08-09 20:24:39 -0700799 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700800
Olof Johansson91b9a272005-08-09 20:24:39 -0700801 /* We will replace this node 'inode' with two new
802 * ones, 'left' and 'right', each with half of the
803 * original children. The two new nodes will have
804 * a position one bit further down the key and this
805 * means that the "significant" part of their keys
806 * (see the discussion near the top of this file)
807 * will differ by one bit, which will be "0" in
808 * left's key and "1" in right's key. Since we are
809 * moving the key position by one step, the bit that
810 * we are moving away from - the bit at position
811 * (inode->pos) - is the one that will differ between
812 * left and right. So... we synthesize that bit in the
813 * two new keys.
814 * The mask 'm' below will be a single "one" bit at
815 * the position (inode->pos)
816 */
Robert Olsson19baf832005-06-21 12:43:18 -0700817
Olof Johansson91b9a272005-08-09 20:24:39 -0700818 /* Use the old key, but set the new significant
819 * bit to zero.
820 */
Robert Olsson19baf832005-06-21 12:43:18 -0700821
Olof Johansson91b9a272005-08-09 20:24:39 -0700822 left = (struct tnode *) tnode_get_child(tn, 2*i);
823 put_child(t, tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700824
Olof Johansson91b9a272005-08-09 20:24:39 -0700825 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700826
Olof Johansson91b9a272005-08-09 20:24:39 -0700827 right = (struct tnode *) tnode_get_child(tn, 2*i+1);
828 put_child(t, tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700829
Olof Johansson91b9a272005-08-09 20:24:39 -0700830 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700831
Olof Johansson91b9a272005-08-09 20:24:39 -0700832 size = tnode_child_length(left);
833 for (j = 0; j < size; j++) {
834 put_child(t, left, j, inode->child[j]);
835 put_child(t, right, j, inode->child[j + size]);
Robert Olsson19baf832005-06-21 12:43:18 -0700836 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700837 put_child(t, tn, 2*i, resize(t, left));
838 put_child(t, tn, 2*i+1, resize(t, right));
839
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700840 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700841 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700842 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700843 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700844nomem:
845 {
846 int size = tnode_child_length(tn);
847 int j;
848
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700849 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700850 if (tn->child[j])
851 tnode_free((struct tnode *)tn->child[j]);
852
853 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700854
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700855 return ERR_PTR(-ENOMEM);
856 }
Robert Olsson19baf832005-06-21 12:43:18 -0700857}
858
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700859static struct tnode *halve(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700860{
861 struct tnode *oldtnode = tn;
862 struct node *left, *right;
863 int i;
864 int olen = tnode_child_length(tn);
865
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700866 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700867
868 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700869
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700870 if (!tn)
871 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700872
873 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700874 * Preallocate and store tnodes before the actual work so we
875 * don't get into an inconsistent state if memory allocation
876 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700877 * of tnode is ignored.
878 */
879
Olof Johansson91b9a272005-08-09 20:24:39 -0700880 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700881 left = tnode_get_child(oldtnode, i);
882 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700883
Robert Olsson2f368952005-07-05 15:02:40 -0700884 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700885 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700886 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700887
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700888 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700889
890 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700891 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700892
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700893 put_child(t, tn, i/2, (struct node *)newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700894 }
Robert Olsson2f368952005-07-05 15:02:40 -0700895
Robert Olsson2f368952005-07-05 15:02:40 -0700896 }
Robert Olsson19baf832005-06-21 12:43:18 -0700897
Olof Johansson91b9a272005-08-09 20:24:39 -0700898 for (i = 0; i < olen; i += 2) {
899 struct tnode *newBinNode;
900
Robert Olsson19baf832005-06-21 12:43:18 -0700901 left = tnode_get_child(oldtnode, i);
902 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700903
Robert Olsson19baf832005-06-21 12:43:18 -0700904 /* At least one of the children is empty */
905 if (left == NULL) {
906 if (right == NULL) /* Both are empty */
907 continue;
908 put_child(t, tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700909 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700910 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700911
912 if (right == NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700913 put_child(t, tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700914 continue;
915 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700916
Robert Olsson19baf832005-06-21 12:43:18 -0700917 /* Two nonempty children */
Olof Johansson91b9a272005-08-09 20:24:39 -0700918 newBinNode = (struct tnode *) tnode_get_child(tn, i/2);
919 put_child(t, tn, i/2, NULL);
Olof Johansson91b9a272005-08-09 20:24:39 -0700920 put_child(t, newBinNode, 0, left);
921 put_child(t, newBinNode, 1, right);
922 put_child(t, tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700923 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700924 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700925 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700926nomem:
927 {
928 int size = tnode_child_length(tn);
929 int j;
930
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700931 for (j = 0; j < size; j++)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700932 if (tn->child[j])
933 tnode_free((struct tnode *)tn->child[j]);
934
935 tnode_free(tn);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700936
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700937 return ERR_PTR(-ENOMEM);
938 }
Robert Olsson19baf832005-06-21 12:43:18 -0700939}
940
Robert Olsson772cb712005-09-19 15:31:18 -0700941/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700942 via get_fa_head and dump */
943
Robert Olsson772cb712005-09-19 15:31:18 -0700944static struct leaf_info *find_leaf_info(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700945{
Robert Olsson772cb712005-09-19 15:31:18 -0700946 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700947 struct hlist_node *node;
948 struct leaf_info *li;
949
Robert Olsson2373ce12005-08-25 13:01:29 -0700950 hlist_for_each_entry_rcu(li, node, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700951 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700952 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700953
Robert Olsson19baf832005-06-21 12:43:18 -0700954 return NULL;
955}
956
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800957static inline struct list_head *get_fa_head(struct leaf *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700958{
Robert Olsson772cb712005-09-19 15:31:18 -0700959 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700960
Olof Johansson91b9a272005-08-09 20:24:39 -0700961 if (!li)
962 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700963
Olof Johansson91b9a272005-08-09 20:24:39 -0700964 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700965}
966
967static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
968{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900969 struct leaf_info *li = NULL, *last = NULL;
970 struct hlist_node *node;
Robert Olsson19baf832005-06-21 12:43:18 -0700971
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900972 if (hlist_empty(head)) {
973 hlist_add_head_rcu(&new->hlist, head);
974 } else {
975 hlist_for_each_entry(li, node, head, hlist) {
976 if (new->plen > li->plen)
977 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700978
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900979 last = li;
980 }
981 if (last)
982 hlist_add_after_rcu(&last->hlist, &new->hlist);
983 else
984 hlist_add_before_rcu(&new->hlist, &li->hlist);
985 }
Robert Olsson19baf832005-06-21 12:43:18 -0700986}
987
Robert Olsson2373ce12005-08-25 13:01:29 -0700988/* rcu_read_lock needs to be hold by caller from readside */
989
Robert Olsson19baf832005-06-21 12:43:18 -0700990static struct leaf *
991fib_find_node(struct trie *t, u32 key)
992{
993 int pos;
994 struct tnode *tn;
995 struct node *n;
996
997 pos = 0;
Robert Olsson2373ce12005-08-25 13:01:29 -0700998 n = rcu_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700999
1000 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1001 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001002
Robert Olsson19baf832005-06-21 12:43:18 -07001003 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001004
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001005 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001006 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001007 n = tnode_get_child_rcu(tn,
1008 tkey_extract_bits(key,
1009 tn->pos,
1010 tn->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -07001011 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001012 break;
1013 }
1014 /* Case we have found a leaf. Compare prefixes */
1015
Olof Johansson91b9a272005-08-09 20:24:39 -07001016 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
1017 return (struct leaf *)n;
1018
Robert Olsson19baf832005-06-21 12:43:18 -07001019 return NULL;
1020}
1021
Jarek Poplawski7b855762009-06-18 00:28:51 -07001022static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -07001023{
Robert Olsson19baf832005-06-21 12:43:18 -07001024 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -07001025 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -07001026 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001027
Robert Olsson3ed18d72009-05-21 15:20:59 -07001028 key = tn->key;
1029
Stephen Hemminger06801912007-08-10 15:22:13 -07001030 while (tn != NULL && (tp = node_parent((struct node *)tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -07001031 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1032 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001033 tn = (struct tnode *) resize(t, (struct tnode *)tn);
1034
1035 tnode_put_child_reorg((struct tnode *)tp, cindex,
1036 (struct node *)tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -07001037
Stephen Hemminger06801912007-08-10 15:22:13 -07001038 tp = node_parent((struct node *) tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -07001039 if (!tp)
1040 rcu_assign_pointer(t->trie, (struct node *)tn);
1041
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -07001042 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -07001043 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -07001044 break;
Stephen Hemminger06801912007-08-10 15:22:13 -07001045 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -07001046 }
Stephen Hemminger06801912007-08-10 15:22:13 -07001047
Robert Olsson19baf832005-06-21 12:43:18 -07001048 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -07001049 if (IS_TNODE(tn))
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001050 tn = (struct tnode *)resize(t, (struct tnode *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001051
Jarek Poplawski7b855762009-06-18 00:28:51 -07001052 rcu_assign_pointer(t->trie, (struct node *)tn);
1053 tnode_free_flush();
1054
1055 return;
Robert Olsson19baf832005-06-21 12:43:18 -07001056}
1057
Robert Olsson2373ce12005-08-25 13:01:29 -07001058/* only used from updater-side */
1059
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001060static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -07001061{
1062 int pos, newpos;
1063 struct tnode *tp = NULL, *tn = NULL;
1064 struct node *n;
1065 struct leaf *l;
1066 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001067 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001068 struct leaf_info *li;
1069 t_key cindex;
1070
1071 pos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001072 n = t->trie;
Robert Olsson19baf832005-06-21 12:43:18 -07001073
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001074 /* If we point to NULL, stop. Either the tree is empty and we should
1075 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -07001076 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001077 * If we point to a T_TNODE, check if it matches our key. Note that
1078 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -07001079 * not be the parent's 'pos'+'bits'!
1080 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001081 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -07001082 * the index from our key, push the T_TNODE and walk the tree.
1083 *
1084 * If it doesn't, we have to replace it with a new T_TNODE.
1085 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001086 * If we point to a T_LEAF, it might or might not have the same key
1087 * as we do. If it does, just change the value, update the T_LEAF's
1088 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -07001089 * If it doesn't, we need to replace it with a T_TNODE.
1090 */
1091
1092 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
1093 tn = (struct tnode *) n;
Olof Johansson91b9a272005-08-09 20:24:39 -07001094
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001095 check_tnode(tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001096
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001097 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001098 tp = tn;
Olof Johansson91b9a272005-08-09 20:24:39 -07001099 pos = tn->pos + tn->bits;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001100 n = tnode_get_child(tn,
1101 tkey_extract_bits(key,
1102 tn->pos,
1103 tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -07001104
Stephen Hemminger06801912007-08-10 15:22:13 -07001105 BUG_ON(n && node_parent(n) != tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001106 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001107 break;
1108 }
1109
1110 /*
1111 * n ----> NULL, LEAF or TNODE
1112 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001113 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001114 */
1115
Olof Johansson91b9a272005-08-09 20:24:39 -07001116 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001117
1118 /* Case 1: n is a leaf. Compare prefixes */
1119
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001120 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08001121 l = (struct leaf *) n;
Robert Olsson19baf832005-06-21 12:43:18 -07001122 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001123
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001124 if (!li)
1125 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001126
1127 fa_head = &li->falh;
1128 insert_leaf_info(&l->list, li);
1129 goto done;
1130 }
Robert Olsson19baf832005-06-21 12:43:18 -07001131 l = leaf_new();
1132
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001133 if (!l)
1134 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001135
1136 l->key = key;
1137 li = leaf_info_new(plen);
1138
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001139 if (!li) {
Stephen Hemminger387a5482008-04-10 03:47:34 -07001140 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001141 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001142 }
Robert Olsson19baf832005-06-21 12:43:18 -07001143
1144 fa_head = &li->falh;
1145 insert_leaf_info(&l->list, li);
1146
Robert Olsson19baf832005-06-21 12:43:18 -07001147 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001148 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001149
Stephen Hemminger06801912007-08-10 15:22:13 -07001150 node_set_parent((struct node *)l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001151
Olof Johansson91b9a272005-08-09 20:24:39 -07001152 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1153 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
1154 } else {
1155 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001156 /*
1157 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001158 * first tnode need some special handling
1159 */
1160
1161 if (tp)
Olof Johansson91b9a272005-08-09 20:24:39 -07001162 pos = tp->pos+tp->bits;
Robert Olsson19baf832005-06-21 12:43:18 -07001163 else
Olof Johansson91b9a272005-08-09 20:24:39 -07001164 pos = 0;
1165
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001166 if (n) {
Robert Olsson19baf832005-06-21 12:43:18 -07001167 newpos = tkey_mismatch(key, pos, n->key);
1168 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001169 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001170 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001171 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001172 }
Robert Olsson19baf832005-06-21 12:43:18 -07001173
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001174 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001175 free_leaf_info(li);
Stephen Hemminger387a5482008-04-10 03:47:34 -07001176 free_leaf(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001177 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001178 }
1179
Stephen Hemminger06801912007-08-10 15:22:13 -07001180 node_set_parent((struct node *)tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001181
Olof Johansson91b9a272005-08-09 20:24:39 -07001182 missbit = tkey_extract_bits(key, newpos, 1);
Robert Olsson19baf832005-06-21 12:43:18 -07001183 put_child(t, tn, missbit, (struct node *)l);
1184 put_child(t, tn, 1-missbit, n);
1185
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001186 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001187 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001188 put_child(t, (struct tnode *)tp, cindex,
1189 (struct node *)tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001190 } else {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001191 rcu_assign_pointer(t->trie, (struct node *)tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001192 tp = tn;
1193 }
1194 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001195
1196 if (tp && tp->pos + tp->bits > 32)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001197 pr_warning("fib_trie"
1198 " tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1199 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001200
Robert Olsson19baf832005-06-21 12:43:18 -07001201 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001202
Jarek Poplawski7b855762009-06-18 00:28:51 -07001203 trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001204done:
Robert Olsson19baf832005-06-21 12:43:18 -07001205 return fa_head;
1206}
1207
Robert Olssond562f1f2007-03-26 14:22:22 -07001208/*
1209 * Caller must hold RTNL.
1210 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001211static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001212{
1213 struct trie *t = (struct trie *) tb->tb_data;
1214 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001215 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001216 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001217 int plen = cfg->fc_dst_len;
1218 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001219 u32 key, mask;
1220 int err;
1221 struct leaf *l;
1222
1223 if (plen > 32)
1224 return -EINVAL;
1225
Thomas Graf4e902c52006-08-17 18:14:52 -07001226 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001227
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001228 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001229
Olof Johansson91b9a272005-08-09 20:24:39 -07001230 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001231
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001232 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001233 return -EINVAL;
1234
1235 key = key & mask;
1236
Thomas Graf4e902c52006-08-17 18:14:52 -07001237 fi = fib_create_info(cfg);
1238 if (IS_ERR(fi)) {
1239 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001240 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001241 }
Robert Olsson19baf832005-06-21 12:43:18 -07001242
1243 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001244 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001245
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001246 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001247 fa_head = get_fa_head(l, plen);
1248 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1249 }
1250
1251 /* Now fa, if non-NULL, points to the first fib alias
1252 * with the same keys [prefix,tos,priority], if such key already
1253 * exists or to the node before which we will insert new one.
1254 *
1255 * If fa is NULL, we will need to allocate a new one and
1256 * insert to the head of f.
1257 *
1258 * If f is NULL, no fib node matched the destination key
1259 * and we need to allocate a new one of those as well.
1260 */
1261
Julian Anastasov936f6f82008-01-28 21:18:06 -08001262 if (fa && fa->fa_tos == tos &&
1263 fa->fa_info->fib_priority == fi->fib_priority) {
1264 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001265
1266 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001267 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001268 goto out;
1269
Julian Anastasov936f6f82008-01-28 21:18:06 -08001270 /* We have 2 goals:
1271 * 1. Find exact match for type, scope, fib_info to avoid
1272 * duplicate routes
1273 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1274 */
1275 fa_match = NULL;
1276 fa_first = fa;
1277 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1278 list_for_each_entry_continue(fa, fa_head, fa_list) {
1279 if (fa->fa_tos != tos)
1280 break;
1281 if (fa->fa_info->fib_priority != fi->fib_priority)
1282 break;
1283 if (fa->fa_type == cfg->fc_type &&
1284 fa->fa_scope == cfg->fc_scope &&
1285 fa->fa_info == fi) {
1286 fa_match = fa;
1287 break;
1288 }
1289 }
1290
Thomas Graf4e902c52006-08-17 18:14:52 -07001291 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001292 struct fib_info *fi_drop;
1293 u8 state;
1294
Julian Anastasov936f6f82008-01-28 21:18:06 -08001295 fa = fa_first;
1296 if (fa_match) {
1297 if (fa == fa_match)
1298 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001299 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001300 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001301 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001302 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001303 if (new_fa == NULL)
1304 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001305
1306 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001307 new_fa->fa_tos = fa->fa_tos;
1308 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001309 new_fa->fa_type = cfg->fc_type;
1310 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001311 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001312 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001313
Robert Olsson2373ce12005-08-25 13:01:29 -07001314 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1315 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001316
1317 fib_release_info(fi_drop);
1318 if (state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001319 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Milan Kocianb8f55832007-05-23 14:55:06 -07001320 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1321 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001322
Olof Johansson91b9a272005-08-09 20:24:39 -07001323 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001324 }
1325 /* Error if we find a perfect match which
1326 * uses the same scope, type, and nexthop
1327 * information.
1328 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001329 if (fa_match)
1330 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001331
Thomas Graf4e902c52006-08-17 18:14:52 -07001332 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001333 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001334 }
1335 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001336 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001337 goto out;
1338
1339 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001340 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001341 if (new_fa == NULL)
1342 goto out;
1343
1344 new_fa->fa_info = fi;
1345 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001346 new_fa->fa_type = cfg->fc_type;
1347 new_fa->fa_scope = cfg->fc_scope;
Robert Olsson19baf832005-06-21 12:43:18 -07001348 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001349 /*
1350 * Insert new entry to the list.
1351 */
1352
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001353 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001354 fa_head = fib_insert_node(t, key, plen);
1355 if (unlikely(!fa_head)) {
1356 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001357 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001358 }
Robert Olssonf835e472005-06-28 15:00:39 -07001359 }
Robert Olsson19baf832005-06-21 12:43:18 -07001360
Robert Olsson2373ce12005-08-25 13:01:29 -07001361 list_add_tail_rcu(&new_fa->fa_list,
1362 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001363
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001364 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Thomas Graf4e902c52006-08-17 18:14:52 -07001365 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001366 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001367succeeded:
1368 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001369
1370out_free_new_fa:
1371 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001372out:
1373 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001374err:
Robert Olsson19baf832005-06-21 12:43:18 -07001375 return err;
1376}
1377
Robert Olsson772cb712005-09-19 15:31:18 -07001378/* should be called with rcu_read_lock */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001379static int check_leaf(struct trie *t, struct leaf *l,
1380 t_key key, const struct flowi *flp,
1381 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001382{
Robert Olsson19baf832005-06-21 12:43:18 -07001383 struct leaf_info *li;
1384 struct hlist_head *hhead = &l->list;
1385 struct hlist_node *node;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001386
Robert Olsson2373ce12005-08-25 13:01:29 -07001387 hlist_for_each_entry_rcu(li, node, hhead, hlist) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001388 int err;
1389 int plen = li->plen;
1390 __be32 mask = inet_make_mask(plen);
1391
Al Viro888454c2006-09-19 13:42:46 -07001392 if (l->key != (key & ntohl(mask)))
Robert Olsson19baf832005-06-21 12:43:18 -07001393 continue;
1394
Rami Rosene204a342009-05-18 01:19:12 +00001395 err = fib_semantic_match(&li->falh, flp, res, plen);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001396
Robert Olsson19baf832005-06-21 12:43:18 -07001397#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001398 if (err <= 0)
Robert Olsson19baf832005-06-21 12:43:18 -07001399 t->stats.semantic_match_passed++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001400 else
1401 t->stats.semantic_match_miss++;
Robert Olsson19baf832005-06-21 12:43:18 -07001402#endif
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001403 if (err <= 0)
Ben Hutchings2e655572008-07-10 16:52:52 -07001404 return err;
Robert Olsson19baf832005-06-21 12:43:18 -07001405 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001406
Ben Hutchings2e655572008-07-10 16:52:52 -07001407 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001408}
1409
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001410static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp,
1411 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001412{
1413 struct trie *t = (struct trie *) tb->tb_data;
Ben Hutchings2e655572008-07-10 16:52:52 -07001414 int ret;
Robert Olsson19baf832005-06-21 12:43:18 -07001415 struct node *n;
1416 struct tnode *pn;
1417 int pos, bits;
Olof Johansson91b9a272005-08-09 20:24:39 -07001418 t_key key = ntohl(flp->fl4_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001419 int chopped_off;
1420 t_key cindex = 0;
1421 int current_prefix_length = KEYLENGTH;
Olof Johansson91b9a272005-08-09 20:24:39 -07001422 struct tnode *cn;
1423 t_key node_prefix, key_prefix, pref_mismatch;
1424 int mp;
1425
Robert Olsson2373ce12005-08-25 13:01:29 -07001426 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001427
Robert Olsson2373ce12005-08-25 13:01:29 -07001428 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001429 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001430 goto failed;
1431
1432#ifdef CONFIG_IP_FIB_TRIE_STATS
1433 t->stats.gets++;
1434#endif
1435
1436 /* Just a leaf? */
1437 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001438 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001439 goto found;
Robert Olsson19baf832005-06-21 12:43:18 -07001440 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001441
Robert Olsson19baf832005-06-21 12:43:18 -07001442 pn = (struct tnode *) n;
1443 chopped_off = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001444
Olof Johansson91b9a272005-08-09 20:24:39 -07001445 while (pn) {
Robert Olsson19baf832005-06-21 12:43:18 -07001446 pos = pn->pos;
1447 bits = pn->bits;
1448
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001449 if (!chopped_off)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001450 cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length),
1451 pos, bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001452
1453 n = tnode_get_child(pn, cindex);
1454
1455 if (n == NULL) {
1456#ifdef CONFIG_IP_FIB_TRIE_STATS
1457 t->stats.null_node_hit++;
1458#endif
1459 goto backtrace;
1460 }
1461
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001462 if (IS_LEAF(n)) {
Ben Hutchings2e655572008-07-10 16:52:52 -07001463 ret = check_leaf(t, (struct leaf *)n, key, flp, res);
1464 if (ret > 0)
Olof Johansson91b9a272005-08-09 20:24:39 -07001465 goto backtrace;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001466 goto found;
Olof Johansson91b9a272005-08-09 20:24:39 -07001467 }
1468
Olof Johansson91b9a272005-08-09 20:24:39 -07001469 cn = (struct tnode *)n;
1470
1471 /*
1472 * It's a tnode, and we can do some extra checks here if we
1473 * like, to avoid descending into a dead-end branch.
1474 * This tnode is in the parent's child array at index
1475 * key[p_pos..p_pos+p_bits] but potentially with some bits
1476 * chopped off, so in reality the index may be just a
1477 * subprefix, padded with zero at the end.
1478 * We can also take a look at any skipped bits in this
1479 * tnode - everything up to p_pos is supposed to be ok,
1480 * and the non-chopped bits of the index (se previous
1481 * paragraph) are also guaranteed ok, but the rest is
1482 * considered unknown.
1483 *
1484 * The skipped bits are key[pos+bits..cn->pos].
1485 */
1486
1487 /* If current_prefix_length < pos+bits, we are already doing
1488 * actual prefix matching, which means everything from
1489 * pos+(bits-chopped_off) onward must be zero along some
1490 * branch of this subtree - otherwise there is *no* valid
1491 * prefix present. Here we can only check the skipped
1492 * bits. Remember, since we have already indexed into the
1493 * parent's child array, we know that the bits we chopped of
1494 * *are* zero.
1495 */
1496
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001497 /* NOTA BENE: Checking only skipped bits
1498 for the new node here */
Olof Johansson91b9a272005-08-09 20:24:39 -07001499
1500 if (current_prefix_length < pos+bits) {
1501 if (tkey_extract_bits(cn->key, current_prefix_length,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001502 cn->pos - current_prefix_length)
1503 || !(cn->child[0]))
Olof Johansson91b9a272005-08-09 20:24:39 -07001504 goto backtrace;
1505 }
1506
1507 /*
1508 * If chopped_off=0, the index is fully validated and we
1509 * only need to look at the skipped bits for this, the new,
1510 * tnode. What we actually want to do is to find out if
1511 * these skipped bits match our key perfectly, or if we will
1512 * have to count on finding a matching prefix further down,
1513 * because if we do, we would like to have some way of
1514 * verifying the existence of such a prefix at this point.
1515 */
1516
1517 /* The only thing we can do at this point is to verify that
1518 * any such matching prefix can indeed be a prefix to our
1519 * key, and if the bits in the node we are inspecting that
1520 * do not match our key are not ZERO, this cannot be true.
1521 * Thus, find out where there is a mismatch (before cn->pos)
1522 * and verify that all the mismatching bits are zero in the
1523 * new tnode's key.
1524 */
1525
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001526 /*
1527 * Note: We aren't very concerned about the piece of
1528 * the key that precede pn->pos+pn->bits, since these
1529 * have already been checked. The bits after cn->pos
1530 * aren't checked since these are by definition
1531 * "unknown" at this point. Thus, what we want to see
1532 * is if we are about to enter the "prefix matching"
1533 * state, and in that case verify that the skipped
1534 * bits that will prevail throughout this subtree are
1535 * zero, as they have to be if we are to find a
1536 * matching prefix.
Olof Johansson91b9a272005-08-09 20:24:39 -07001537 */
1538
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07001539 node_prefix = mask_pfx(cn->key, cn->pos);
1540 key_prefix = mask_pfx(key, cn->pos);
Olof Johansson91b9a272005-08-09 20:24:39 -07001541 pref_mismatch = key_prefix^node_prefix;
1542 mp = 0;
1543
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001544 /*
1545 * In short: If skipped bits in this node do not match
1546 * the search key, enter the "prefix matching"
1547 * state.directly.
Olof Johansson91b9a272005-08-09 20:24:39 -07001548 */
1549 if (pref_mismatch) {
1550 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1551 mp++;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001552 pref_mismatch = pref_mismatch << 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001553 }
1554 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1555
1556 if (key_prefix != 0)
1557 goto backtrace;
1558
1559 if (current_prefix_length >= cn->pos)
1560 current_prefix_length = mp;
1561 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001562
Olof Johansson91b9a272005-08-09 20:24:39 -07001563 pn = (struct tnode *)n; /* Descend */
1564 chopped_off = 0;
1565 continue;
1566
Robert Olsson19baf832005-06-21 12:43:18 -07001567backtrace:
1568 chopped_off++;
1569
1570 /* As zero don't change the child key (cindex) */
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001571 while ((chopped_off <= pn->bits)
1572 && !(cindex & (1<<(chopped_off-1))))
Robert Olsson19baf832005-06-21 12:43:18 -07001573 chopped_off++;
Robert Olsson19baf832005-06-21 12:43:18 -07001574
1575 /* Decrease current_... with bits chopped off */
1576 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001577 current_prefix_length = pn->pos + pn->bits
1578 - chopped_off;
Olof Johansson91b9a272005-08-09 20:24:39 -07001579
Robert Olsson19baf832005-06-21 12:43:18 -07001580 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001581 * Either we do the actual chop off according or if we have
Robert Olsson19baf832005-06-21 12:43:18 -07001582 * chopped off all bits in this tnode walk up to our parent.
1583 */
1584
Olof Johansson91b9a272005-08-09 20:24:39 -07001585 if (chopped_off <= pn->bits) {
Robert Olsson19baf832005-06-21 12:43:18 -07001586 cindex &= ~(1 << (chopped_off-1));
Olof Johansson91b9a272005-08-09 20:24:39 -07001587 } else {
Stephen Hemminger06801912007-08-10 15:22:13 -07001588 struct tnode *parent = node_parent((struct node *) pn);
1589 if (!parent)
Robert Olsson19baf832005-06-21 12:43:18 -07001590 goto failed;
Olof Johansson91b9a272005-08-09 20:24:39 -07001591
Robert Olsson19baf832005-06-21 12:43:18 -07001592 /* Get Child's index */
Stephen Hemminger06801912007-08-10 15:22:13 -07001593 cindex = tkey_extract_bits(pn->key, parent->pos, parent->bits);
1594 pn = parent;
Robert Olsson19baf832005-06-21 12:43:18 -07001595 chopped_off = 0;
1596
1597#ifdef CONFIG_IP_FIB_TRIE_STATS
1598 t->stats.backtrack++;
1599#endif
1600 goto backtrace;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001601 }
Robert Olsson19baf832005-06-21 12:43:18 -07001602 }
1603failed:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001604 ret = 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001605found:
Robert Olsson2373ce12005-08-25 13:01:29 -07001606 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001607 return ret;
1608}
1609
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001610/*
1611 * Remove the leaf and return parent.
1612 */
1613static void trie_leaf_remove(struct trie *t, struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001614{
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001615 struct tnode *tp = node_parent((struct node *) l);
Robert Olsson19baf832005-06-21 12:43:18 -07001616
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001617 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001618
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001619 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001620 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Robert Olsson19baf832005-06-21 12:43:18 -07001621 put_child(t, (struct tnode *)tp, cindex, NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001622 trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001623 } else
Robert Olsson2373ce12005-08-25 13:01:29 -07001624 rcu_assign_pointer(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001625
Stephen Hemminger387a5482008-04-10 03:47:34 -07001626 free_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001627}
1628
Robert Olssond562f1f2007-03-26 14:22:22 -07001629/*
1630 * Caller must hold RTNL.
1631 */
Thomas Graf4e902c52006-08-17 18:14:52 -07001632static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001633{
1634 struct trie *t = (struct trie *) tb->tb_data;
1635 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001636 int plen = cfg->fc_dst_len;
1637 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001638 struct fib_alias *fa, *fa_to_delete;
1639 struct list_head *fa_head;
1640 struct leaf *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001641 struct leaf_info *li;
1642
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001643 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001644 return -EINVAL;
1645
Thomas Graf4e902c52006-08-17 18:14:52 -07001646 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001647 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001648
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001649 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001650 return -EINVAL;
1651
1652 key = key & mask;
1653 l = fib_find_node(t, key);
1654
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001655 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001656 return -ESRCH;
1657
1658 fa_head = get_fa_head(l, plen);
1659 fa = fib_find_alias(fa_head, tos, 0);
1660
1661 if (!fa)
1662 return -ESRCH;
1663
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001664 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001665
1666 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001667 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1668 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001669 struct fib_info *fi = fa->fa_info;
1670
1671 if (fa->fa_tos != tos)
1672 break;
1673
Thomas Graf4e902c52006-08-17 18:14:52 -07001674 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1675 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
1676 fa->fa_scope == cfg->fc_scope) &&
1677 (!cfg->fc_protocol ||
1678 fi->fib_protocol == cfg->fc_protocol) &&
1679 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001680 fa_to_delete = fa;
1681 break;
1682 }
1683 }
1684
Olof Johansson91b9a272005-08-09 20:24:39 -07001685 if (!fa_to_delete)
1686 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001687
Olof Johansson91b9a272005-08-09 20:24:39 -07001688 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001689 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001690 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001691
Olof Johansson91b9a272005-08-09 20:24:39 -07001692 l = fib_find_node(t, key);
Robert Olsson772cb712005-09-19 15:31:18 -07001693 li = find_leaf_info(l, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001694
Robert Olsson2373ce12005-08-25 13:01:29 -07001695 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001696
Olof Johansson91b9a272005-08-09 20:24:39 -07001697 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001698 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001699 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001700 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001701
1702 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001703 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001704
1705 if (fa->fa_state & FA_S_ACCESSED)
Denis V. Lunev76e6ebf2008-07-05 19:00:44 -07001706 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001707
Robert Olsson2373ce12005-08-25 13:01:29 -07001708 fib_release_info(fa->fa_info);
1709 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001710 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001711}
1712
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001713static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001714{
1715 struct fib_alias *fa, *fa_node;
1716 int found = 0;
1717
1718 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1719 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001720
Robert Olsson2373ce12005-08-25 13:01:29 -07001721 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1722 list_del_rcu(&fa->fa_list);
1723 fib_release_info(fa->fa_info);
1724 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001725 found++;
1726 }
1727 }
1728 return found;
1729}
1730
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001731static int trie_flush_leaf(struct leaf *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001732{
1733 int found = 0;
1734 struct hlist_head *lih = &l->list;
1735 struct hlist_node *node, *tmp;
1736 struct leaf_info *li = NULL;
1737
1738 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001739 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001740
1741 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001742 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001743 free_leaf_info(li);
1744 }
1745 }
1746 return found;
1747}
1748
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001749/*
1750 * Scan for the next right leaf starting at node p->child[idx]
1751 * Since we have back pointer, no recursion necessary.
1752 */
1753static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001754{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001755 do {
1756 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001757
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001758 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001759 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001760 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001761 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001762
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001763 while (idx < 1u << p->bits) {
1764 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001765 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001766 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001767
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001768 if (IS_LEAF(c)) {
1769 prefetch(p->child[idx]);
1770 return (struct leaf *) c;
Robert Olsson19baf832005-06-21 12:43:18 -07001771 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001772
1773 /* Rescan start scanning in new node */
1774 p = (struct tnode *) c;
1775 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001776 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001777
1778 /* Node empty, walk back up to parent */
Olof Johansson91b9a272005-08-09 20:24:39 -07001779 c = (struct node *) p;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001780 } while ( (p = node_parent_rcu(c)) != NULL);
1781
1782 return NULL; /* Root of trie */
1783}
1784
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001785static struct leaf *trie_firstleaf(struct trie *t)
1786{
1787 struct tnode *n = (struct tnode *) rcu_dereference(t->trie);
1788
1789 if (!n)
1790 return NULL;
1791
1792 if (IS_LEAF(n)) /* trie is just a leaf */
1793 return (struct leaf *) n;
1794
1795 return leaf_walk_rcu(n, NULL);
1796}
1797
1798static struct leaf *trie_nextleaf(struct leaf *l)
1799{
1800 struct node *c = (struct node *) l;
1801 struct tnode *p = node_parent(c);
1802
1803 if (!p)
1804 return NULL; /* trie with just one leaf */
1805
1806 return leaf_walk_rcu(p, c);
Robert Olsson19baf832005-06-21 12:43:18 -07001807}
1808
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001809static struct leaf *trie_leafindex(struct trie *t, int index)
1810{
1811 struct leaf *l = trie_firstleaf(t);
1812
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001813 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001814 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001815
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001816 return l;
1817}
1818
1819
Robert Olssond562f1f2007-03-26 14:22:22 -07001820/*
1821 * Caller must hold RTNL.
1822 */
Robert Olsson19baf832005-06-21 12:43:18 -07001823static int fn_trie_flush(struct fib_table *tb)
1824{
1825 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001826 struct leaf *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001827 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001828
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001829 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001830 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001831
1832 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001833 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001834 ll = l;
1835 }
1836
1837 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001838 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001839
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001840 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001841 return found;
1842}
1843
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001844static void fn_trie_select_default(struct fib_table *tb,
1845 const struct flowi *flp,
1846 struct fib_result *res)
Robert Olsson19baf832005-06-21 12:43:18 -07001847{
1848 struct trie *t = (struct trie *) tb->tb_data;
1849 int order, last_idx;
1850 struct fib_info *fi = NULL;
1851 struct fib_info *last_resort;
1852 struct fib_alias *fa = NULL;
1853 struct list_head *fa_head;
1854 struct leaf *l;
1855
1856 last_idx = -1;
1857 last_resort = NULL;
1858 order = -1;
1859
Robert Olsson2373ce12005-08-25 13:01:29 -07001860 rcu_read_lock();
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001861
Robert Olsson19baf832005-06-21 12:43:18 -07001862 l = fib_find_node(t, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001863 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001864 goto out;
1865
1866 fa_head = get_fa_head(l, 0);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001867 if (!fa_head)
Robert Olsson19baf832005-06-21 12:43:18 -07001868 goto out;
1869
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001870 if (list_empty(fa_head))
Robert Olsson19baf832005-06-21 12:43:18 -07001871 goto out;
1872
Robert Olsson2373ce12005-08-25 13:01:29 -07001873 list_for_each_entry_rcu(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001874 struct fib_info *next_fi = fa->fa_info;
Olof Johansson91b9a272005-08-09 20:24:39 -07001875
Robert Olsson19baf832005-06-21 12:43:18 -07001876 if (fa->fa_scope != res->scope ||
1877 fa->fa_type != RTN_UNICAST)
1878 continue;
Olof Johansson91b9a272005-08-09 20:24:39 -07001879
Robert Olsson19baf832005-06-21 12:43:18 -07001880 if (next_fi->fib_priority > res->fi->fib_priority)
1881 break;
1882 if (!next_fi->fib_nh[0].nh_gw ||
1883 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1884 continue;
1885 fa->fa_state |= FA_S_ACCESSED;
Olof Johansson91b9a272005-08-09 20:24:39 -07001886
Robert Olsson19baf832005-06-21 12:43:18 -07001887 if (fi == NULL) {
1888 if (next_fi != res->fi)
1889 break;
1890 } else if (!fib_detect_death(fi, order, &last_resort,
Denis V. Lunev971b8932007-12-08 00:32:23 -08001891 &last_idx, tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001892 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001893 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001894 goto out;
1895 }
1896 fi = next_fi;
1897 order++;
1898 }
1899 if (order <= 0 || fi == NULL) {
Denis V. Lunev971b8932007-12-08 00:32:23 -08001900 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001901 goto out;
1902 }
1903
Denis V. Lunev971b8932007-12-08 00:32:23 -08001904 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
1905 tb->tb_default)) {
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001906 fib_result_assign(res, fi);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001907 tb->tb_default = order;
Robert Olsson19baf832005-06-21 12:43:18 -07001908 goto out;
1909 }
Denis V. Luneva2bbe682007-12-08 00:31:44 -08001910 if (last_idx >= 0)
1911 fib_result_assign(res, last_resort);
Denis V. Lunev971b8932007-12-08 00:32:23 -08001912 tb->tb_default = last_idx;
1913out:
Robert Olsson2373ce12005-08-25 13:01:29 -07001914 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001915}
1916
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001917static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1918 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001919 struct sk_buff *skb, struct netlink_callback *cb)
1920{
1921 int i, s_i;
1922 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001923 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001924
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001925 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001926 i = 0;
1927
Robert Olsson2373ce12005-08-25 13:01:29 -07001928 /* rcu_read_lock is hold by caller */
1929
1930 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001931 if (i < s_i) {
1932 i++;
1933 continue;
1934 }
Robert Olsson19baf832005-06-21 12:43:18 -07001935
1936 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1937 cb->nlh->nlmsg_seq,
1938 RTM_NEWROUTE,
1939 tb->tb_id,
1940 fa->fa_type,
1941 fa->fa_scope,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001942 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001943 plen,
1944 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001945 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001946 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001947 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001948 }
Robert Olsson19baf832005-06-21 12:43:18 -07001949 i++;
1950 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001951 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001952 return skb->len;
1953}
1954
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001955static int fn_trie_dump_leaf(struct leaf *l, struct fib_table *tb,
1956 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001957{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001958 struct leaf_info *li;
1959 struct hlist_node *node;
1960 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001961
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001962 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001963 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001964
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001965 /* rcu_read_lock is hold by caller */
1966 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
1967 if (i < s_i) {
1968 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001969 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001970 }
Robert Olsson19baf832005-06-21 12:43:18 -07001971
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001972 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001973 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001974
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001975 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001976 continue;
1977
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001978 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001979 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001980 return -1;
1981 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001982 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001983 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001984
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001985 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001986 return skb->len;
1987}
1988
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001989static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb,
1990 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001991{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001992 struct leaf *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001993 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001994 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001995 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001996
Robert Olsson2373ce12005-08-25 13:01:29 -07001997 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001998 /* Dump starting at last key.
1999 * Note: 0.0.0.0/0 (ie default) is first key.
2000 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002001 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002002 l = trie_firstleaf(t);
2003 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002004 /* Normally, continue from last key, but if that is missing
2005 * fallback to using slow rescan
2006 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002007 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002008 if (!l)
2009 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002010 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002011
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002012 while (l) {
2013 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002014 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002015 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002016 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002017 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002018 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002019
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002020 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08002021 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002022 memset(&cb->args[4], 0,
2023 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07002024 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08002025 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07002026 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08002027
Robert Olsson19baf832005-06-21 12:43:18 -07002028 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07002029}
2030
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002031void __init fib_hash_init(void)
2032{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002033 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
2034 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08002035 0, SLAB_PANIC, NULL);
2036
2037 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
2038 max(sizeof(struct leaf),
2039 sizeof(struct leaf_info)),
2040 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002041}
Robert Olsson19baf832005-06-21 12:43:18 -07002042
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08002043
2044/* Fix more generic FIB names for init later */
2045struct fib_table *fib_hash_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07002046{
2047 struct fib_table *tb;
2048 struct trie *t;
2049
Robert Olsson19baf832005-06-21 12:43:18 -07002050 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
2051 GFP_KERNEL);
2052 if (tb == NULL)
2053 return NULL;
2054
2055 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08002056 tb->tb_default = -1;
Robert Olsson19baf832005-06-21 12:43:18 -07002057 tb->tb_lookup = fn_trie_lookup;
2058 tb->tb_insert = fn_trie_insert;
2059 tb->tb_delete = fn_trie_delete;
2060 tb->tb_flush = fn_trie_flush;
2061 tb->tb_select_default = fn_trie_select_default;
2062 tb->tb_dump = fn_trie_dump;
Robert Olsson19baf832005-06-21 12:43:18 -07002063
2064 t = (struct trie *) tb->tb_data;
Stephen Hemmingerc28a1cf2008-01-12 20:49:13 -08002065 memset(t, 0, sizeof(*t));
Robert Olsson19baf832005-06-21 12:43:18 -07002066
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002067 if (id == RT_TABLE_LOCAL)
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002068 pr_info("IPv4 FIB: Using LC-trie version %s\n", VERSION);
Robert Olsson19baf832005-06-21 12:43:18 -07002069
2070 return tb;
2071}
2072
Robert Olsson19baf832005-06-21 12:43:18 -07002073#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002074/* Depth first Trie walk iterator */
2075struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002076 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002077 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002078 struct tnode *tnode;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002079 unsigned index;
2080 unsigned depth;
2081};
Robert Olsson19baf832005-06-21 12:43:18 -07002082
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002083static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07002084{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002085 struct tnode *tn = iter->tnode;
2086 unsigned cindex = iter->index;
2087 struct tnode *p;
2088
Eric W. Biederman6640e692007-01-24 14:42:04 -08002089 /* A single entry routing table */
2090 if (!tn)
2091 return NULL;
2092
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002093 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
2094 iter->tnode, iter->index, iter->depth);
2095rescan:
2096 while (cindex < (1<<tn->bits)) {
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002097 struct node *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002098
2099 if (n) {
2100 if (IS_LEAF(n)) {
2101 iter->tnode = tn;
2102 iter->index = cindex + 1;
2103 } else {
2104 /* push down one level */
2105 iter->tnode = (struct tnode *) n;
2106 iter->index = 0;
2107 ++iter->depth;
2108 }
2109 return n;
2110 }
2111
2112 ++cindex;
2113 }
2114
2115 /* Current node exhausted, pop back up */
Eric Dumazetb59cfbf2008-01-18 03:31:36 -08002116 p = node_parent_rcu((struct node *)tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002117 if (p) {
2118 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
2119 tn = p;
2120 --iter->depth;
2121 goto rescan;
2122 }
2123
2124 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07002125 return NULL;
2126}
2127
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002128static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
2129 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07002130{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002131 struct node *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002132
Stephen Hemminger132adf52007-03-08 20:44:43 -08002133 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002134 return NULL;
2135
2136 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002137 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08002138 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002139
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002140 if (IS_TNODE(n)) {
2141 iter->tnode = (struct tnode *) n;
2142 iter->index = 0;
2143 iter->depth = 1;
2144 } else {
2145 iter->tnode = NULL;
2146 iter->index = 0;
2147 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002148 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002149
2150 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002151}
2152
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002153static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07002154{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002155 struct node *n;
2156 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07002157
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002158 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07002159
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002160 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002161 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002162 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08002163 struct leaf *l = (struct leaf *)n;
2164 struct leaf_info *li;
2165 struct hlist_node *tmp;
2166
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002167 s->leaves++;
2168 s->totdepth += iter.depth;
2169 if (iter.depth > s->maxdepth)
2170 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08002171
2172 hlist_for_each_entry_rcu(li, tmp, &l->list, hlist)
2173 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002174 } else {
2175 const struct tnode *tn = (const struct tnode *) n;
2176 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07002177
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002178 s->tnodes++;
Stephen Hemminger132adf52007-03-08 20:44:43 -08002179 if (tn->bits < MAX_STAT_DEPTH)
Robert Olsson06ef9212006-03-20 21:35:01 -08002180 s->nodesizes[tn->bits]++;
2181
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002182 for (i = 0; i < (1<<tn->bits); i++)
2183 if (!tn->child[i])
2184 s->nullpointers++;
2185 }
2186 }
2187 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002188}
2189
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002190/*
Robert Olsson19baf832005-06-21 12:43:18 -07002191 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07002192 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002193static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07002194{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002195 unsigned i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07002196
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002197 if (stat->leaves)
2198 avdepth = stat->totdepth*100 / stat->leaves;
2199 else
2200 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07002201
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002202 seq_printf(seq, "\tAver depth: %u.%02d\n",
2203 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002204 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07002205
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002206 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002207 bytes = sizeof(struct leaf) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08002208
2209 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
2210 bytes += sizeof(struct leaf_info) * stat->prefixes;
2211
Stephen Hemminger187b5182008-01-12 20:55:55 -08002212 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002213 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002214
Robert Olsson06ef9212006-03-20 21:35:01 -08002215 max = MAX_STAT_DEPTH;
2216 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002217 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002218
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002219 pointers = 0;
2220 for (i = 1; i <= max; i++)
2221 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002222 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002223 pointers += (1<<i) * stat->nodesizes[i];
2224 }
2225 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002226 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002227
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002228 bytes += sizeof(struct node *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002229 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2230 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002231}
Robert Olsson19baf832005-06-21 12:43:18 -07002232
2233#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002234static void trie_show_usage(struct seq_file *seq,
2235 const struct trie_use_stats *stats)
2236{
2237 seq_printf(seq, "\nCounters:\n---------\n");
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002238 seq_printf(seq, "gets = %u\n", stats->gets);
2239 seq_printf(seq, "backtracks = %u\n", stats->backtrack);
2240 seq_printf(seq, "semantic match passed = %u\n",
2241 stats->semantic_match_passed);
2242 seq_printf(seq, "semantic match miss = %u\n",
2243 stats->semantic_match_miss);
2244 seq_printf(seq, "null node hit= %u\n", stats->null_node_hit);
2245 seq_printf(seq, "skipped node resize = %u\n\n",
2246 stats->resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002247}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002248#endif /* CONFIG_IP_FIB_TRIE_STATS */
2249
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002250static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002251{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002252 if (tb->tb_id == RT_TABLE_LOCAL)
2253 seq_puts(seq, "Local:\n");
2254 else if (tb->tb_id == RT_TABLE_MAIN)
2255 seq_puts(seq, "Main:\n");
2256 else
2257 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002258}
Robert Olsson19baf832005-06-21 12:43:18 -07002259
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002260
Robert Olsson19baf832005-06-21 12:43:18 -07002261static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2262{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002263 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002264 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002265
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002266 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002267 "Basic info: size of leaf:"
2268 " %Zd bytes, size of tnode: %Zd bytes.\n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002269 sizeof(struct leaf), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002270
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002271 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2272 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2273 struct hlist_node *node;
2274 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002275
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002276 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2277 struct trie *t = (struct trie *) tb->tb_data;
2278 struct trie_stat stat;
2279
2280 if (!t)
2281 continue;
2282
2283 fib_table_print(seq, tb);
2284
2285 trie_collect_stats(t, &stat);
2286 trie_show_stats(seq, &stat);
2287#ifdef CONFIG_IP_FIB_TRIE_STATS
2288 trie_show_usage(seq, &t->stats);
2289#endif
2290 }
2291 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002292
Robert Olsson19baf832005-06-21 12:43:18 -07002293 return 0;
2294}
2295
Robert Olsson19baf832005-06-21 12:43:18 -07002296static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2297{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002298 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002299}
2300
Arjan van de Ven9a321442007-02-12 00:55:35 -08002301static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002302 .owner = THIS_MODULE,
2303 .open = fib_triestat_seq_open,
2304 .read = seq_read,
2305 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002306 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002307};
2308
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002309static struct node *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002310{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002311 struct fib_trie_iter *iter = seq->private;
2312 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002313 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002314 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002315
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002316 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2317 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2318 struct hlist_node *node;
2319 struct fib_table *tb;
2320
2321 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) {
2322 struct node *n;
2323
2324 for (n = fib_trie_get_first(iter,
2325 (struct trie *) tb->tb_data);
2326 n; n = fib_trie_get_next(iter))
2327 if (pos == idx++) {
2328 iter->tb = tb;
2329 return n;
2330 }
2331 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002332 }
Robert Olsson19baf832005-06-21 12:43:18 -07002333
Robert Olsson19baf832005-06-21 12:43:18 -07002334 return NULL;
2335}
2336
2337static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002338 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002339{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002340 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002341 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002342}
2343
2344static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2345{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002346 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002347 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002348 struct fib_table *tb = iter->tb;
2349 struct hlist_node *tb_node;
2350 unsigned int h;
2351 struct node *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002352
Robert Olsson19baf832005-06-21 12:43:18 -07002353 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002354 /* next node in same table */
2355 n = fib_trie_get_next(iter);
2356 if (n)
2357 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002358
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002359 /* walk rest of this hash chain */
2360 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
2361 while ( (tb_node = rcu_dereference(tb->tb_hlist.next)) ) {
2362 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2363 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2364 if (n)
2365 goto found;
2366 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002367
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002368 /* new hash chain */
2369 while (++h < FIB_TABLE_HASHSZ) {
2370 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
2371 hlist_for_each_entry_rcu(tb, tb_node, head, tb_hlist) {
2372 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2373 if (n)
2374 goto found;
2375 }
2376 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002377 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002378
2379found:
2380 iter->tb = tb;
2381 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002382}
2383
2384static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002385 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002386{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002387 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002388}
2389
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002390static void seq_indent(struct seq_file *seq, int n)
2391{
2392 while (n-- > 0) seq_puts(seq, " ");
2393}
Robert Olsson19baf832005-06-21 12:43:18 -07002394
Eric Dumazet28d36e32008-01-14 23:09:56 -08002395static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002396{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002397 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002398 case RT_SCOPE_UNIVERSE: return "universe";
2399 case RT_SCOPE_SITE: return "site";
2400 case RT_SCOPE_LINK: return "link";
2401 case RT_SCOPE_HOST: return "host";
2402 case RT_SCOPE_NOWHERE: return "nowhere";
2403 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002404 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002405 return buf;
2406 }
2407}
2408
2409static const char *rtn_type_names[__RTN_MAX] = {
2410 [RTN_UNSPEC] = "UNSPEC",
2411 [RTN_UNICAST] = "UNICAST",
2412 [RTN_LOCAL] = "LOCAL",
2413 [RTN_BROADCAST] = "BROADCAST",
2414 [RTN_ANYCAST] = "ANYCAST",
2415 [RTN_MULTICAST] = "MULTICAST",
2416 [RTN_BLACKHOLE] = "BLACKHOLE",
2417 [RTN_UNREACHABLE] = "UNREACHABLE",
2418 [RTN_PROHIBIT] = "PROHIBIT",
2419 [RTN_THROW] = "THROW",
2420 [RTN_NAT] = "NAT",
2421 [RTN_XRESOLVE] = "XRESOLVE",
2422};
2423
Eric Dumazet28d36e32008-01-14 23:09:56 -08002424static inline const char *rtn_type(char *buf, size_t len, unsigned t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002425{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002426 if (t < __RTN_MAX && rtn_type_names[t])
2427 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002428 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002429 return buf;
2430}
2431
2432/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002433static int fib_trie_seq_show(struct seq_file *seq, void *v)
2434{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002435 const struct fib_trie_iter *iter = seq->private;
2436 struct node *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002437
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002438 if (!node_parent_rcu(n))
2439 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002440
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002441 if (IS_TNODE(n)) {
2442 struct tnode *tn = (struct tnode *) n;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -07002443 __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002444
Robert Olsson1d25cd62005-09-19 15:29:52 -07002445 seq_indent(seq, iter->depth-1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002446 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
2447 &prf, tn->pos, tn->bits, tn->full_children,
Robert Olsson1d25cd62005-09-19 15:29:52 -07002448 tn->empty_children);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002449
Olof Johansson91b9a272005-08-09 20:24:39 -07002450 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002451 struct leaf *l = (struct leaf *) n;
Stephen Hemminger13280422008-01-22 21:54:37 -08002452 struct leaf_info *li;
2453 struct hlist_node *node;
Al Viro32ab5f82006-09-26 22:21:45 -07002454 __be32 val = htonl(l->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002455
2456 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002457 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002458
Stephen Hemminger13280422008-01-22 21:54:37 -08002459 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
2460 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002461
Stephen Hemminger13280422008-01-22 21:54:37 -08002462 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2463 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002464
Stephen Hemminger13280422008-01-22 21:54:37 -08002465 seq_indent(seq, iter->depth+1);
2466 seq_printf(seq, " /%d %s %s", li->plen,
2467 rtn_scope(buf1, sizeof(buf1),
2468 fa->fa_scope),
2469 rtn_type(buf2, sizeof(buf2),
2470 fa->fa_type));
2471 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002472 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002473 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002474 }
2475 }
Robert Olsson19baf832005-06-21 12:43:18 -07002476 }
2477
2478 return 0;
2479}
2480
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002481static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002482 .start = fib_trie_seq_start,
2483 .next = fib_trie_seq_next,
2484 .stop = fib_trie_seq_stop,
2485 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002486};
2487
2488static int fib_trie_seq_open(struct inode *inode, struct file *file)
2489{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002490 return seq_open_net(inode, file, &fib_trie_seq_ops,
2491 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002492}
2493
Arjan van de Ven9a321442007-02-12 00:55:35 -08002494static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002495 .owner = THIS_MODULE,
2496 .open = fib_trie_seq_open,
2497 .read = seq_read,
2498 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002499 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002500};
2501
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002502struct fib_route_iter {
2503 struct seq_net_private p;
2504 struct trie *main_trie;
2505 loff_t pos;
2506 t_key key;
2507};
2508
2509static struct leaf *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
2510{
2511 struct leaf *l = NULL;
2512 struct trie *t = iter->main_trie;
2513
2514 /* use cache location of last found key */
2515 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2516 pos -= iter->pos;
2517 else {
2518 iter->pos = 0;
2519 l = trie_firstleaf(t);
2520 }
2521
2522 while (l && pos-- > 0) {
2523 iter->pos++;
2524 l = trie_nextleaf(l);
2525 }
2526
2527 if (l)
2528 iter->key = pos; /* remember it */
2529 else
2530 iter->pos = 0; /* forget it */
2531
2532 return l;
2533}
2534
2535static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2536 __acquires(RCU)
2537{
2538 struct fib_route_iter *iter = seq->private;
2539 struct fib_table *tb;
2540
2541 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002542 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002543 if (!tb)
2544 return NULL;
2545
2546 iter->main_trie = (struct trie *) tb->tb_data;
2547 if (*pos == 0)
2548 return SEQ_START_TOKEN;
2549 else
2550 return fib_route_get_idx(iter, *pos - 1);
2551}
2552
2553static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2554{
2555 struct fib_route_iter *iter = seq->private;
2556 struct leaf *l = v;
2557
2558 ++*pos;
2559 if (v == SEQ_START_TOKEN) {
2560 iter->pos = 0;
2561 l = trie_firstleaf(iter->main_trie);
2562 } else {
2563 iter->pos++;
2564 l = trie_nextleaf(l);
2565 }
2566
2567 if (l)
2568 iter->key = l->key;
2569 else
2570 iter->pos = 0;
2571 return l;
2572}
2573
2574static void fib_route_seq_stop(struct seq_file *seq, void *v)
2575 __releases(RCU)
2576{
2577 rcu_read_unlock();
2578}
2579
Al Viro32ab5f82006-09-26 22:21:45 -07002580static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002581{
2582 static unsigned type2flags[RTN_MAX + 1] = {
2583 [7] = RTF_REJECT, [8] = RTF_REJECT,
2584 };
2585 unsigned flags = type2flags[type];
2586
2587 if (fi && fi->fib_nh->nh_gw)
2588 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002589 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002590 flags |= RTF_HOST;
2591 flags |= RTF_UP;
2592 return flags;
2593}
2594
2595/*
2596 * This outputs /proc/net/route.
2597 * The format of the file is not supposed to be changed
2598 * and needs to be same as fib_hash output to avoid breaking
2599 * legacy utilities
2600 */
2601static int fib_route_seq_show(struct seq_file *seq, void *v)
2602{
2603 struct leaf *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002604 struct leaf_info *li;
2605 struct hlist_node *node;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002606
2607 if (v == SEQ_START_TOKEN) {
2608 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2609 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2610 "\tWindow\tIRTT");
2611 return 0;
2612 }
2613
Stephen Hemminger13280422008-01-22 21:54:37 -08002614 hlist_for_each_entry_rcu(li, node, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002615 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002616 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002617
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002618 mask = inet_make_mask(li->plen);
2619 prefix = htonl(l->key);
2620
2621 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002622 const struct fib_info *fi = fa->fa_info;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002623 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002624 int len;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002625
2626 if (fa->fa_type == RTN_BROADCAST
2627 || fa->fa_type == RTN_MULTICAST)
2628 continue;
2629
2630 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002631 seq_printf(seq,
2632 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
2633 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002634 fi->fib_dev ? fi->fib_dev->name : "*",
2635 prefix,
2636 fi->fib_nh->nh_gw, flags, 0, 0,
2637 fi->fib_priority,
2638 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002639 (fi->fib_advmss ?
2640 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002641 fi->fib_window,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002642 fi->fib_rtt >> 3, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002643 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002644 seq_printf(seq,
2645 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
2646 "%d\t%08X\t%d\t%u\t%u%n",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002647 prefix, 0, flags, 0, 0, 0,
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002648 mask, 0, 0, 0, &len);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002649
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002650 seq_printf(seq, "%*s\n", 127 - len, "");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002651 }
2652 }
2653
2654 return 0;
2655}
2656
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002657static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002658 .start = fib_route_seq_start,
2659 .next = fib_route_seq_next,
2660 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002661 .show = fib_route_seq_show,
2662};
2663
2664static int fib_route_seq_open(struct inode *inode, struct file *file)
2665{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002666 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002667 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002668}
2669
Arjan van de Ven9a321442007-02-12 00:55:35 -08002670static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002671 .owner = THIS_MODULE,
2672 .open = fib_route_seq_open,
2673 .read = seq_read,
2674 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002675 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002676};
2677
Denis V. Lunev61a02652008-01-10 03:21:09 -08002678int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002679{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002680 if (!proc_net_fops_create(net, "fib_trie", S_IRUGO, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002681 goto out1;
2682
Denis V. Lunev61a02652008-01-10 03:21:09 -08002683 if (!proc_net_fops_create(net, "fib_triestat", S_IRUGO,
2684 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002685 goto out2;
2686
Denis V. Lunev61a02652008-01-10 03:21:09 -08002687 if (!proc_net_fops_create(net, "route", S_IRUGO, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002688 goto out3;
2689
Robert Olsson19baf832005-06-21 12:43:18 -07002690 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002691
2692out3:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002693 proc_net_remove(net, "fib_triestat");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002694out2:
Denis V. Lunev61a02652008-01-10 03:21:09 -08002695 proc_net_remove(net, "fib_trie");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002696out1:
2697 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002698}
2699
Denis V. Lunev61a02652008-01-10 03:21:09 -08002700void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002701{
Denis V. Lunev61a02652008-01-10 03:21:09 -08002702 proc_net_remove(net, "fib_trie");
2703 proc_net_remove(net, "fib_triestat");
2704 proc_net_remove(net, "route");
Robert Olsson19baf832005-06-21 12:43:18 -07002705}
2706
2707#endif /* CONFIG_PROC_FS */