1 From: Alexander Duyck <alexander.h.duyck@redhat.com>
2 Date: Wed, 31 Dec 2014 10:55:35 -0800
3 Subject: [PATCH] fib_trie: Make leaf and tnode more uniform
5 This change makes some fundamental changes to the way leaves and tnodes are
6 constructed. The big differences are:
7 1. Leaves now populate pos and bits indicating their full key size.
8 2. Trie nodes now mask out their lower bits to be consistent with the leaf
9 3. Both structures have been reordered so that rt_trie_node now consisists
10 of a much larger region including the pos, bits, and rcu portions of
13 On 32b systems this will result in the leaf being 4B larger as the pos and
14 bits values were added to a hole created by the key as it was only 4B in
17 Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
18 Signed-off-by: David S. Miller <davem@davemloft.net>
21 --- a/net/ipv4/fib_trie.c
22 +++ b/net/ipv4/fib_trie.c
25 typedef unsigned int t_key;
29 -#define NODE_TYPE_MASK 0x1UL
30 -#define NODE_TYPE(node) ((node)->parent & NODE_TYPE_MASK)
31 +#define IS_TNODE(n) ((n)->bits)
32 +#define IS_LEAF(n) (!(n)->bits)
34 -#define IS_TNODE(n) (!(n->parent & T_LEAF))
35 -#define IS_LEAF(n) (n->parent & T_LEAF)
38 + unsigned char bits; /* 2log(KEYLENGTH) bits needed */
39 + unsigned char pos; /* 2log(KEYLENGTH) bits needed */
40 + struct tnode __rcu *parent;
42 + struct rcu_head rcu;
43 + struct tnode *tnode_free;
45 + unsigned int full_children; /* KEYLENGTH bits needed */
46 + unsigned int empty_children; /* KEYLENGTH bits needed */
47 + struct rt_trie_node __rcu *child[0];
51 - unsigned long parent;
55 + struct tnode __rcu *parent;
56 + struct rcu_head rcu;
60 - unsigned long parent;
62 - struct hlist_head list;
65 + struct tnode __rcu *parent;
67 + struct hlist_head list;
71 @@ -115,20 +129,6 @@ struct leaf_info {
76 - unsigned long parent;
78 - unsigned char pos; /* 2log(KEYLENGTH) bits needed */
79 - unsigned char bits; /* 2log(KEYLENGTH) bits needed */
80 - unsigned int full_children; /* KEYLENGTH bits needed */
81 - unsigned int empty_children; /* KEYLENGTH bits needed */
83 - struct rcu_head rcu;
84 - struct tnode *tnode_free;
86 - struct rt_trie_node __rcu *child[0];
89 #ifdef CONFIG_IP_FIB_TRIE_STATS
90 struct trie_use_stats {
92 @@ -176,38 +176,27 @@ static const int sync_pages = 128;
93 static struct kmem_cache *fn_alias_kmem __read_mostly;
94 static struct kmem_cache *trie_leaf_kmem __read_mostly;
97 - * caller must hold RTNL
99 -static inline struct tnode *node_parent(const struct rt_trie_node *node)
101 - unsigned long parent;
102 +/* caller must hold RTNL */
103 +#define node_parent(n) rtnl_dereference((n)->parent)
105 - parent = rcu_dereference_index_check(node->parent, lockdep_rtnl_is_held());
106 +/* caller must hold RCU read lock or RTNL */
107 +#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
109 - return (struct tnode *)(parent & ~NODE_TYPE_MASK);
113 - * caller must hold RCU read lock or RTNL
115 -static inline struct tnode *node_parent_rcu(const struct rt_trie_node *node)
116 +/* wrapper for rcu_assign_pointer */
117 +static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
119 - unsigned long parent;
121 - parent = rcu_dereference_index_check(node->parent, rcu_read_lock_held() ||
122 - lockdep_rtnl_is_held());
124 - return (struct tnode *)(parent & ~NODE_TYPE_MASK);
126 + rcu_assign_pointer(node->parent, ptr);
129 -/* Same as rcu_assign_pointer
130 - * but that macro() assumes that value is a pointer.
131 +#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
133 +/* This provides us with the number of children in this node, in the case of a
134 + * leaf this will return 0 meaning none of the children are accessible.
136 -static inline void node_set_parent(struct rt_trie_node *node, struct tnode *ptr)
137 +static inline int tnode_child_length(const struct tnode *tn)
140 - node->parent = (unsigned long)ptr | NODE_TYPE(node);
141 + return (1ul << tn->bits) & ~(1ul);
145 @@ -215,7 +204,7 @@ static inline void node_set_parent(struc
147 static inline struct rt_trie_node *tnode_get_child(const struct tnode *tn, unsigned int i)
149 - BUG_ON(i >= 1U << tn->bits);
150 + BUG_ON(i >= tnode_child_length(tn));
152 return rtnl_dereference(tn->child[i]);
154 @@ -225,16 +214,11 @@ static inline struct rt_trie_node *tnode
156 static inline struct rt_trie_node *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
158 - BUG_ON(i >= 1U << tn->bits);
159 + BUG_ON(i >= tnode_child_length(tn));
161 return rcu_dereference_rtnl(tn->child[i]);
164 -static inline int tnode_child_length(const struct tnode *tn)
166 - return 1 << tn->bits;
169 static inline t_key mask_pfx(t_key k, unsigned int l)
171 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
172 @@ -336,11 +320,6 @@ static inline int tkey_mismatch(t_key a,
176 -static inline void check_tnode(const struct tnode *tn)
178 - WARN_ON(tn && tn->pos+tn->bits > 32);
181 static const int halve_threshold = 25;
182 static const int inflate_threshold = 50;
183 static const int halve_threshold_root = 15;
184 @@ -426,11 +405,20 @@ static void tnode_free_flush(void)
188 -static struct leaf *leaf_new(void)
189 +static struct leaf *leaf_new(t_key key)
191 struct leaf *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
193 - l->parent = T_LEAF;
195 + /* set key and pos to reflect full key value
196 + * any trailing zeros in the key should be ignored
197 + * as the nodes are searched
200 + l->pos = KEYLENGTH;
201 + /* set bits to 0 indicating we are not a tnode */
204 INIT_HLIST_HEAD(&l->list);
207 @@ -451,12 +439,16 @@ static struct tnode *tnode_new(t_key key
209 size_t sz = sizeof(struct tnode) + (sizeof(struct rt_trie_node *) << bits);
210 struct tnode *tn = tnode_alloc(sz);
211 + unsigned int shift = pos + bits;
213 + /* verify bits and pos their msb bits clear and values are valid */
214 + BUG_ON(!bits || (shift > KEYLENGTH));
217 - tn->parent = T_TNODE;
222 + tn->key = mask_pfx(key, pos);
223 tn->full_children = 0;
224 tn->empty_children = 1<<bits;
226 @@ -473,10 +465,7 @@ static struct tnode *tnode_new(t_key key
228 static inline int tnode_full(const struct tnode *tn, const struct rt_trie_node *n)
230 - if (n == NULL || IS_LEAF(n))
233 - return ((struct tnode *) n)->pos == tn->pos + tn->bits;
234 + return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
237 static inline void put_child(struct tnode *tn, int i,
238 @@ -514,8 +503,7 @@ static void tnode_put_child_reorg(struct
239 else if (!wasfull && isfull)
243 - node_set_parent(n, tn);
244 + node_set_parent(n, tn);
246 rcu_assign_pointer(tn->child[i], n);
248 @@ -523,7 +511,7 @@ static void tnode_put_child_reorg(struct
250 static struct rt_trie_node *resize(struct trie *t, struct tnode *tn)
253 + struct rt_trie_node *n = NULL;
254 struct tnode *old_tn;
255 int inflate_threshold_use;
256 int halve_threshold_use;
257 @@ -536,12 +524,11 @@ static struct rt_trie_node *resize(struc
258 tn, inflate_threshold, halve_threshold);
261 - if (tn->empty_children == tnode_child_length(tn)) {
262 - tnode_free_safe(tn);
265 + if (tn->empty_children > (tnode_child_length(tn) - 1))
269 - if (tn->empty_children == tnode_child_length(tn) - 1)
270 + if (tn->empty_children == (tnode_child_length(tn) - 1))
273 * Double as long as the resulting node has a number of
274 @@ -607,11 +594,9 @@ static struct rt_trie_node *resize(struc
280 /* Keep root node larger */
282 - if (!node_parent((struct rt_trie_node *)tn)) {
283 + if (!node_parent(tn)) {
284 inflate_threshold_use = inflate_threshold_root;
285 halve_threshold_use = halve_threshold_root;
287 @@ -637,8 +622,6 @@ static struct rt_trie_node *resize(struc
293 /* Return if at least one inflate is run */
294 if (max_work != MAX_WORK)
295 return (struct rt_trie_node *) tn;
296 @@ -666,21 +649,16 @@ static struct rt_trie_node *resize(struc
299 /* Only one child remains */
300 - if (tn->empty_children == tnode_child_length(tn) - 1) {
301 + if (tn->empty_children == (tnode_child_length(tn) - 1)) {
304 - for (i = 0; i < tnode_child_length(tn); i++) {
305 - struct rt_trie_node *n;
307 - n = rtnl_dereference(tn->child[i]);
311 - /* compress one level */
313 - node_set_parent(n, NULL);
314 - tnode_free_safe(tn);
317 + for (i = tnode_child_length(tn); !n && i;)
318 + n = tnode_get_child(tn, --i);
320 + /* compress one level */
321 + node_set_parent(n, NULL);
322 + tnode_free_safe(tn);
325 return (struct rt_trie_node *) tn;
327 @@ -760,8 +738,7 @@ static struct tnode *inflate(struct trie
329 /* A leaf or an internal node with skipped bits */
331 - if (IS_LEAF(node) || ((struct tnode *) node)->pos >
332 - tn->pos + tn->bits - 1) {
333 + if (IS_LEAF(node) || (node->pos > (tn->pos + tn->bits - 1))) {
335 tkey_extract_bits(node->key, oldtnode->pos, oldtnode->bits + 1),
337 @@ -958,11 +935,9 @@ fib_find_node(struct trie *t, u32 key)
339 n = rcu_dereference_rtnl(t->trie);
341 - while (n != NULL && NODE_TYPE(n) == T_TNODE) {
342 + while (n && IS_TNODE(n)) {
343 tn = (struct tnode *) n;
347 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
348 pos = tn->pos + tn->bits;
349 n = tnode_get_child_rcu(tn,
350 @@ -988,7 +963,7 @@ static void trie_rebalance(struct trie *
354 - while (tn != NULL && (tp = node_parent((struct rt_trie_node *)tn)) != NULL) {
355 + while (tn != NULL && (tp = node_parent(tn)) != NULL) {
356 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
357 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
358 tn = (struct tnode *)resize(t, tn);
359 @@ -996,7 +971,7 @@ static void trie_rebalance(struct trie *
360 tnode_put_child_reorg(tp, cindex,
361 (struct rt_trie_node *)tn, wasfull);
363 - tp = node_parent((struct rt_trie_node *) tn);
364 + tp = node_parent(tn);
366 rcu_assign_pointer(t->trie, (struct rt_trie_node *)tn);
368 @@ -1048,11 +1023,9 @@ static struct list_head *fib_insert_node
369 * If it doesn't, we need to replace it with a T_TNODE.
372 - while (n != NULL && NODE_TYPE(n) == T_TNODE) {
373 + while (n && IS_TNODE(n)) {
374 tn = (struct tnode *) n;
378 if (tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
380 pos = tn->pos + tn->bits;
381 @@ -1087,12 +1060,11 @@ static struct list_head *fib_insert_node
382 insert_leaf_info(&l->list, li);
392 li = leaf_info_new(plen);
395 @@ -1569,7 +1541,7 @@ backtrace:
396 if (chopped_off <= pn->bits) {
397 cindex &= ~(1 << (chopped_off-1));
399 - struct tnode *parent = node_parent_rcu((struct rt_trie_node *) pn);
400 + struct tnode *parent = node_parent_rcu(pn);
404 @@ -1597,7 +1569,7 @@ EXPORT_SYMBOL_GPL(fib_table_lookup);
406 static void trie_leaf_remove(struct trie *t, struct leaf *l)
408 - struct tnode *tp = node_parent((struct rt_trie_node *) l);
409 + struct tnode *tp = node_parent(l);
411 pr_debug("entering trie_leaf_remove(%p)\n", l);
413 @@ -2374,7 +2346,7 @@ static int fib_trie_seq_show(struct seq_
416 struct tnode *tn = (struct tnode *) n;
417 - __be32 prf = htonl(mask_pfx(tn->key, tn->pos));
418 + __be32 prf = htonl(tn->key);
420 seq_indent(seq, iter->depth-1);
421 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",