1 From: Alexander Duyck <alexander.h.duyck@redhat.com>
2 Date: Wed, 31 Dec 2014 10:57:08 -0800
3 Subject: [PATCH] fib_trie: Add tracking value for suffix length
5 This change adds a tracking value for the maximum suffix length of all
6 prefixes stored in any given tnode. With this value we can determine if we
7 need to backtrace or not based on if the suffix is greater than the pos
10 By doing this we can reduce the CPU overhead for lookups in the local table
11 as many of the prefixes there are 32b long and have a suffix length of 0
12 meaning we can immediately backtrace to the root node without needing to
13 test any of the nodes between it and where we ended up.
15 Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
16 Signed-off-by: David S. Miller <davem@davemloft.net>
19 --- a/net/ipv4/fib_trie.c
20 +++ b/net/ipv4/fib_trie.c
21 @@ -96,6 +96,7 @@ struct tnode {
23 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
24 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
26 struct tnode __rcu *parent;
29 @@ -311,6 +312,7 @@ static struct tnode *leaf_new(t_key key)
30 * as the nodes are searched
35 /* set bits to 0 indicating we are not a tnode */
37 @@ -342,6 +344,7 @@ static struct tnode *tnode_new(t_key key
44 tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
45 @@ -387,6 +390,9 @@ static void put_child(struct tnode *tn,
46 else if (!wasfull && isfull)
49 + if (n && (tn->slen < n->slen))
52 rcu_assign_pointer(tn->child[i], n);
55 @@ -635,6 +641,41 @@ static int halve(struct trie *t, struct
59 +static unsigned char update_suffix(struct tnode *tn)
61 + unsigned char slen = tn->pos;
62 + unsigned long stride, i;
64 + /* search though the list of children looking for nodes that might
65 + * have a suffix greater than the one we currently have. This is
66 + * why we start with a stride of 2 since a stride of 1 would
67 + * represent the nodes with suffix length equal to tn->pos
69 + for (i = 0, stride = 0x2ul ; i < tnode_child_length(tn); i += stride) {
70 + struct tnode *n = tnode_get_child(tn, i);
72 + if (!n || (n->slen <= slen))
75 + /* update stride and slen based on new value */
76 + stride <<= (n->slen - slen);
80 + /* if slen covers all but the last bit we can stop here
81 + * there will be nothing longer than that since only node
82 + * 0 and 1 << (bits - 1) could have that as their suffix
85 + if ((slen + 1) >= (tn->pos + tn->bits))
94 /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
95 * the Helsinki University of Technology and Matti Tikkanen of Nokia
96 * Telecommunications, page 6:
97 @@ -790,6 +831,19 @@ no_children:
104 + /* Return if at least one deflate was run */
105 + if (max_work != MAX_WORK)
108 + /* push the suffix length to the parent node */
109 + if (tn->slen > tn->pos) {
110 + unsigned char slen = update_suffix(tn);
112 + if (tp && (slen > tp->slen))
117 @@ -818,8 +872,58 @@ static inline struct list_head *get_fa_h
121 -static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
122 +static void leaf_pull_suffix(struct tnode *l)
124 + struct tnode *tp = node_parent(l);
126 + while (tp && (tp->slen > tp->pos) && (tp->slen > l->slen)) {
127 + if (update_suffix(tp) > l->slen)
129 + tp = node_parent(tp);
133 +static void leaf_push_suffix(struct tnode *l)
135 + struct tnode *tn = node_parent(l);
137 + /* if this is a new leaf then tn will be NULL and we can sort
138 + * out parent suffix lengths as a part of trie_rebalance
140 + while (tn && (tn->slen < l->slen)) {
141 + tn->slen = l->slen;
142 + tn = node_parent(tn);
146 +static void remove_leaf_info(struct tnode *l, struct leaf_info *old)
148 + struct hlist_node *prev;
150 + /* record the location of the pointer to this object */
151 + prev = rtnl_dereference(hlist_pprev_rcu(&old->hlist));
153 + /* remove the leaf info from the list */
154 + hlist_del_rcu(&old->hlist);
156 + /* if we emptied the list this leaf will be freed and we can sort
157 + * out parent suffix lengths as a part of trie_rebalance
159 + if (hlist_empty(&l->list))
162 + /* if we removed the tail then we need to update slen */
163 + if (!rcu_access_pointer(hlist_next_rcu(prev))) {
164 + struct leaf_info *li = hlist_entry(prev, typeof(*li), hlist);
166 + l->slen = KEYLENGTH - li->plen;
167 + leaf_pull_suffix(l);
171 +static void insert_leaf_info(struct tnode *l, struct leaf_info *new)
173 + struct hlist_head *head = &l->list;
174 struct leaf_info *li = NULL, *last = NULL;
176 if (hlist_empty(head)) {
177 @@ -836,6 +940,12 @@ static void insert_leaf_info(struct hlis
179 hlist_add_before_rcu(&new->hlist, &li->hlist);
182 + /* if we added to the tail node then we need to update slen */
183 + if (!rcu_access_pointer(hlist_next_rcu(&new->hlist))) {
184 + l->slen = KEYLENGTH - new->plen;
185 + leaf_push_suffix(l);
189 /* rcu_read_lock needs to be hold by caller from readside */
190 @@ -925,7 +1035,7 @@ static struct list_head *fib_insert_node
191 /* we have found a leaf. Prefixes have already been compared */
193 /* Case 1: n is a leaf, and prefixes match*/
194 - insert_leaf_info(&n->list, li);
195 + insert_leaf_info(n, li);
199 @@ -939,7 +1049,7 @@ static struct list_head *fib_insert_node
203 - insert_leaf_info(&l->list, li);
204 + insert_leaf_info(l, li);
206 /* Case 2: n is a LEAF or a TNODE and the key doesn't match.
208 @@ -1206,7 +1316,7 @@ int fib_table_lookup(struct fib_table *t
209 /* only record pn and cindex if we are going to be chopping
210 * bits later. Otherwise we are just wasting cycles.
213 + if (n->slen > n->pos) {
217 @@ -1225,7 +1335,7 @@ int fib_table_lookup(struct fib_table *t
218 * between the key and the prefix exist in the region of
219 * the lsb and higher in the prefix.
221 - if (unlikely(prefix_mismatch(key, n)))
222 + if (unlikely(prefix_mismatch(key, n)) || (n->slen == n->pos))
225 /* exit out and process leaf */
226 @@ -1425,7 +1535,7 @@ int fib_table_delete(struct fib_table *t
227 tb->tb_num_default--;
229 if (list_empty(fa_head)) {
230 - hlist_del_rcu(&li->hlist);
231 + remove_leaf_info(l, li);