kernel: update kernel 3.18 to version 3.18.23
[openwrt/staging/chunkeey.git] / target / linux / generic / patches-3.18 / 080-09-fib_trie-Update-meaning-of-pos-to-represent-unchecke.patch
1 From: Alexander Duyck <alexander.h.duyck@redhat.com>
2 Date: Wed, 31 Dec 2014 10:56:12 -0800
3 Subject: [PATCH] fib_trie: Update meaning of pos to represent unchecked
4 bits
5
6 This change moves the pos value to the other side of the "bits" field. By
7 doing this it actually simplifies a significant amount of code in the trie.
8
9 For example when halving a tree we know that the bit lost exists at
10 oldnode->pos, and if we inflate the tree the new bit being add is at
11 tn->pos. Previously to find those bits you would have to subtract pos and
12 bits from the keylength or start with a value of (1 << 31) and then shift
13 that.
14
15 There are a number of spots throughout the code that benefit from this. In
16 the case of the hot-path searches the main advantage is that we can drop 2
17 or more operations from the search path as we no longer need to compute the
18 value for the index to be shifted by and can instead just use the raw pos
19 value.
20
21 In addition the tkey_extract_bits is now defunct and can be replaced by
22 get_index since the two operations were doing the same thing, but now
23 get_index does it much more quickly as it is only an xor and shift versus a
24 pair of shifts and a subtraction.
25
26 Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
27 Signed-off-by: David S. Miller <davem@davemloft.net>
28 ---
29
30 --- a/net/ipv4/fib_trie.c
31 +++ b/net/ipv4/fib_trie.c
32 @@ -90,8 +90,7 @@ typedef unsigned int t_key;
33 #define IS_TNODE(n) ((n)->bits)
34 #define IS_LEAF(n) (!(n)->bits)
35
36 -#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
37 -#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
38 +#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> (_kv)->pos)
39
40 struct tnode {
41 t_key key;
42 @@ -209,81 +208,64 @@ static inline struct tnode *tnode_get_ch
43 return rcu_dereference_rtnl(tn->child[i]);
44 }
45
46 -static inline t_key mask_pfx(t_key k, unsigned int l)
47 -{
48 - return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
49 -}
50 -
51 -static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
52 -{
53 - if (offset < KEYLENGTH)
54 - return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
55 - else
56 - return 0;
57 -}
58 -
59 -/*
60 - To understand this stuff, an understanding of keys and all their bits is
61 - necessary. Every node in the trie has a key associated with it, but not
62 - all of the bits in that key are significant.
63 -
64 - Consider a node 'n' and its parent 'tp'.
65 -
66 - If n is a leaf, every bit in its key is significant. Its presence is
67 - necessitated by path compression, since during a tree traversal (when
68 - searching for a leaf - unless we are doing an insertion) we will completely
69 - ignore all skipped bits we encounter. Thus we need to verify, at the end of
70 - a potentially successful search, that we have indeed been walking the
71 - correct key path.
72 -
73 - Note that we can never "miss" the correct key in the tree if present by
74 - following the wrong path. Path compression ensures that segments of the key
75 - that are the same for all keys with a given prefix are skipped, but the
76 - skipped part *is* identical for each node in the subtrie below the skipped
77 - bit! trie_insert() in this implementation takes care of that - note the
78 - call to tkey_sub_equals() in trie_insert().
79 -
80 - if n is an internal node - a 'tnode' here, the various parts of its key
81 - have many different meanings.
82 -
83 - Example:
84 - _________________________________________________________________
85 - | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
86 - -----------------------------------------------------------------
87 - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
88 -
89 - _________________________________________________________________
90 - | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
91 - -----------------------------------------------------------------
92 - 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
93 -
94 - tp->pos = 7
95 - tp->bits = 3
96 - n->pos = 15
97 - n->bits = 4
98 -
99 - First, let's just ignore the bits that come before the parent tp, that is
100 - the bits from 0 to (tp->pos-1). They are *known* but at this point we do
101 - not use them for anything.
102 -
103 - The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
104 - index into the parent's child array. That is, they will be used to find
105 - 'n' among tp's children.
106 -
107 - The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
108 - for the node n.
109 -
110 - All the bits we have seen so far are significant to the node n. The rest
111 - of the bits are really not needed or indeed known in n->key.
112 -
113 - The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
114 - n's child array, and will of course be different for each child.
115 -
116 -
117 - The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
118 - at this point.
119 -
120 -*/
121 +/* To understand this stuff, an understanding of keys and all their bits is
122 + * necessary. Every node in the trie has a key associated with it, but not
123 + * all of the bits in that key are significant.
124 + *
125 + * Consider a node 'n' and its parent 'tp'.
126 + *
127 + * If n is a leaf, every bit in its key is significant. Its presence is
128 + * necessitated by path compression, since during a tree traversal (when
129 + * searching for a leaf - unless we are doing an insertion) we will completely
130 + * ignore all skipped bits we encounter. Thus we need to verify, at the end of
131 + * a potentially successful search, that we have indeed been walking the
132 + * correct key path.
133 + *
134 + * Note that we can never "miss" the correct key in the tree if present by
135 + * following the wrong path. Path compression ensures that segments of the key
136 + * that are the same for all keys with a given prefix are skipped, but the
137 + * skipped part *is* identical for each node in the subtrie below the skipped
138 + * bit! trie_insert() in this implementation takes care of that.
139 + *
140 + * if n is an internal node - a 'tnode' here, the various parts of its key
141 + * have many different meanings.
142 + *
143 + * Example:
144 + * _________________________________________________________________
145 + * | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
146 + * -----------------------------------------------------------------
147 + * 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16
148 + *
149 + * _________________________________________________________________
150 + * | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
151 + * -----------------------------------------------------------------
152 + * 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
153 + *
154 + * tp->pos = 22
155 + * tp->bits = 3
156 + * n->pos = 13
157 + * n->bits = 4
158 + *
159 + * First, let's just ignore the bits that come before the parent tp, that is
160 + * the bits from (tp->pos + tp->bits) to 31. They are *known* but at this
161 + * point we do not use them for anything.
162 + *
163 + * The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
164 + * index into the parent's child array. That is, they will be used to find
165 + * 'n' among tp's children.
166 + *
167 + * The bits from (n->pos + n->bits) to (tn->pos - 1) - "S" - are skipped bits
168 + * for the node n.
169 + *
170 + * All the bits we have seen so far are significant to the node n. The rest
171 + * of the bits are really not needed or indeed known in n->key.
172 + *
173 + * The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
174 + * n's child array, and will of course be different for each child.
175 + *
176 + * The rest of the bits, from 0 to (n->pos + n->bits), are completely unknown
177 + * at this point.
178 + */
179
180 static const int halve_threshold = 25;
181 static const int inflate_threshold = 50;
182 @@ -367,7 +349,7 @@ static struct tnode *leaf_new(t_key key)
183 * as the nodes are searched
184 */
185 l->key = key;
186 - l->pos = KEYLENGTH;
187 + l->pos = 0;
188 /* set bits to 0 indicating we are not a tnode */
189 l->bits = 0;
190
191 @@ -400,7 +382,7 @@ static struct tnode *tnode_new(t_key key
192 tn->parent = NULL;
193 tn->pos = pos;
194 tn->bits = bits;
195 - tn->key = mask_pfx(key, pos);
196 + tn->key = (shift < KEYLENGTH) ? (key >> shift) << shift : 0;
197 tn->full_children = 0;
198 tn->empty_children = 1<<bits;
199 }
200 @@ -410,14 +392,12 @@ static struct tnode *tnode_new(t_key key
201 return tn;
202 }
203
204 -/*
205 - * Check whether a tnode 'n' is "full", i.e. it is an internal node
206 +/* Check whether a tnode 'n' is "full", i.e. it is an internal node
207 * and no bits are skipped. See discussion in dyntree paper p. 6
208 */
209 -
210 static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
211 {
212 - return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
213 + return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
214 }
215
216 static inline void put_child(struct tnode *tn, int i,
217 @@ -641,11 +621,12 @@ static struct tnode *inflate(struct trie
218 {
219 int olen = tnode_child_length(oldtnode);
220 struct tnode *tn;
221 + t_key m;
222 int i;
223
224 pr_debug("In inflate\n");
225
226 - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
227 + tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
228
229 if (!tn)
230 return ERR_PTR(-ENOMEM);
231 @@ -656,21 +637,18 @@ static struct tnode *inflate(struct trie
232 * fails. In case of failure we return the oldnode and inflate
233 * of tnode is ignored.
234 */
235 + for (i = 0, m = 1u << tn->pos; i < olen; i++) {
236 + struct tnode *inode = tnode_get_child(oldtnode, i);
237
238 - for (i = 0; i < olen; i++) {
239 - struct tnode *inode;
240 -
241 - inode = tnode_get_child(oldtnode, i);
242 - if (tnode_full(oldtnode, inode) && inode->bits > 1) {
243 + if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
244 struct tnode *left, *right;
245 - t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
246
247 - left = tnode_new(inode->key&(~m), inode->pos + 1,
248 + left = tnode_new(inode->key & ~m, inode->pos,
249 inode->bits - 1);
250 if (!left)
251 goto nomem;
252
253 - right = tnode_new(inode->key|m, inode->pos + 1,
254 + right = tnode_new(inode->key | m, inode->pos,
255 inode->bits - 1);
256
257 if (!right) {
258 @@ -694,9 +672,7 @@ static struct tnode *inflate(struct trie
259
260 /* A leaf or an internal node with skipped bits */
261 if (!tnode_full(oldtnode, inode)) {
262 - put_child(tn,
263 - tkey_extract_bits(inode->key, tn->pos, tn->bits),
264 - inode);
265 + put_child(tn, get_index(inode->key, tn), inode);
266 continue;
267 }
268
269 @@ -767,7 +743,7 @@ static struct tnode *halve(struct trie *
270
271 pr_debug("In halve\n");
272
273 - tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
274 + tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
275
276 if (!tn)
277 return ERR_PTR(-ENOMEM);
278 @@ -787,7 +763,7 @@ static struct tnode *halve(struct trie *
279 if (left && right) {
280 struct tnode *newn;
281
282 - newn = tnode_new(left->key, tn->pos + tn->bits, 1);
283 + newn = tnode_new(left->key, oldtnode->pos, 1);
284
285 if (!newn)
286 goto nomem;
287 @@ -915,7 +891,7 @@ static void trie_rebalance(struct trie *
288 key = tn->key;
289
290 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
291 - cindex = tkey_extract_bits(key, tp->pos, tp->bits);
292 + cindex = get_index(key, tp);
293 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
294 tn = resize(t, tn);
295
296 @@ -1005,11 +981,8 @@ static struct list_head *fib_insert_node
297 */
298 if (n) {
299 struct tnode *tn;
300 - int newpos;
301 -
302 - newpos = KEYLENGTH - __fls(n->key ^ key) - 1;
303
304 - tn = tnode_new(key, newpos, 1);
305 + tn = tnode_new(key, __fls(key ^ n->key), 1);
306 if (!tn) {
307 free_leaf_info(li);
308 node_free(l);
309 @@ -1559,12 +1532,7 @@ static int trie_flush_leaf(struct tnode
310 static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
311 {
312 do {
313 - t_key idx;
314 -
315 - if (c)
316 - idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
317 - else
318 - idx = 0;
319 + t_key idx = c ? idx = get_index(c->key, p) + 1 : 0;
320
321 while (idx < 1u << p->bits) {
322 c = tnode_get_child_rcu(p, idx++);
323 @@ -1851,7 +1819,7 @@ rescan:
324 /* Current node exhausted, pop back up */
325 p = node_parent_rcu(tn);
326 if (p) {
327 - cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
328 + cindex = get_index(tn->key, p) + 1;
329 tn = p;
330 --iter->depth;
331 goto rescan;
332 @@ -2186,10 +2154,10 @@ static int fib_trie_seq_show(struct seq_
333 if (IS_TNODE(n)) {
334 __be32 prf = htonl(n->key);
335
336 - seq_indent(seq, iter->depth - 1);
337 - seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
338 - &prf, n->pos, n->bits, n->full_children,
339 - n->empty_children);
340 + seq_indent(seq, iter->depth-1);
341 + seq_printf(seq, " +-- %pI4/%zu %u %u %u\n",
342 + &prf, KEYLENGTH - n->pos - n->bits, n->bits,
343 + n->full_children, n->empty_children);
344 } else {
345 struct leaf_info *li;
346 __be32 val = htonl(n->key);