1 From: Alexander Duyck <alexander.h.duyck@redhat.com>
2 Date: Wed, 31 Dec 2014 10:56:55 -0800
3 Subject: [PATCH] fib_trie: inflate/halve nodes in a more RCU friendly
6 This change pulls the node_set_parent functionality out of put_child_reorg
7 and instead leaves that to the function to take care of as well. By doing
8 this we can fully construct the new cluster of tnodes and all of the
9 pointers out of it before we start routing pointers into it.
11 I am suspecting this will likely fix some concurency issues though I don't
12 have a good test to show as such.
14 Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
15 Signed-off-by: David S. Miller <davem@davemloft.net>
18 --- a/net/ipv4/fib_trie.c
19 +++ b/net/ipv4/fib_trie.c
20 @@ -391,8 +391,6 @@ static void put_child(struct tnode *tn,
21 else if (!wasfull && isfull)
24 - node_set_parent(n, tn);
26 rcu_assign_pointer(tn->child[i], n);
29 @@ -436,10 +434,8 @@ static void tnode_free(struct tnode *tn)
31 static int inflate(struct trie *t, struct tnode *oldtnode)
33 - unsigned long olen = tnode_child_length(oldtnode);
34 - struct tnode *tp = node_parent(oldtnode);
37 + struct tnode *inode, *node0, *node1, *tn, *tp;
38 + unsigned long i, j, k;
41 pr_debug("In inflate\n");
42 @@ -448,43 +444,13 @@ static int inflate(struct trie *t, struc
47 - * Preallocate and store tnodes before the actual work so we
48 - * don't get into an inconsistent state if memory allocation
49 - * fails. In case of failure we return the oldnode and inflate
50 - * of tnode is ignored.
51 + /* Assemble all of the pointers in our cluster, in this case that
52 + * represents all of the pointers out of our allocated nodes that
53 + * point to existing tnodes and the links between our allocated
56 - for (i = 0, m = 1u << tn->pos; i < olen; i++) {
57 - struct tnode *inode = tnode_get_child(oldtnode, i);
59 - if (tnode_full(oldtnode, inode) && (inode->bits > 1)) {
60 - struct tnode *left, *right;
62 - left = tnode_new(inode->key & ~m, inode->pos,
66 - tnode_free_append(tn, left);
68 - right = tnode_new(inode->key | m, inode->pos,
73 - tnode_free_append(tn, right);
75 - put_child(tn, 2*i, left);
76 - put_child(tn, 2*i+1, right);
80 - /* prepare oldtnode to be freed */
81 - tnode_free_init(oldtnode);
83 - for (i = 0; i < olen; i++) {
84 - struct tnode *inode = tnode_get_child(oldtnode, i);
85 - struct tnode *left, *right;
86 - unsigned long size, j;
87 + for (i = tnode_child_length(oldtnode), m = 1u << tn->pos; i;) {
88 + inode = tnode_get_child(oldtnode, --i);
92 @@ -496,65 +462,99 @@ static int inflate(struct trie *t, struc
96 - /* drop the node in the old tnode free list */
97 - tnode_free_append(oldtnode, inode);
99 /* An internal node with two children */
100 if (inode->bits == 1) {
101 - put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
102 - put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
103 + put_child(tn, 2 * i + 1, tnode_get_child(inode, 1));
104 + put_child(tn, 2 * i, tnode_get_child(inode, 0));
108 - /* An internal node with more than two children */
110 /* We will replace this node 'inode' with two new
111 - * ones, 'left' and 'right', each with half of the
112 + * ones, 'node0' and 'node1', each with half of the
113 * original children. The two new nodes will have
114 * a position one bit further down the key and this
115 * means that the "significant" part of their keys
116 * (see the discussion near the top of this file)
117 * will differ by one bit, which will be "0" in
118 - * left's key and "1" in right's key. Since we are
119 + * node0's key and "1" in node1's key. Since we are
120 * moving the key position by one step, the bit that
121 * we are moving away from - the bit at position
122 - * (inode->pos) - is the one that will differ between
123 - * left and right. So... we synthesize that bit in the
125 - * The mask 'm' below will be a single "one" bit at
126 - * the position (inode->pos)
127 + * (tn->pos) - is the one that will differ between
128 + * node0 and node1. So... we synthesize that bit in the
131 + node1 = tnode_new(inode->key | m, inode->pos, inode->bits - 1);
134 + tnode_free_append(tn, node1);
136 + node0 = tnode_new(inode->key & ~m, inode->pos, inode->bits - 1);
139 + tnode_free_append(tn, node0);
141 + /* populate child pointers in new nodes */
142 + for (k = tnode_child_length(inode), j = k / 2; j;) {
143 + put_child(node1, --j, tnode_get_child(inode, --k));
144 + put_child(node0, j, tnode_get_child(inode, j));
145 + put_child(node1, --j, tnode_get_child(inode, --k));
146 + put_child(node0, j, tnode_get_child(inode, j));
149 + /* link new nodes to parent */
150 + NODE_INIT_PARENT(node1, tn);
151 + NODE_INIT_PARENT(node0, tn);
153 + /* link parent to nodes */
154 + put_child(tn, 2 * i + 1, node1);
155 + put_child(tn, 2 * i, node0);
158 + /* setup the parent pointer into and out of this node */
159 + tp = node_parent(oldtnode);
160 + NODE_INIT_PARENT(tn, tp);
161 + put_child_root(tp, t, tn->key, tn);
163 - /* Use the old key, but set the new significant
166 + /* prepare oldtnode to be freed */
167 + tnode_free_init(oldtnode);
169 - left = tnode_get_child(tn, 2*i);
170 - put_child(tn, 2*i, NULL);
171 + /* update all child nodes parent pointers to route to us */
172 + for (i = tnode_child_length(oldtnode); i;) {
173 + inode = tnode_get_child(oldtnode, --i);
176 + /* A leaf or an internal node with skipped bits */
177 + if (!tnode_full(oldtnode, inode)) {
178 + node_set_parent(inode, tn);
182 - right = tnode_get_child(tn, 2*i+1);
183 - put_child(tn, 2*i+1, NULL);
184 + /* drop the node in the old tnode free list */
185 + tnode_free_append(oldtnode, inode);
188 + /* fetch new nodes */
189 + node1 = tnode_get_child(tn, 2 * i + 1);
190 + node0 = tnode_get_child(tn, 2 * i);
192 - size = tnode_child_length(left);
193 - for (j = 0; j < size; j++) {
194 - put_child(left, j, rtnl_dereference(inode->child[j]));
195 - put_child(right, j, rtnl_dereference(inode->child[j + size]));
196 + /* bits == 1 then node0 and node1 represent inode's children */
197 + if (inode->bits == 1) {
198 + node_set_parent(node1, tn);
199 + node_set_parent(node0, tn);
203 - put_child(tn, 2 * i, left);
204 - put_child(tn, 2 * i + 1, right);
205 + /* update parent pointers in child node's children */
206 + for (k = tnode_child_length(inode), j = k / 2; j;) {
207 + node_set_parent(tnode_get_child(inode, --k), node1);
208 + node_set_parent(tnode_get_child(inode, --j), node0);
209 + node_set_parent(tnode_get_child(inode, --k), node1);
210 + node_set_parent(tnode_get_child(inode, --j), node0);
213 /* resize child nodes */
220 - put_child_root(tp, t, tn->key, tn);
222 /* we completed without error, prepare to free old node */
223 tnode_free(oldtnode);
225 @@ -566,10 +566,8 @@ nomem:
227 static int halve(struct trie *t, struct tnode *oldtnode)
229 - unsigned long olen = tnode_child_length(oldtnode);
230 - struct tnode *tp = node_parent(oldtnode);
231 - struct tnode *tn, *left, *right;
233 + struct tnode *tn, *tp, *inode, *node0, *node1;
236 pr_debug("In halve\n");
238 @@ -577,68 +575,64 @@ static int halve(struct trie *t, struct
243 - * Preallocate and store tnodes before the actual work so we
244 - * don't get into an inconsistent state if memory allocation
245 - * fails. In case of failure we return the oldnode and halve
246 - * of tnode is ignored.
247 + /* Assemble all of the pointers in our cluster, in this case that
248 + * represents all of the pointers out of our allocated nodes that
249 + * point to existing tnodes and the links between our allocated
252 + for (i = tnode_child_length(oldtnode); i;) {
253 + node1 = tnode_get_child(oldtnode, --i);
254 + node0 = tnode_get_child(oldtnode, --i);
256 - for (i = 0; i < olen; i += 2) {
257 - left = tnode_get_child(oldtnode, i);
258 - right = tnode_get_child(oldtnode, i+1);
259 + /* At least one of the children is empty */
260 + if (!node1 || !node0) {
261 + put_child(tn, i / 2, node1 ? : node0);
265 /* Two nonempty children */
266 - if (left && right) {
267 - struct tnode *newn;
269 - newn = tnode_new(left->key, oldtnode->pos, 1);
274 - tnode_free_append(tn, newn);
276 - put_child(tn, i/2, newn);
277 + inode = tnode_new(node0->key, oldtnode->pos, 1);
282 + tnode_free_append(tn, inode);
284 + /* initialize pointers out of node */
285 + put_child(inode, 1, node1);
286 + put_child(inode, 0, node0);
287 + NODE_INIT_PARENT(inode, tn);
289 + /* link parent to node */
290 + put_child(tn, i / 2, inode);
293 + /* setup the parent pointer out of and back into this node */
294 + tp = node_parent(oldtnode);
295 + NODE_INIT_PARENT(tn, tp);
296 + put_child_root(tp, t, tn->key, tn);
298 /* prepare oldtnode to be freed */
299 tnode_free_init(oldtnode);
301 - for (i = 0; i < olen; i += 2) {
302 - struct tnode *newBinNode;
304 - left = tnode_get_child(oldtnode, i);
305 - right = tnode_get_child(oldtnode, i+1);
307 - /* At least one of the children is empty */
308 - if (left == NULL) {
309 - if (right == NULL) /* Both are empty */
311 - put_child(tn, i/2, right);
315 - if (right == NULL) {
316 - put_child(tn, i/2, left);
317 + /* update all of the child parent pointers */
318 + for (i = tnode_child_length(tn); i;) {
319 + inode = tnode_get_child(tn, --i);
321 + /* only new tnodes will be considered "full" nodes */
322 + if (!tnode_full(tn, inode)) {
323 + node_set_parent(inode, tn);
327 /* Two nonempty children */
328 - newBinNode = tnode_get_child(tn, i/2);
329 - put_child(newBinNode, 0, left);
330 - put_child(newBinNode, 1, right);
332 - put_child(tn, i / 2, newBinNode);
333 + node_set_parent(tnode_get_child(inode, 1), inode);
334 + node_set_parent(tnode_get_child(inode, 0), inode);
336 /* resize child node */
337 - resize(t, newBinNode);
341 - put_child_root(tp, t, tn->key, tn);
343 /* all pointers should be clean so we are done */
344 tnode_free(oldtnode);