kernel/generic: add a few missing kernel config symbols
[openwrt/svn-archive/archive.git] / target / linux / generic / patches-3.3 / 130-pppoatm-queue-depth.patch
1 From 9d02daf754238adac48fa075ee79e7edd3d79ed3 Mon Sep 17 00:00:00 2001
2 From: David Woodhouse <dwmw2@infradead.org>
3 Date: Sun, 8 Apr 2012 09:55:43 +0000
4 Subject: [PATCH] pppoatm: Fix excessive queue bloat
5
6 We discovered that PPPoATM has an excessively deep transmit queue. A
7 queue the size of the default socket send buffer (wmem_default) is
8 maintained between the PPP generic core and the ATM device.
9
10 Fix it to queue a maximum of *two* packets. The one the ATM device is
11 currently working on, and one more for the ATM driver to process
12 immediately in its TX done interrupt handler. The PPP core is designed
13 to feed packets to the channel with minimal latency, so that really
14 ought to be enough to keep the ATM device busy.
15
16 While we're at it, fix the fact that we were triggering the wakeup
17 tasklet on *every* pppoatm_pop() call. The comment saying "this is
18 inefficient, but doing it right is too hard" turns out to be overly
19 pessimistic... I think :)
20
21 On machines like the Traverse Geos, with a slow Geode CPU and two
22 high-speed ADSL2+ interfaces, there were reports of extremely high CPU
23 usage which could partly be attributed to the extra wakeups.
24
25 (The wakeup handling could actually be made a whole lot easier if we
26 stop checking sk->sk_sndbuf altogether. Given that we now only queue
27 *two* packets ever, one wonders what the point is. As it is, you could
28 already deadlock the thing by setting the sk_sndbuf to a value lower
29 than the MTU of the device, and it'd just block for ever.)
30
31 Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
32 Signed-off-by: David S. Miller <davem@davemloft.net>
33 ---
34 net/atm/pppoatm.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++-----
35 1 files changed, 85 insertions(+), 10 deletions(-)
36
37 --- a/net/atm/pppoatm.c
38 +++ b/net/atm/pppoatm.c
39 @@ -62,12 +62,25 @@ struct pppoatm_vcc {
40 void (*old_pop)(struct atm_vcc *, struct sk_buff *);
41 /* keep old push/pop for detaching */
42 enum pppoatm_encaps encaps;
43 + atomic_t inflight;
44 + unsigned long blocked;
45 int flags; /* SC_COMP_PROT - compress protocol */
46 struct ppp_channel chan; /* interface to generic ppp layer */
47 struct tasklet_struct wakeup_tasklet;
48 };
49
50 /*
51 + * We want to allow two packets in the queue. The one that's currently in
52 + * flight, and *one* queued up ready for the ATM device to send immediately
53 + * from its TX done IRQ. We want to be able to use atomic_inc_not_zero(), so
54 + * inflight == -2 represents an empty queue, -1 one packet, and zero means
55 + * there are two packets in the queue.
56 + */
57 +#define NONE_INFLIGHT -2
58 +
59 +#define BLOCKED 0
60 +
61 +/*
62 * Header used for LLC Encapsulated PPP (4 bytes) followed by the LCP protocol
63 * ID (0xC021) used in autodetection
64 */
65 @@ -102,16 +115,30 @@ static void pppoatm_wakeup_sender(unsign
66 static void pppoatm_pop(struct atm_vcc *atmvcc, struct sk_buff *skb)
67 {
68 struct pppoatm_vcc *pvcc = atmvcc_to_pvcc(atmvcc);
69 +
70 pvcc->old_pop(atmvcc, skb);
71 + atomic_dec(&pvcc->inflight);
72 +
73 /*
74 - * We don't really always want to do this since it's
75 - * really inefficient - it would be much better if we could
76 - * test if we had actually throttled the generic layer.
77 - * Unfortunately then there would be a nasty SMP race where
78 - * we could clear that flag just as we refuse another packet.
79 - * For now we do the safe thing.
80 + * We always used to run the wakeup tasklet unconditionally here, for
81 + * fear of race conditions where we clear the BLOCKED flag just as we
82 + * refuse another packet in pppoatm_send(). This was quite inefficient.
83 + *
84 + * In fact it's OK. The PPP core will only ever call pppoatm_send()
85 + * while holding the channel->downl lock. And ppp_output_wakeup() as
86 + * called by the tasklet will *also* grab that lock. So even if another
87 + * CPU is in pppoatm_send() right now, the tasklet isn't going to race
88 + * with it. The wakeup *will* happen after the other CPU is safely out
89 + * of pppoatm_send() again.
90 + *
91 + * So if the CPU in pppoatm_send() has already set the BLOCKED bit and
92 + * it about to return, that's fine. We trigger a wakeup which will
93 + * happen later. And if the CPU in pppoatm_send() *hasn't* set the
94 + * BLOCKED bit yet, that's fine too because of the double check in
95 + * pppoatm_may_send() which is commented there.
96 */
97 - tasklet_schedule(&pvcc->wakeup_tasklet);
98 + if (test_and_clear_bit(BLOCKED, &pvcc->blocked))
99 + tasklet_schedule(&pvcc->wakeup_tasklet);
100 }
101
102 /*
103 @@ -184,6 +211,51 @@ error:
104 ppp_input_error(&pvcc->chan, 0);
105 }
106
107 +static inline int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size)
108 +{
109 + /*
110 + * It's not clear that we need to bother with using atm_may_send()
111 + * to check we don't exceed sk->sk_sndbuf. If userspace sets a
112 + * value of sk_sndbuf which is lower than the MTU, we're going to
113 + * block for ever. But the code always did that before we introduced
114 + * the packet count limit, so...
115 + */
116 + if (atm_may_send(pvcc->atmvcc, size) &&
117 + atomic_inc_not_zero_hint(&pvcc->inflight, NONE_INFLIGHT))
118 + return 1;
119 +
120 + /*
121 + * We use test_and_set_bit() rather than set_bit() here because
122 + * we need to ensure there's a memory barrier after it. The bit
123 + * *must* be set before we do the atomic_inc() on pvcc->inflight.
124 + * There's no smp_mb__after_set_bit(), so it's this or abuse
125 + * smp_mb__after_clear_bit().
126 + */
127 + test_and_set_bit(BLOCKED, &pvcc->blocked);
128 +
129 + /*
130 + * We may have raced with pppoatm_pop(). If it ran for the
131 + * last packet in the queue, *just* before we set the BLOCKED
132 + * bit, then it might never run again and the channel could
133 + * remain permanently blocked. Cope with that race by checking
134 + * *again*. If it did run in that window, we'll have space on
135 + * the queue now and can return success. It's harmless to leave
136 + * the BLOCKED flag set, since it's only used as a trigger to
137 + * run the wakeup tasklet. Another wakeup will never hurt.
138 + * If pppoatm_pop() is running but hasn't got as far as making
139 + * space on the queue yet, then it hasn't checked the BLOCKED
140 + * flag yet either, so we're safe in that case too. It'll issue
141 + * an "immediate" wakeup... where "immediate" actually involves
142 + * taking the PPP channel's ->downl lock, which is held by the
143 + * code path that calls pppoatm_send(), and is thus going to
144 + * wait for us to finish.
145 + */
146 + if (atm_may_send(pvcc->atmvcc, size) &&
147 + atomic_inc_not_zero(&pvcc->inflight))
148 + return 1;
149 +
150 + return 0;
151 +}
152 /*
153 * Called by the ppp_generic.c to send a packet - returns true if packet
154 * was accepted. If we return false, then it's our job to call
155 @@ -207,7 +279,7 @@ static int pppoatm_send(struct ppp_chann
156 struct sk_buff *n;
157 n = skb_realloc_headroom(skb, LLC_LEN);
158 if (n != NULL &&
159 - !atm_may_send(pvcc->atmvcc, n->truesize)) {
160 + !pppoatm_may_send(pvcc, n->truesize)) {
161 kfree_skb(n);
162 goto nospace;
163 }
164 @@ -215,12 +287,12 @@ static int pppoatm_send(struct ppp_chann
165 skb = n;
166 if (skb == NULL)
167 return DROP_PACKET;
168 - } else if (!atm_may_send(pvcc->atmvcc, skb->truesize))
169 + } else if (!pppoatm_may_send(pvcc, skb->truesize))
170 goto nospace;
171 memcpy(skb_push(skb, LLC_LEN), pppllc, LLC_LEN);
172 break;
173 case e_vc:
174 - if (!atm_may_send(pvcc->atmvcc, skb->truesize))
175 + if (!pppoatm_may_send(pvcc, skb->truesize))
176 goto nospace;
177 break;
178 case e_autodetect:
179 @@ -285,6 +357,9 @@ static int pppoatm_assign_vcc(struct atm
180 if (pvcc == NULL)
181 return -ENOMEM;
182 pvcc->atmvcc = atmvcc;
183 +
184 + /* Maximum is zero, so that we can use atomic_inc_not_zero() */
185 + atomic_set(&pvcc->inflight, NONE_INFLIGHT);
186 pvcc->old_push = atmvcc->push;
187 pvcc->old_pop = atmvcc->pop;
188 pvcc->encaps = (enum pppoatm_encaps) be.encaps;