mac80211: fix packet loss on fq reordering
[openwrt/staging/dedeckeh.git] package/kernel/mac80211/patches/346-mac80211-Move-reorder-sensitive-TX-handlers-to-after.patch
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 4 Sep 2016 17:46:24 +0200
Subject: [PATCH] mac80211: fix sequence number assignment for PS response
 frames

When using intermediate queues, sequence number allocation is deferred
until dequeue. This doesn't work for PS response frames, which bypass
those queues.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
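The idea, roughly: the TX handlers are split into an "early" half that runs
at enqueue time and a "late", reorder-sensitive half (key selection, sequence
number assignment, fragmentation) that runs at dequeue time. PS response
frames bypass the intermediate queues, so the late half must also run on that
direct path. The stand-alone sketch below is a minimal model of that idea;
every name in it is a simplified stand-in, not the mac80211 code or API.

  /* model.c - toy model of the early/late TX-handler split */
  #include <stdbool.h>
  #include <stdio.h>

  struct frame {
          bool ps_response;       /* PS response frames bypass the queue */
          int seq;                /* assigned by the "late" handlers */
  };

  static int next_seq;

  /* Early handlers: order-insensitive work, safe at enqueue time. */
  static void handlers_early(struct frame *f)
  {
          (void)f;                /* key lookup, rate control, ... */
  }

  /* Late handlers: reorder-sensitive work; must run when the frame
   * actually leaves, so sequence numbers stay monotonic on air. */
  static void handlers_late(struct frame *f)
  {
          f->seq = next_seq;
          next_seq += 0x10;       /* seq_ctrl advances in steps of 16 */
  }

  int main(void)
  {
          struct frame queued = { .ps_response = false };
          struct frame ps_resp = { .ps_response = true };

          handlers_early(&queued);
          handlers_early(&ps_resp);

          handlers_late(&queued);   /* normal path: late work at dequeue */
          handlers_late(&ps_resp);  /* bypass path: late work run directly,
                                     * which is what this patch ensures */

          printf("queued seq=0x%x, ps-response seq=0x%x\n",
                 queued.seq, ps_resp.seq);
          return 0;
  }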
 
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -38,6 +38,12 @@
 #include "wme.h"
 #include "rate.h"
 
+static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx);
+static bool ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+                                       struct sta_info *sta, u8 pn_offs,
+                                       struct ieee80211_key_conf *key_conf,
+                                       struct sk_buff *skb);
+
 /* misc utils */
 
 static inline void ieee80211_tx_stats(struct net_device *dev, u32 len)
@@ -849,8 +855,7 @@ ieee80211_tx_h_sequence(struct ieee80211
         tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
         tx->sta->tx_stats.msdu[tid]++;
 
-        if (!tx->sta->sta.txq[0])
-                hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
+        hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
 
         return TX_CONTINUE;
 }
@@ -1398,6 +1403,7 @@ void ieee80211_txq_init(struct ieee80211
         fq_tin_init(&txqi->tin);
         fq_flow_init(&txqi->def_flow);
         codel_vars_init(&txqi->def_cvars);
+        __skb_queue_head_init(&txqi->frags);
 
         txqi->txq.vif = &sdata->vif;
 
@@ -1420,6 +1426,7 @@ void ieee80211_txq_purge(struct ieee8021
         struct fq_tin *tin = &txqi->tin;
 
         fq_tin_reset(fq, tin, fq_skb_free_func);
+        ieee80211_purge_tx_queue(&local->hw, &txqi->frags);
 }
 
 int ieee80211_txq_setup_flows(struct ieee80211_local *local)
@@ -1476,12 +1483,19 @@ struct sk_buff *ieee80211_tx_dequeue(str
         struct sk_buff *skb = NULL;
         struct fq *fq = &local->fq;
         struct fq_tin *tin = &txqi->tin;
+        struct ieee80211_tx_info *info;
 
         spin_lock_bh(&fq->lock);
 
         if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
                 goto out;
 
+        /* Make sure fragments stay together. */
+        skb = __skb_dequeue(&txqi->frags);
+        if (skb)
+                goto out;
+
+begin:
         skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
         if (!skb)
                 goto out;
@@ -1489,16 +1503,38 @@ struct sk_buff *ieee80211_tx_dequeue(str
         ieee80211_set_skb_vif(skb, txqi);
 
         hdr = (struct ieee80211_hdr *)skb->data;
-        if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
+        info = IEEE80211_SKB_CB(skb);
+        if (txq->sta && info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) {
                 struct sta_info *sta = container_of(txq->sta, struct sta_info,
                                                     sta);
-                struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+                u8 pn_offs = 0;
 
-                hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid);
-                if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
-                        info->flags |= IEEE80211_TX_CTL_AMPDU;
-                else
-                        info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+                if (info->control.hw_key)
+                        pn_offs = ieee80211_padded_hdrlen(hw, hdr->frame_control);
+
+                ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
+                                           info->control.hw_key, skb);
+        } else {
+                struct ieee80211_tx_data tx = { };
+
+                __skb_queue_head_init(&tx.skbs);
+                tx.local = local;
+                tx.skb = skb;
+                tx.hdrlen = ieee80211_padded_hdrlen(hw, hdr->frame_control);
+                if (txq->sta) {
+                        tx.sta = container_of(txq->sta, struct sta_info, sta);
+                        tx.sdata = tx.sta->sdata;
+                } else {
+                        tx.sdata = vif_to_sdata(info->control.vif);
+                }
+
+                if (invoke_tx_handlers_late(&tx))
+                        goto begin;
+
+                skb = __skb_dequeue(&tx.skbs);
+
+                if (!skb_queue_empty(&tx.skbs))
+                        skb_queue_splice_tail(&tx.skbs, &txqi->frags);
         }
 
 out:
@@ -1512,6 +1548,47 @@ out:
 }
 EXPORT_SYMBOL(ieee80211_tx_dequeue);
 
+static bool ieee80211_queue_skb(struct ieee80211_local *local,
+                                struct ieee80211_sub_if_data *sdata,
+                                struct sta_info *sta,
+                                struct sk_buff *skb)
+{
+        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+        struct fq *fq = &local->fq;
+        struct ieee80211_vif *vif;
+        struct txq_info *txqi;
+        struct ieee80211_sta *pubsta;
+
+        if (!local->ops->wake_tx_queue ||
+            sdata->vif.type == NL80211_IFTYPE_MONITOR)
+                return false;
+
+        if (sta && sta->uploaded)
+                pubsta = &sta->sta;
+        else
+                pubsta = NULL;
+
+        if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+                sdata = container_of(sdata->bss,
+                                     struct ieee80211_sub_if_data, u.ap);
+
+        vif = &sdata->vif;
+        txqi = ieee80211_get_txq(local, vif, pubsta, skb);
+
+        if (!txqi)
+                return false;
+
+        info->control.vif = vif;
+
+        spin_lock_bh(&fq->lock);
+        ieee80211_txq_enqueue(local, txqi, skb);
+        spin_unlock_bh(&fq->lock);
+
+        drv_wake_tx_queue(local, txqi);
+
+        return true;
+}
+
 static bool ieee80211_tx_frags(struct ieee80211_local *local,
                                struct ieee80211_vif *vif,
                                struct ieee80211_sta *sta,
@@ -1519,9 +1596,7 @@ static bool ieee80211_tx_frags(struct ie
                                bool txpending)
 {
         struct ieee80211_tx_control control = {};
-        struct fq *fq = &local->fq;
         struct sk_buff *skb, *tmp;
-        struct txq_info *txqi;
         unsigned long flags;
 
         skb_queue_walk_safe(skbs, skb, tmp) {
@@ -1536,21 +1611,6 @@ static bool ieee80211_tx_frags(struct ie
                 }
 #endif
 
-                txqi = ieee80211_get_txq(local, vif, sta, skb);
-                if (txqi) {
-                        info->control.vif = vif;
-
-                        __skb_unlink(skb, skbs);
-
-                        spin_lock_bh(&fq->lock);
-                        ieee80211_txq_enqueue(local, txqi, skb);
-                        spin_unlock_bh(&fq->lock);
-
-                        drv_wake_tx_queue(local, txqi);
-
-                        continue;
-                }
-
                 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
                 if (local->queue_stop_reasons[q] ||
                     (!txpending && !skb_queue_empty(&local->pending[q]))) {
@@ -1671,10 +1731,13 @@ static bool __ieee80211_tx(struct ieee80
 /*
  * Invoke TX handlers, return 0 on success and non-zero if the
  * frame was dropped or queued.
+ *
+ * The handlers are split into an early and late part. The latter is everything
+ * that can be sensitive to reordering, and will be deferred to after packets
+ * are dequeued from the intermediate queues (when they are enabled).
  */
-static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx)
 {
-        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
         ieee80211_tx_result res = TX_DROP;
 
 #define CALL_TXH(txh) \
@@ -1688,16 +1751,42 @@ static int invoke_tx_handlers(struct iee
         CALL_TXH(ieee80211_tx_h_check_assoc);
         CALL_TXH(ieee80211_tx_h_ps_buf);
         CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
-        CALL_TXH(ieee80211_tx_h_select_key);
+
         if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL))
                 CALL_TXH(ieee80211_tx_h_rate_ctrl);
 
+ txh_done:
+        if (unlikely(res == TX_DROP)) {
+                I802_DEBUG_INC(tx->local->tx_handlers_drop);
+                if (tx->skb)
+                        ieee80211_free_txskb(&tx->local->hw, tx->skb);
+                else
+                        ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs);
+                return -1;
+        } else if (unlikely(res == TX_QUEUED)) {
+                I802_DEBUG_INC(tx->local->tx_handlers_queued);
+                return -1;
+        }
+
+        return 0;
+}
+
+/*
+ * Late handlers can be called while the sta lock is held. Handlers that can
+ * cause packets to be generated will cause deadlock!
+ */
+static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx)
+{
+        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+        ieee80211_tx_result res = TX_CONTINUE;
+
         if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) {
                 __skb_queue_tail(&tx->skbs, tx->skb);
                 tx->skb = NULL;
                 goto txh_done;
         }
 
+        CALL_TXH(ieee80211_tx_h_select_key);
         CALL_TXH(ieee80211_tx_h_michael_mic_add);
         CALL_TXH(ieee80211_tx_h_sequence);
         CALL_TXH(ieee80211_tx_h_fragment);
@@ -1724,6 +1813,15 @@ static int invoke_tx_handlers(struct iee
         return 0;
 }
 
+static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
+{
+        int r = invoke_tx_handlers_early(tx);
+        if (r)
+                return r;
+
+        return invoke_tx_handlers_late(tx);
+}
+
 bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw,
                               struct ieee80211_vif *vif, struct sk_buff *skb,
                               int band, struct ieee80211_sta **sta)
@@ -1798,7 +1896,13 @@ static bool ieee80211_tx(struct ieee8021
                 info->hw_queue =
                         sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
 
-        if (!invoke_tx_handlers(&tx))
+        if (invoke_tx_handlers_early(&tx))
+                return false;
+
+        if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb))
+                return true;
+
+        if (!invoke_tx_handlers_late(&tx))
                 result = __ieee80211_tx(local, &tx.skbs, led_len,
                                         tx.sta, txpending);
 
@@ -3181,7 +3285,7 @@ out:
 }
 
 static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
-                                struct net_device *dev, struct sta_info *sta,
+                                struct sta_info *sta,
                                 struct ieee80211_fast_tx *fast_tx,
                                 struct sk_buff *skb)
 {
@@ -3192,9 +3296,9 @@ static bool ieee80211_xmit_fast(struct i
         struct ethhdr eth;
         struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
         struct ieee80211_hdr *hdr = (void *)fast_tx->hdr;
-        struct ieee80211_tx_data tx;
-        ieee80211_tx_result r;
         struct tid_ampdu_tx *tid_tx = NULL;
+        ieee80211_tx_result r;
+        struct ieee80211_tx_data tx;
         u8 tid = IEEE80211_NUM_TIDS;
 
         /* control port protocol needs a lot of special handling */
@@ -3232,8 +3336,6 @@ static bool ieee80211_xmit_fast(struct i
                 return true;
         }
 
-        ieee80211_tx_stats(dev, skb->len + extra_head);
-
         if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
             ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
                 return true;
@@ -3262,24 +3364,7 @@ static bool ieee80211_xmit_fast(struct i
         info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
                       IEEE80211_TX_CTL_DONTFRAG |
                       (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0);
-
-        if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
-                *ieee80211_get_qos_ctl(hdr) = tid;
-                if (!sta->sta.txq[0])
-                        hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
-        } else {
-                info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
-                hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
-                sdata->sequence_number += 0x10;
-        }
-
-        if (skb_shinfo(skb)->gso_size)
-                sta->tx_stats.msdu[tid] +=
-                        DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
-        else
-                sta->tx_stats.msdu[tid]++;
-
-        info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+        info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT;
 
         __skb_queue_head_init(&tx.skbs);
 
@@ -3305,22 +3390,71 @@ static bool ieee80211_xmit_fast(struct i
                 }
         }
 
+        if (ieee80211_queue_skb(local, sdata, sta, skb))
+                return true;
+
+        ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs,
+                                   &fast_tx->key->conf, skb);
+
+        if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+                sdata = container_of(sdata->bss,
+                                     struct ieee80211_sub_if_data, u.ap);
+
+        __skb_queue_tail(&tx.skbs, skb);
+        ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
+
+        return true;
+}
+
+/*
+ * Can be called while the sta lock is held. Anything that can cause packets to
+ * be generated will cause deadlock!
+ */
+static bool ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata,
+                                       struct sta_info *sta, u8 pn_offs,
+                                       struct ieee80211_key_conf *key_conf,
+                                       struct sk_buff *skb)
+{
+        struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+        struct ieee80211_hdr *hdr = (void *)skb->data;
+        u8 tid = IEEE80211_NUM_TIDS;
+
+        ieee80211_tx_stats(skb->dev, skb->len);
+
+        if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
+                tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
+                *ieee80211_get_qos_ctl(hdr) = tid;
+                hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+        } else {
+                info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+                hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
+                sdata->sequence_number += 0x10;
+        }
+
+        if (skb_shinfo(skb)->gso_size)
+                sta->tx_stats.msdu[tid] +=
+                        DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
+        else
+                sta->tx_stats.msdu[tid]++;
+
+        info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
+
         /* statistics normally done by ieee80211_tx_h_stats (but that
          * has to consider fragmentation, so is more complex)
          */
         sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len;
         sta->tx_stats.packets[skb_get_queue_mapping(skb)]++;
 
-        if (fast_tx->pn_offs) {
+        if (pn_offs) {
                 u64 pn;
-                u8 *crypto_hdr = skb->data + fast_tx->pn_offs;
+                u8 *crypto_hdr = skb->data + pn_offs;
 
-                switch (fast_tx->key->conf.cipher) {
+                switch (key_conf->cipher) {
                 case WLAN_CIPHER_SUITE_CCMP:
                 case WLAN_CIPHER_SUITE_CCMP_256:
                 case WLAN_CIPHER_SUITE_GCMP:
                 case WLAN_CIPHER_SUITE_GCMP_256:
-                        pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn);
+                        pn = atomic64_inc_return(&key_conf->tx_pn);
                         crypto_hdr[0] = pn;
                         crypto_hdr[1] = pn >> 8;
                         crypto_hdr[4] = pn >> 16;
@@ -3331,12 +3465,6 @@ static bool ieee80211_xmit_fast(struct i
                 }
         }
 
-        if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
-                sdata = container_of(sdata->bss,
-                                     struct ieee80211_sub_if_data, u.ap);
-
-        __skb_queue_tail(&tx.skbs, skb);
-        ieee80211_tx_frags(local, &sdata->vif, &sta->sta, &tx.skbs, false);
         return true;
 }
 
@@ -3364,7 +3492,7 @@ void __ieee80211_subif_start_xmit(struct
                 fast_tx = rcu_dereference(sta->fast_tx);
 
                 if (fast_tx &&
-                    ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb))
+                    ieee80211_xmit_fast(sdata, sta, fast_tx, skb))
                         goto out;
         }
 
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -715,6 +715,7 @@ enum mac80211_tx_info_flags {
  *      frame (PS-Poll or uAPSD).
  * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information
  * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
+ * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path
  *
  * These flags are used in tx_info->control.flags.
  */
@@ -723,6 +724,7 @@ enum mac80211_tx_control_flags {
         IEEE80211_TX_CTRL_PS_RESPONSE           = BIT(1),
         IEEE80211_TX_CTRL_RATE_INJECT           = BIT(2),
         IEEE80211_TX_CTRL_AMSDU                 = BIT(3),
+        IEEE80211_TX_CTRL_FAST_XMIT             = BIT(4),
 };
 
 /*
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -814,11 +814,13 @@ enum txq_info_flags {
  * @def_flow: used as a fallback flow when a packet destined to @tin hashes to
  *      a fq_flow which is already owned by a different tin
  * @def_cvars: codel vars for @def_flow
+ * @frags: used to keep fragments created after dequeue
  */
 struct txq_info {
         struct fq_tin tin;
         struct fq_flow def_flow;
         struct codel_vars def_cvars;
+        struct sk_buff_head frags;
         unsigned long flags;
 
         /* keep last! */