mac80211: rework memory allocation for software queueing patch
[openwrt/staging/dedeckeh.git] / package/kernel/mac80211/patches/300-mac80211-add-an-intermediate-software-queue-implemen.patch
1 From: Felix Fietkau <nbd@openwrt.org>
2 Date: Tue, 18 Nov 2014 23:58:51 +0100
3 Subject: [PATCH] mac80211: add an intermediate software queue implementation
4
5 This allows drivers to request per-vif and per-sta-tid queues from which
6 they can pull frames. This makes it easier to keep the hardware queues
7 short, and to improve fairness between clients and vifs.
8
9 The task of scheduling packet transmission is left up to the driver -
10 queueing is controlled by mac80211. Drivers can only dequeue packets by
11 calling ieee80211_tx_dequeue. This makes it possible to add active queue
12 management later without changing drivers using this code.
13
14 This can also be used as a starting point to implement A-MSDU
15 aggregation in a way that does not add artificially induced latency.
16
17 Signed-off-by: Felix Fietkau <nbd@openwrt.org>
18 ---
19
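Editor's note, not part of the patch itself: the sketch below illustrates how a driver is expected to hook into the new API described above. Only the ieee80211_* symbols and the txq_data_size / txq_ac_max_pending fields come from this patch; every mydrv_* name is hypothetical.

    /*
     * Illustrative sketch only -- not part of the patch.  A hypothetical
     * driver pulling frames from the mac80211 intermediate queues.
     */
    #include <net/mac80211.h>

    struct mydrv_txq {
    	unsigned long wake_count;	/* example per-queue driver state */
    };

    /* low-level helpers assumed to exist elsewhere in the driver */
    bool mydrv_hw_queue_has_room(struct ieee80211_hw *hw, u8 ac);
    void mydrv_push_to_hw(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
    		      struct sk_buff *skb);

    /*
     * .wake_tx_queue op: mac80211 calls this whenever it queues a frame on
     * @txq; the driver decides when (and how much) to actually pull.
     */
    static void mydrv_wake_tx_queue(struct ieee80211_hw *hw,
    				struct ieee80211_txq *txq)
    {
    	struct mydrv_txq *dtxq = (struct mydrv_txq *)txq->drv_priv;
    	struct sk_buff *skb;

    	dtxq->wake_count++;

    	/* keep the hardware queue short: pull only while there is room */
    	while (mydrv_hw_queue_has_room(hw, txq->ac)) {
    		skb = ieee80211_tx_dequeue(hw, txq);
    		if (!skb)
    			break;
    		mydrv_push_to_hw(hw, txq, skb);
    	}
    }

    /* during probe, before ieee80211_register_hw() */
    static void mydrv_setup_txqs(struct ieee80211_hw *hw)
    {
    	hw->txq_data_size = sizeof(struct mydrv_txq);
    	hw->txq_ac_max_pending = 128;	/* leaving it 0 defaults to 64 */
    }

mydrv_wake_tx_queue would be wired up as the .wake_tx_queue member of the driver's ieee80211_ops; per-queue drv_priv state for interfaces and stations is still set up in the driver's .add_interface and .sta_add ops, as noted in the DOC comment below.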
20 --- a/include/net/mac80211.h
21 +++ b/include/net/mac80211.h
22 @@ -84,6 +84,35 @@
23 *
24 */
25
26 +/**
27 + * DOC: mac80211 software tx queueing
28 + *
29 + * mac80211 provides an optional intermediate queueing implementation designed
30 + * to allow the driver to keep hardware queues short and provide some fairness
31 + * between different stations/interfaces.
32 + * In this model, the driver pulls data frames from the mac80211 queue instead
33 + * of letting mac80211 push them via drv_tx().
34 + * Other frames (e.g. control or management) are still pushed using drv_tx().
35 + *
36 + * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with a
37 + * single per-vif queue for multicast data frames.
38 + *
39 + * The driver is expected to initialize its private per-queue data for stations
40 + * and interfaces in the .add_interface and .sta_add ops.
41 + *
42 + * The driver cannot access the queue directly. To dequeue a frame, it calls
43 + * ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a queue, it
44 + * calls the .wake_tx_queue driver op.
45 + *
46 + * For AP powersave TIM handling, the driver only needs to indicate if it has
47 + * buffered packets in the driver specific data structures by calling
48 + * ieee80211_sta_set_buffered(). For frames buffered in the ieee80211_txq
49 + * struct, mac80211 sets the appropriate TIM PVB bits and calls
50 + * .release_buffered_frames().
51 + * That callback is expected to release its own buffered frames and afterwards
52 + * also frames from the ieee80211_txq (obtained via ieee80211_tx_dequeue).
53 + */
54 +
55 struct device;
56
57 /**
58 @@ -1257,6 +1286,8 @@ struct ieee80211_vif {
59 u8 cab_queue;
60 u8 hw_queue[IEEE80211_NUM_ACS];
61
62 + struct ieee80211_txq *txq;
63 +
64 struct ieee80211_chanctx_conf __rcu *chanctx_conf;
65
66 u32 driver_flags;
67 @@ -1519,6 +1550,8 @@ struct ieee80211_sta {
68 bool tdls_initiator;
69 bool mfp;
70
71 + struct ieee80211_txq *txq[IEEE80211_NUM_TIDS];
72 +
73 /* must be last */
74 u8 drv_priv[0] __aligned(sizeof(void *));
75 };
76 @@ -1547,6 +1580,27 @@ struct ieee80211_tx_control {
77 };
78
79 /**
80 + * struct ieee80211_txq - Software intermediate tx queue
81 + *
82 + * @vif: &struct ieee80211_vif pointer from the add_interface callback.
83 + * @sta: station table entry, %NULL for per-vif queue
84 + * @tid: the TID for this queue (unused for per-vif queue)
85 + * @ac: the AC for this queue
86 + *
87 + * The driver can obtain packets from this queue by calling
88 + * ieee80211_tx_dequeue().
89 + */
90 +struct ieee80211_txq {
91 + struct ieee80211_vif *vif;
92 + struct ieee80211_sta *sta;
93 + u8 tid;
94 + u8 ac;
95 +
96 + /* must be last */
97 + u8 drv_priv[0] __aligned(sizeof(void *));
98 +};
99 +
100 +/**
101 * enum ieee80211_hw_flags - hardware flags
102 *
103 * These flags are used to indicate hardware capabilities to
104 @@ -1770,6 +1824,8 @@ enum ieee80211_hw_flags {
105 * within &struct ieee80211_sta.
106 * @chanctx_data_size: size (in bytes) of the drv_priv data area
107 * within &struct ieee80211_chanctx_conf.
108 + * @txq_data_size: size (in bytes) of the drv_priv data area
109 + * within &struct ieee80211_txq.
110 *
111 * @max_rates: maximum number of alternate rate retry stages the hw
112 * can handle.
113 @@ -1818,6 +1874,9 @@ enum ieee80211_hw_flags {
114 * @n_cipher_schemes: a size of an array of cipher schemes definitions.
115 * @cipher_schemes: a pointer to an array of cipher scheme definitions
116 * supported by HW.
117 + *
118 + * @txq_ac_max_pending: maximum number of frames per AC pending in all txq
119 + * entries for a vif.
120 */
121 struct ieee80211_hw {
122 struct ieee80211_conf conf;
123 @@ -1830,6 +1889,7 @@ struct ieee80211_hw {
124 int vif_data_size;
125 int sta_data_size;
126 int chanctx_data_size;
127 + int txq_data_size;
128 u16 queues;
129 u16 max_listen_interval;
130 s8 max_signal;
131 @@ -1846,6 +1906,7 @@ struct ieee80211_hw {
132 u8 uapsd_max_sp_len;
133 u8 n_cipher_schemes;
134 const struct ieee80211_cipher_scheme *cipher_schemes;
135 + int txq_ac_max_pending;
136 };
137
138 /**
139 @@ -3007,6 +3068,8 @@ enum ieee80211_reconfig_type {
140 * response template is provided, together with the location of the
141 * switch-timing IE within the template. The skb can only be used within
142 * the function call.
143 + *
144 + * @wake_tx_queue: Called when new packets have been added to the queue.
145 */
146 struct ieee80211_ops {
147 void (*tx)(struct ieee80211_hw *hw,
148 @@ -3238,6 +3301,9 @@ struct ieee80211_ops {
149 void (*tdls_recv_channel_switch)(struct ieee80211_hw *hw,
150 struct ieee80211_vif *vif,
151 struct ieee80211_tdls_ch_sw_params *params);
152 +
153 + void (*wake_tx_queue)(struct ieee80211_hw *hw,
154 + struct ieee80211_txq *txq);
155 };
156
157 /**
158 @@ -5249,4 +5315,17 @@ void ieee80211_unreserve_tid(struct ieee
159 */
160 size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
161 const u8 *ids, int n_ids, size_t offset);
162 +
163 +/**
164 + * ieee80211_tx_dequeue - dequeue a packet from a software tx queue
165 + *
166 + * @hw: pointer as obtained from ieee80211_alloc_hw()
167 + * @txq: pointer obtained from the station or virtual interface
168 + *
169 + * Returns the skb if successful, %NULL if no frame was available.
170 + */
171 +struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
172 + struct ieee80211_txq *txq);
173 +
174 +
175 #endif /* MAC80211_H */
176 --- a/net/mac80211/driver-ops.h
177 +++ b/net/mac80211/driver-ops.h
178 @@ -1367,4 +1367,16 @@ drv_tdls_recv_channel_switch(struct ieee
179 trace_drv_return_void(local);
180 }
181
182 +static inline void drv_wake_tx_queue(struct ieee80211_local *local,
183 + struct txq_info *txq)
184 +{
185 + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif);
186 +
187 + if (!check_sdata_in_driver(sdata))
188 + return;
189 +
190 + trace_drv_wake_tx_queue(local, sdata, txq->txq.sta, txq->txq.tid);
191 + local->ops->wake_tx_queue(&local->hw, &txq->txq);
192 +}
193 +
194 #endif /* __MAC80211_DRIVER_OPS */
195 --- a/net/mac80211/ieee80211_i.h
196 +++ b/net/mac80211/ieee80211_i.h
197 @@ -809,6 +809,19 @@ struct mac80211_qos_map {
198 struct rcu_head rcu_head;
199 };
200
201 +enum txq_info_flags {
202 + IEEE80211_TXQ_STOP,
203 + IEEE80211_TXQ_AMPDU,
204 +};
205 +
206 +struct txq_info {
207 + struct sk_buff_head queue;
208 + unsigned long flags;
209 +
210 + /* keep last! */
211 + struct ieee80211_txq txq;
212 +};
213 +
214 struct ieee80211_sub_if_data {
215 struct list_head list;
216
217 @@ -853,6 +866,7 @@ struct ieee80211_sub_if_data {
218 bool control_port_no_encrypt;
219 int encrypt_headroom;
220
221 + atomic_t txqs_len[IEEE80211_NUM_ACS];
222 struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS];
223 struct mac80211_qos_map __rcu *qos_map;
224
225 @@ -1453,6 +1467,10 @@ static inline struct ieee80211_local *hw
226 return container_of(hw, struct ieee80211_local, hw);
227 }
228
229 +static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq)
230 +{
231 + return container_of(txq, struct txq_info, txq);
232 +}
233
234 static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr)
235 {
236 @@ -1905,6 +1923,9 @@ static inline bool ieee80211_can_run_wor
237 return true;
238 }
239
240 +void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
241 + struct sta_info *sta,
242 + struct txq_info *txq, int tid);
243 void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
244 u16 transaction, u16 auth_alg, u16 status,
245 const u8 *extra, size_t extra_len, const u8 *bssid,
246 --- a/net/mac80211/iface.c
247 +++ b/net/mac80211/iface.c
248 @@ -969,6 +969,13 @@ static void ieee80211_do_stop(struct iee
249 }
250 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
251
252 + if (sdata->vif.txq) {
253 + struct txq_info *txqi = to_txq_info(sdata->vif.txq);
254 +
255 + ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
256 + atomic_set(&sdata->txqs_len[txqi->txq.ac], 0);
257 + }
258 +
259 if (local->open_count == 0)
260 ieee80211_clear_tx_pending(local);
261
262 @@ -1674,6 +1681,7 @@ int ieee80211_if_add(struct ieee80211_lo
263 {
264 struct net_device *ndev = NULL;
265 struct ieee80211_sub_if_data *sdata = NULL;
266 + struct txq_info *txqi;
267 int ret, i;
268 int txqs = 1;
269
270 @@ -1693,10 +1701,18 @@ int ieee80211_if_add(struct ieee80211_lo
271 ieee80211_assign_perm_addr(local, wdev->address, type);
272 memcpy(sdata->vif.addr, wdev->address, ETH_ALEN);
273 } else {
274 + int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size,
275 + sizeof(void *));
276 + int txq_size = 0;
277 +
278 + if (local->ops->wake_tx_queue)
279 + txq_size += sizeof(struct txq_info) +
280 + local->hw.txq_data_size;
281 +
282 if (local->hw.queues >= IEEE80211_NUM_ACS)
283 txqs = IEEE80211_NUM_ACS;
284
285 - ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size,
286 + ndev = alloc_netdev_mqs(size + txq_size,
287 name, NET_NAME_UNKNOWN,
288 ieee80211_if_setup, txqs, 1);
289 if (!ndev)
290 @@ -1731,6 +1747,11 @@ int ieee80211_if_add(struct ieee80211_lo
291 memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN);
292 memcpy(sdata->name, ndev->name, IFNAMSIZ);
293 
294 + if (txq_size) {
295 + txqi = netdev_priv(ndev) + size;
296 + ieee80211_init_tx_queue(sdata, NULL, txqi, 0);
+ }
+
297 sdata->dev = ndev;
298 }
299
324 --- a/net/mac80211/main.c
325 +++ b/net/mac80211/main.c
326 @@ -1019,6 +1019,9 @@ int ieee80211_register_hw(struct ieee802
327
328 local->dynamic_ps_forced_timeout = -1;
329
330 + if (!local->hw.txq_ac_max_pending)
331 + local->hw.txq_ac_max_pending = 64;
332 +
333 result = ieee80211_wep_init(local);
334 if (result < 0)
335 wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
336 --- a/net/mac80211/sta_info.c
337 +++ b/net/mac80211/sta_info.c
338 @@ -118,6 +118,16 @@ static void __cleanup_single_sta(struct
339 atomic_dec(&ps->num_sta_ps);
340 }
341
342 + if (sta->sta.txq[0]) {
343 + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
344 + struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
345 + int n = skb_queue_len(&txqi->queue);
346 +
347 + ieee80211_purge_tx_queue(&local->hw, &txqi->queue);
348 + atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]);
349 + }
350 + }
351 +
352 for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
353 local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]);
354 ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]);
355 @@ -234,6 +244,8 @@ void sta_info_free(struct ieee80211_loca
356
357 sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr);
358
359 + if (sta->sta.txq[0])
360 + kfree(to_txq_info(sta->sta.txq[0]));
361 kfree(rcu_dereference_raw(sta->sta.rates));
362 kfree(sta);
363 }
364 @@ -285,11 +297,12 @@ struct sta_info *sta_info_alloc(struct i
365 const u8 *addr, gfp_t gfp)
366 {
367 struct ieee80211_local *local = sdata->local;
368 + struct ieee80211_hw *hw = &local->hw;
369 struct sta_info *sta;
370 struct timespec uptime;
371 int i;
372
373 - sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp);
374 + sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp);
375 if (!sta)
376 return NULL;
377
378 @@ -321,11 +334,25 @@ struct sta_info *sta_info_alloc(struct i
379 for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++)
380 ewma_init(&sta->chain_signal_avg[i], 1024, 8);
381
382 - if (sta_prepare_rate_control(local, sta, gfp)) {
383 - kfree(sta);
384 - return NULL;
385 + if (local->ops->wake_tx_queue) {
386 + void *txq_data;
387 + int size = sizeof(struct txq_info) +
388 + ALIGN(hw->txq_data_size, sizeof(void *));
389 +
390 + txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp);
391 + if (!txq_data)
392 + goto free;
393 +
394 + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
395 + struct txq_info *txq = txq_data + i * size;
396 +
397 + ieee80211_init_tx_queue(sdata, sta, txq, i);
398 + }
399 }
400
401 + if (sta_prepare_rate_control(local, sta, gfp))
402 + goto free_txq;
403 +
404 for (i = 0; i < IEEE80211_NUM_TIDS; i++) {
405 /*
406 * timer_to_tid must be initialized with identity mapping
407 @@ -346,7 +373,7 @@ struct sta_info *sta_info_alloc(struct i
408 if (sdata->vif.type == NL80211_IFTYPE_AP ||
409 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
410 struct ieee80211_supported_band *sband =
411 - local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)];
412 + hw->wiphy->bands[ieee80211_get_sdata_band(sdata)];
413 u8 smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >>
414 IEEE80211_HT_CAP_SM_PS_SHIFT;
415 /*
416 @@ -371,6 +398,13 @@ struct sta_info *sta_info_alloc(struct i
417 sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr);
418
419 return sta;
420 +
421 +free_txq:
422 + if (sta->sta.txq[0])
423 + kfree(to_txq_info(sta->sta.txq[0]));
424 +free:
425 + kfree(sta);
426 + return NULL;
427 }
428
429 static int sta_info_insert_check(struct sta_info *sta)
430 @@ -640,6 +674,8 @@ static void __sta_info_recalc_tim(struct
431
432 indicate_tim |=
433 sta->driver_buffered_tids & tids;
434 + indicate_tim |=
435 + sta->txq_buffered_tids & tids;
436 }
437
438 done:
439 @@ -1071,7 +1107,7 @@ void ieee80211_sta_ps_deliver_wakeup(str
440 struct ieee80211_sub_if_data *sdata = sta->sdata;
441 struct ieee80211_local *local = sdata->local;
442 struct sk_buff_head pending;
443 - int filtered = 0, buffered = 0, ac;
444 + int filtered = 0, buffered = 0, ac, i;
445 unsigned long flags;
446 struct ps_data *ps;
447
448 @@ -1090,10 +1126,22 @@ void ieee80211_sta_ps_deliver_wakeup(str
449
450 BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1);
451 sta->driver_buffered_tids = 0;
452 + sta->txq_buffered_tids = 0;
453
454 if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS))
455 drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta);
456
457 + if (sta->sta.txq[0]) {
458 + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) {
459 + struct txq_info *txqi = to_txq_info(sta->sta.txq[i]);
460 +
461 + if (!skb_queue_len(&txqi->queue))
462 + continue;
463 +
464 + drv_wake_tx_queue(local, txqi);
465 + }
466 + }
467 +
468 skb_queue_head_init(&pending);
469
470 /* sync with ieee80211_tx_h_unicast_ps_buf */
471 @@ -1254,7 +1302,7 @@ ieee80211_sta_ps_deliver_response(struct
472 struct ieee80211_sub_if_data *sdata = sta->sdata;
473 struct ieee80211_local *local = sdata->local;
474 bool more_data = false;
475 - int ac;
476 + int ac, tid;
477 unsigned long driver_release_tids = 0;
478 struct sk_buff_head frames;
479
480 @@ -1275,8 +1323,10 @@ ieee80211_sta_ps_deliver_response(struct
481 /* if we already have frames from software, then we can't also
482 * release from hardware queues
483 */
484 - if (skb_queue_empty(&frames))
485 + if (skb_queue_empty(&frames)) {
486 driver_release_tids |= sta->driver_buffered_tids & tids;
487 + driver_release_tids |= sta->txq_buffered_tids & tids;
488 + }
489
490 if (driver_release_tids) {
491 /* If the driver has data on more than one TID then
492 @@ -1447,6 +1497,8 @@ ieee80211_sta_ps_deliver_response(struct
493
494 sta_info_recalc_tim(sta);
495 } else {
496 + unsigned long tids = sta->txq_buffered_tids & driver_release_tids;
497 +
498 /*
499 * We need to release a frame that is buffered somewhere in the
500 * driver ... it'll have to handle that.
501 @@ -1466,8 +1518,22 @@ ieee80211_sta_ps_deliver_response(struct
502 * that the TID(s) became empty before returning here from the
503 * release function.
504 * Either way, however, when the driver tells us that the TID(s)
505 - * became empty we'll do the TIM recalculation.
506 + * became empty or we find that a txq became empty, we'll do the
507 + * TIM recalculation.
508 */
509 +
510 + if (!sta->sta.txq[0])
511 + return;
512 +
513 + for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
514 + struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
515 +
516 + if (!(tids & BIT(tid)) || skb_queue_len(&txqi->queue))
517 + continue;
518 +
519 + sta_info_recalc_tim(sta);
520 + break;
521 + }
522 }
523 }
524
525 --- a/net/mac80211/sta_info.h
526 +++ b/net/mac80211/sta_info.h
527 @@ -274,6 +274,7 @@ struct sta_ampdu_mlme {
528 * entered power saving state, these are also delivered to
529 * the station when it leaves powersave or polls for frames
530 * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on
531 + * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on
532 * @rx_packets: Number of MSDUs received from this STA
533 * @rx_bytes: Number of bytes received from this STA
534 * @last_rx: time (in jiffies) when last frame was received from this STA
535 @@ -368,6 +369,7 @@ struct sta_info {
536 struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS];
537 struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS];
538 unsigned long driver_buffered_tids;
539 + unsigned long txq_buffered_tids;
540
541 /* Updated from RX path only, no locking requirements */
542 unsigned long rx_packets;
543 --- a/net/mac80211/trace.h
544 +++ b/net/mac80211/trace.h
545 @@ -2312,6 +2312,34 @@ TRACE_EVENT(drv_tdls_recv_channel_switch
546 )
547 );
548
549 +TRACE_EVENT(drv_wake_tx_queue,
550 + TP_PROTO(struct ieee80211_local *local,
551 + struct ieee80211_sub_if_data *sdata,
552 + struct ieee80211_sta *sta,
553 + u8 tid),
554 +
555 + TP_ARGS(local, sdata, sta, tid),
556 +
557 + TP_STRUCT__entry(
558 + LOCAL_ENTRY
559 + VIF_ENTRY
560 + STA_ENTRY
561 + __field(u8, tid)
562 + ),
563 +
564 + TP_fast_assign(
565 + LOCAL_ASSIGN;
566 + VIF_ASSIGN;
567 + STA_ASSIGN;
568 + __entry->tid = tid;
569 + ),
570 +
571 + TP_printk(
572 + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " tid: 0x%x",
573 + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->tid
574 + )
575 +);
576 +
577 #ifdef CPTCFG_MAC80211_MESSAGE_TRACING
578 #undef TRACE_SYSTEM
579 #define TRACE_SYSTEM mac80211_msg
580 --- a/net/mac80211/tx.c
581 +++ b/net/mac80211/tx.c
582 @@ -776,12 +776,23 @@ ieee80211_tx_h_rate_ctrl(struct ieee8021
583 return TX_CONTINUE;
584 }
585
586 +static __le16
587 +ieee80211_tx_next_seq(struct sta_info *sta, int tid)
588 +{
589 + u16 *seq = &sta->tid_seq[tid];
590 + __le16 ret = cpu_to_le16(*seq);
591 +
592 + /* Increase the sequence number. */
593 + *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ;
594 +
595 + return ret;
596 +}
597 +
598 static ieee80211_tx_result debug_noinline
599 ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
600 {
601 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
602 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
603 - u16 *seq;
604 u8 *qc;
605 int tid;
606
607 @@ -832,13 +843,10 @@ ieee80211_tx_h_sequence(struct ieee80211
608
609 qc = ieee80211_get_qos_ctl(hdr);
610 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
611 - seq = &tx->sta->tid_seq[tid];
612 tx->sta->tx_msdu[tid]++;
613
614 - hdr->seq_ctrl = cpu_to_le16(*seq);
615 -
616 - /* Increase the sequence number. */
617 - *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ;
618 + if (!tx->sta->sta.txq[0])
619 + hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
620
621 return TX_CONTINUE;
622 }
623 @@ -1067,7 +1075,7 @@ static bool ieee80211_tx_prep_agg(struct
624 * nothing -- this aggregation session is being started
625 * but that might still fail with the driver
626 */
627 - } else {
628 + } else if (!tx->sta->sta.txq[tid]) {
629 spin_lock(&tx->sta->lock);
630 /*
631 * Need to re-check now, because we may get here
632 @@ -1201,13 +1209,102 @@ ieee80211_tx_prepare(struct ieee80211_su
633 return TX_CONTINUE;
634 }
635
636 +static void ieee80211_drv_tx(struct ieee80211_local *local,
637 + struct ieee80211_vif *vif,
638 + struct ieee80211_sta *pubsta,
639 + struct sk_buff *skb)
640 +{
641 + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
642 + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
643 + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
644 + struct ieee80211_tx_control control = {
645 + .sta = pubsta
646 + };
647 + struct ieee80211_txq *txq = NULL;
648 + struct txq_info *txqi;
649 + u8 ac;
650 +
651 + if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)
652 + goto tx_normal;
653 +
654 + if (!ieee80211_is_data(hdr->frame_control))
655 + goto tx_normal;
656 +
657 + if (pubsta) {
658 + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
659 +
660 + txq = pubsta->txq[tid];
661 + } else if (vif) {
662 + txq = vif->txq;
663 + }
664 +
665 + if (!txq)
666 + goto tx_normal;
667 +
668 + ac = txq->ac;
669 + txqi = to_txq_info(txq);
670 + atomic_inc(&sdata->txqs_len[ac]);
671 + if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending)
672 + netif_stop_subqueue(sdata->dev, ac);
673 +
674 + skb_queue_tail(&txqi->queue, skb);
675 + drv_wake_tx_queue(local, txqi);
676 +
677 + return;
678 +
679 +tx_normal:
680 + drv_tx(local, &control, skb);
681 +}
682 +
683 +struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
684 + struct ieee80211_txq *txq)
685 +{
686 + struct ieee80211_local *local = hw_to_local(hw);
687 + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif);
688 + struct txq_info *txqi = container_of(txq, struct txq_info, txq);
689 + struct ieee80211_hdr *hdr;
690 + struct sk_buff *skb = NULL;
691 + u8 ac = txq->ac;
692 +
693 + spin_lock_bh(&txqi->queue.lock);
694 +
695 + if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags))
696 + goto out;
697 +
698 + skb = __skb_dequeue(&txqi->queue);
699 + if (!skb)
700 + goto out;
701 +
702 + atomic_dec(&sdata->txqs_len[ac]);
703 + if (__netif_subqueue_stopped(sdata->dev, ac))
704 + ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]);
705 +
706 + hdr = (struct ieee80211_hdr *)skb->data;
707 + if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) {
708 + struct sta_info *sta = container_of(txq->sta, struct sta_info,
709 + sta);
710 + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
711 +
712 + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid);
713 + if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
714 + info->flags |= IEEE80211_TX_CTL_AMPDU;
715 + else
716 + info->flags &= ~IEEE80211_TX_CTL_AMPDU;
717 + }
718 +
719 +out:
720 + spin_unlock_bh(&txqi->queue.lock);
721 +
722 + return skb;
723 +}
724 +EXPORT_SYMBOL(ieee80211_tx_dequeue);
725 +
726 static bool ieee80211_tx_frags(struct ieee80211_local *local,
727 struct ieee80211_vif *vif,
728 struct ieee80211_sta *sta,
729 struct sk_buff_head *skbs,
730 bool txpending)
731 {
732 - struct ieee80211_tx_control control;
733 struct sk_buff *skb, *tmp;
734 unsigned long flags;
735
736 @@ -1265,10 +1362,9 @@ static bool ieee80211_tx_frags(struct ie
737 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
738
739 info->control.vif = vif;
740 - control.sta = sta;
741
742 __skb_unlink(skb, skbs);
743 - drv_tx(local, &control, skb);
744 + ieee80211_drv_tx(local, vif, sta, skb);
745 }
746
747 return true;
748 --- a/net/mac80211/util.c
749 +++ b/net/mac80211/util.c
750 @@ -308,6 +308,11 @@ void ieee80211_propagate_queue_wake(stru
751 for (ac = 0; ac < n_acs; ac++) {
752 int ac_queue = sdata->vif.hw_queue[ac];
753
754 + if (local->ops->wake_tx_queue &&
755 + (atomic_read(&sdata->txqs_len[ac]) >
756 + local->hw.txq_ac_max_pending))
757 + continue;
758 +
759 if (ac_queue == queue ||
760 (sdata->vif.cab_queue == queue &&
761 local->queue_stop_reasons[ac_queue] == 0 &&
762 @@ -3307,3 +3312,21 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u
763
764 return buf;
765 }
766 +
767 +void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata,
768 + struct sta_info *sta,
769 + struct txq_info *txqi, int tid)
770 +{
771 + skb_queue_head_init(&txqi->queue);
772 + txqi->txq.vif = &sdata->vif;
773 +
774 + if (sta) {
775 + txqi->txq.sta = &sta->sta;
776 + sta->sta.txq[tid] = &txqi->txq;
+ txqi->txq.tid = tid;
777 + txqi->txq.ac = ieee802_1d_to_ac[tid & 7];
778 + } else {
779 + sdata->vif.txq = &txqi->txq;
780 + txqi->txq.ac = IEEE80211_AC_BE;
781 + }
782 +}
783 --- a/net/mac80211/rx.c
784 +++ b/net/mac80211/rx.c
785 @@ -1176,6 +1176,7 @@ static void sta_ps_start(struct sta_info
786 struct ieee80211_sub_if_data *sdata = sta->sdata;
787 struct ieee80211_local *local = sdata->local;
788 struct ps_data *ps;
789 + int tid;
790
791 if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
792 sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
793 @@ -1189,6 +1190,18 @@ static void sta_ps_start(struct sta_info
794 drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta);
795 ps_dbg(sdata, "STA %pM aid %d enters power save mode\n",
796 sta->sta.addr, sta->sta.aid);
797 +
798 + if (!sta->sta.txq[0])
799 + return;
800 +
801 + for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
802 + struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
803 +
804 + if (skb_queue_len(&txqi->queue))
805 + set_bit(tid, &sta->txq_buffered_tids);
806 + else
807 + clear_bit(tid, &sta->txq_buffered_tids);
808 + }
809 }
810
811 static void sta_ps_end(struct sta_info *sta)
812 --- a/net/mac80211/agg-tx.c
813 +++ b/net/mac80211/agg-tx.c
814 @@ -188,6 +188,43 @@ ieee80211_wake_queue_agg(struct ieee8021
815 __release(agg_queue);
816 }
817
818 +static void
819 +ieee80211_agg_stop_txq(struct sta_info *sta, int tid)
820 +{
821 + struct ieee80211_txq *txq = sta->sta.txq[tid];
822 + struct txq_info *txqi;
823 +
824 + if (!txq)
825 + return;
826 +
827 + txqi = to_txq_info(txq);
828 +
829 + /* Lock here to protect against further seqno updates on dequeue */
830 + spin_lock_bh(&txqi->queue.lock);
831 + set_bit(IEEE80211_TXQ_STOP, &txqi->flags);
832 + spin_unlock_bh(&txqi->queue.lock);
833 +}
834 +
835 +static void
836 +ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable)
837 +{
838 + struct ieee80211_txq *txq = sta->sta.txq[tid];
839 + struct txq_info *txqi;
840 +
841 + if (!txq)
842 + return;
843 +
844 + txqi = to_txq_info(txq);
845 +
846 + if (enable)
847 + set_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
848 + else
849 + clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags);
850 +
851 + clear_bit(IEEE80211_TXQ_STOP, &txqi->flags);
852 + drv_wake_tx_queue(sta->sdata->local, txqi);
853 +}
854 +
855 /*
856 * splice packets from the STA's pending to the local pending,
857 * requires a call to ieee80211_agg_splice_finish later
858 @@ -247,6 +284,7 @@ static void ieee80211_remove_tid_tx(stru
859 ieee80211_assign_tid_tx(sta, tid, NULL);
860
861 ieee80211_agg_splice_finish(sta->sdata, tid);
862 + ieee80211_agg_start_txq(sta, tid, false);
863
864 kfree_rcu(tid_tx, rcu_head);
865 }
866 @@ -418,6 +456,8 @@ void ieee80211_tx_ba_session_handle_star
867 */
868 clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
869
870 + ieee80211_agg_stop_txq(sta, tid);
871 +
872 /*
873 * Make sure no packets are being processed. This ensures that
874 * we have a valid starting sequence number and that in-flight
875 @@ -440,6 +480,8 @@ void ieee80211_tx_ba_session_handle_star
876 ieee80211_agg_splice_finish(sdata, tid);
877 spin_unlock_bh(&sta->lock);
878
879 + ieee80211_agg_start_txq(sta, tid, false);
880 +
881 kfree_rcu(tid_tx, rcu_head);
882 return;
883 }
884 @@ -666,6 +708,8 @@ static void ieee80211_agg_tx_operational
885 ieee80211_agg_splice_finish(sta->sdata, tid);
886
887 spin_unlock_bh(&sta->lock);
888 +
889 + ieee80211_agg_start_txq(sta, tid, true);
890 }
891
892 void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
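
Editor's note, not part of the patch: the "mac80211 software tx queueing" DOC section above says that .release_buffered_frames() should first release the frames buffered inside the driver and then pull the remainder from the intermediate queues via ieee80211_tx_dequeue(). A hypothetical driver might follow that contract roughly as below; the mydrv_* helpers are made up and EOSP/more-data handling is omitted for brevity.

    #include <net/mac80211.h>

    /* assumed driver helpers */
    void mydrv_push_to_hw(struct ieee80211_hw *hw, struct ieee80211_txq *txq,
    		      struct sk_buff *skb);
    /* flush up to @max frames buffered in the driver for @tid, return count */
    int mydrv_flush_driver_buffer(struct ieee80211_hw *hw,
    			      struct ieee80211_sta *sta, int tid, int max);

    static void mydrv_release_buffered_frames(struct ieee80211_hw *hw,
    					  struct ieee80211_sta *sta,
    					  u16 tids, int num_frames,
    					  enum ieee80211_frame_release_type reason,
    					  bool more_data)
    {
    	int tid;

    	for (tid = 0; tid < IEEE80211_NUM_TIDS && num_frames > 0; tid++) {
    		struct ieee80211_txq *txq = sta->txq[tid];
    		struct sk_buff *skb;

    		if (!(tids & BIT(tid)))
    			continue;

    		/* 1) frames the driver buffered itself for this TID */
    		num_frames -= mydrv_flush_driver_buffer(hw, sta, tid,
    							num_frames);

    		/* 2) frames mac80211 kept in the intermediate queue */
    		while (num_frames > 0 && txq &&
    		       (skb = ieee80211_tx_dequeue(hw, txq)) != NULL) {
    			mydrv_push_to_hw(hw, txq, skb);
    			num_frames--;
    		}
    	}
    }

Once the relevant txq entries drain, mac80211 notices the empty queues in ieee80211_sta_ps_deliver_response() and recalculates the TIM for the station, so the driver does not need to clear the PVB bits itself.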