ath9k: improve performance in tx status handling
[openwrt/openwrt.git] / package / kernel / mac80211 / patches / 335-ath9k-use-ieee80211_tx_status_noskb-where-possible.patch
1 From: Felix Fietkau <nbd@nbd.name>
2 Date: Tue, 2 Aug 2016 12:12:18 +0200
3 Subject: [PATCH] ath9k: use ieee80211_tx_status_noskb where possible
4
5 Using ieee80211_tx_status_noskb removes the need to undo the padding
6 changes to skb->data, and it improves performance by eliminating one
7 station lookup per MPDU in the tx status path. It also prepares for a
8 follow-up fix to better handle powersave filtering.
9
10 Signed-off-by: Felix Fietkau <nbd@nbd.name>
11 ---
12
13 --- a/drivers/net/wireless/ath/ath9k/xmit.c
14 +++ b/drivers/net/wireless/ath/ath9k/xmit.c
15 @@ -50,9 +50,11 @@ static u16 bits_per_symbol[][2] = {
16 static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq,
17 struct ath_atx_tid *tid, struct sk_buff *skb);
18 static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
19 - int tx_flags, struct ath_txq *txq);
20 + int tx_flags, struct ath_txq *txq,
21 + struct ieee80211_sta *sta);
22 static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
23 struct ath_txq *txq, struct list_head *bf_q,
24 + struct ieee80211_sta *sta,
25 struct ath_tx_status *ts, int txok);
26 static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq,
27 struct list_head *head, bool internal);
28 @@ -77,6 +79,22 @@ enum {
29 /* Aggregation logic */
30 /*********************/
31
32 +/* Hand a completed frame to mac80211, skb-less when the frame is not needed. */
33 +static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
34 +{
35 + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
36 + struct ieee80211_sta *sta = info->status.status_driver_data[0];
37 +
38 + /* Full skb path: status was requested, or EOSP must reach mac80211. */
39 + if (info->flags & (IEEE80211_TX_CTL_REQ_TX_STATUS | IEEE80211_TX_STATUS_EOSP)) {
40 + ieee80211_tx_status(hw, skb);
41 + return;
42 + }
43 + if (sta) /* NULL when the completion path supplied no station */
44 + ieee80211_tx_status_noskb(hw, sta, info);
45 + dev_kfree_skb(skb);
46 +}
47 +
48 void ath_txq_lock(struct ath_softc *sc, struct ath_txq *txq)
49 __acquires(&txq->axq_lock)
50 {
51 @@ -92,6 +110,7 @@ void ath_txq_unlock(struct ath_softc *sc
52 void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq)
53 __releases(&txq->axq_lock)
54 {
55 + struct ieee80211_hw *hw = sc->hw;
56 struct sk_buff_head q;
57 struct sk_buff *skb;
58
59 @@ -100,7 +119,7 @@ void ath_txq_unlock_complete(struct ath_
60 spin_unlock_bh(&txq->axq_lock);
61
62 while ((skb = __skb_dequeue(&q)))
63 - ieee80211_tx_status(sc->hw, skb);
64 + ath_tx_status(hw, skb);
65 }
66
67 static void ath_tx_queue_tid(struct ath_softc *sc, struct ath_txq *txq,
68 @@ -268,7 +287,7 @@ static void ath_tx_flush_tid(struct ath_
69 }
70
71 list_add_tail(&bf->list, &bf_head);
72 - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
73 + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
74 }
75
76 if (sendbar) {
77 @@ -333,12 +352,12 @@ static void ath_tid_drain(struct ath_sof
78 bf = fi->bf;
79
80 if (!bf) {
81 - ath_tx_complete(sc, skb, ATH_TX_ERROR, txq);
82 + ath_tx_complete(sc, skb, ATH_TX_ERROR, txq, NULL);
83 continue;
84 }
85
86 list_add_tail(&bf->list, &bf_head);
87 - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
88 + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
89 }
90 }
91
92 @@ -441,12 +460,11 @@ static void ath_tx_count_frames(struct a
93
94 static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq,
95 struct ath_buf *bf, struct list_head *bf_q,
96 + struct ieee80211_sta *sta,
97 struct ath_tx_status *ts, int txok)
98 {
99 struct ath_node *an = NULL;
100 struct sk_buff *skb;
101 - struct ieee80211_sta *sta;
102 - struct ieee80211_hw *hw = sc->hw;
103 struct ieee80211_hdr *hdr;
104 struct ieee80211_tx_info *tx_info;
105 struct ath_atx_tid *tid = NULL;
106 @@ -475,12 +493,7 @@ static void ath_tx_complete_aggr(struct
107 for (i = 0; i < ts->ts_rateindex; i++)
108 retries += rates[i].count;
109
110 - rcu_read_lock();
111 -
112 - sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2);
113 if (!sta) {
114 - rcu_read_unlock();
115 -
116 INIT_LIST_HEAD(&bf_head);
117 while (bf) {
118 bf_next = bf->bf_next;
119 @@ -488,7 +501,7 @@ static void ath_tx_complete_aggr(struct
120 if (!bf->bf_state.stale || bf_next != NULL)
121 list_move_tail(&bf->list, &bf_head);
122
123 - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, 0);
124 + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, ts, 0);
125
126 bf = bf_next;
127 }
128 @@ -598,7 +611,7 @@ static void ath_tx_complete_aggr(struct
129 ts);
130 }
131
132 - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts,
133 + ath_tx_complete_buf(sc, bf, txq, &bf_head, sta, ts,
134 !txfail);
135 } else {
136 if (tx_info->flags & IEEE80211_TX_STATUS_EOSP) {
137 @@ -619,7 +632,8 @@ static void ath_tx_complete_aggr(struct
138 ath_tx_update_baw(sc, tid, seqno);
139
140 ath_tx_complete_buf(sc, bf, txq,
141 - &bf_head, ts, 0);
142 + &bf_head, NULL, ts,
143 + 0);
144 bar_index = max_t(int, bar_index,
145 ATH_BA_INDEX(seq_first, seqno));
146 break;
147 @@ -663,8 +677,6 @@ static void ath_tx_complete_aggr(struct
148 ath_txq_lock(sc, txq);
149 }
150
151 - rcu_read_unlock();
152 -
153 if (needreset)
154 ath9k_queue_reset(sc, RESET_TYPE_TX_ERROR);
155 }
156 @@ -679,7 +691,10 @@ static void ath_tx_process_buffer(struct
157 struct ath_tx_status *ts, struct ath_buf *bf,
158 struct list_head *bf_head)
159 {
160 + struct ieee80211_hw *hw = sc->hw;
161 struct ieee80211_tx_info *info;
162 + struct ieee80211_sta *sta;
163 + struct ieee80211_hdr *hdr;
164 bool txok, flush;
165
166 txok = !(ts->ts_status & ATH9K_TXERR_MASK);
167 @@ -692,6 +707,10 @@ static void ath_tx_process_buffer(struct
168
169 ts->duration = ath9k_hw_get_duration(sc->sc_ah, bf->bf_desc,
170 ts->ts_rateindex);
171 +
172 + hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data;
173 + sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2);
174 +
175 if (!bf_isampdu(bf)) {
176 if (!flush) {
177 info = IEEE80211_SKB_CB(bf->bf_mpdu);
178 @@ -700,9 +719,9 @@ static void ath_tx_process_buffer(struct
179 ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok);
180 ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts);
181 }
182 - ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok);
183 + ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok);
184 } else
185 - ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok);
186 + ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, ts, txok);
187
188 if (!flush)
189 ath_txq_schedule(sc, txq);
190 @@ -938,7 +957,7 @@ ath_tx_get_tid_subframe(struct ath_softc
191 list_add(&bf->list, &bf_head);
192 __skb_unlink(skb, *q);
193 ath_tx_update_baw(sc, tid, seqno);
194 - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
195 + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
196 continue;
197 }
198
199 @@ -1847,6 +1866,7 @@ static void ath_drain_txq_list(struct at
200 */
201 void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq)
202 {
203 + rcu_read_lock();
204 ath_txq_lock(sc, txq);
205
206 if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) {
207 @@ -1865,6 +1885,7 @@ void ath_draintxq(struct ath_softc *sc,
208 ath_drain_txq_list(sc, txq, &txq->axq_q);
209
210 ath_txq_unlock_complete(sc, txq);
211 + rcu_read_unlock();
212 }
213
214 bool ath_drain_all_txq(struct ath_softc *sc)
215 @@ -2487,7 +2508,8 @@ void ath_tx_cabq(struct ieee80211_hw *hw
216 /*****************/
217
218 static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
219 - int tx_flags, struct ath_txq *txq)
220 + int tx_flags, struct ath_txq *txq,
221 + struct ieee80211_sta *sta)
222 {
223 struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
224 struct ath_common *common = ath9k_hw_common(sc->sc_ah);
225 @@ -2507,15 +2529,17 @@ static void ath_tx_complete(struct ath_s
226 tx_info->flags |= IEEE80211_TX_STAT_ACK;
227 }
228
229 - padpos = ieee80211_hdrlen(hdr->frame_control);
230 - padsize = padpos & 3;
231 - if (padsize && skb->len>padpos+padsize) {
232 - /*
233 - * Remove MAC header padding before giving the frame back to
234 - * mac80211.
235 - */
236 - memmove(skb->data + padsize, skb->data, padpos);
237 - skb_pull(skb, padsize);
238 + if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
239 + padpos = ieee80211_hdrlen(hdr->frame_control);
240 + padsize = padpos & 3;
241 + if (padsize && skb->len>padpos+padsize) {
242 + /*
243 + * Remove MAC header padding before giving the frame back to
244 + * mac80211.
245 + */
246 + memmove(skb->data + padsize, skb->data, padpos);
247 + skb_pull(skb, padsize);
248 + }
249 }
250
251 spin_lock_irqsave(&sc->sc_pm_lock, flags);
252 @@ -2530,12 +2554,14 @@ static void ath_tx_complete(struct ath_s
253 }
254 spin_unlock_irqrestore(&sc->sc_pm_lock, flags);
255
256 - __skb_queue_tail(&txq->complete_q, skb);
257 ath_txq_skb_done(sc, txq, skb);
258 + tx_info->status.status_driver_data[0] = sta;
259 + __skb_queue_tail(&txq->complete_q, skb);
260 }
261
262 static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
263 struct ath_txq *txq, struct list_head *bf_q,
264 + struct ieee80211_sta *sta,
265 struct ath_tx_status *ts, int txok)
266 {
267 struct sk_buff *skb = bf->bf_mpdu;
268 @@ -2563,7 +2589,7 @@ static void ath_tx_complete_buf(struct a
269 complete(&sc->paprd_complete);
270 } else {
271 ath_debug_stat_tx(sc, bf, ts, txq, tx_flags);
272 - ath_tx_complete(sc, skb, tx_flags, txq);
273 + ath_tx_complete(sc, skb, tx_flags, txq, sta);
274 }
275 skip_tx_complete:
276 /* At this point, skb (bf->bf_mpdu) is consumed...make sure we don't
277 @@ -2715,10 +2741,12 @@ void ath_tx_tasklet(struct ath_softc *sc
278 u32 qcumask = ((1 << ATH9K_NUM_TX_QUEUES) - 1) & ah->intr_txqs;
279 int i;
280
281 + rcu_read_lock();
282 for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
283 if (ATH_TXQ_SETUP(sc, i) && (qcumask & (1 << i)))
284 ath_tx_processq(sc, &sc->tx.txq[i]);
285 }
286 + rcu_read_unlock();
287 }
288
289 void ath_tx_edma_tasklet(struct ath_softc *sc)
290 @@ -2732,6 +2760,7 @@ void ath_tx_edma_tasklet(struct ath_soft
291 struct list_head *fifo_list;
292 int status;
293
294 + rcu_read_lock();
295 for (;;) {
296 if (test_bit(ATH_OP_HW_RESET, &common->op_flags))
297 break;
298 @@ -2802,6 +2831,7 @@ void ath_tx_edma_tasklet(struct ath_soft
299 ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head);
300 ath_txq_unlock_complete(sc, txq);
301 }
302 + rcu_read_unlock();
303 }
304
305 /*****************/