ath9k: improve performance in tx status handling
authorFelix Fietkau <nbd@nbd.name>
Tue, 2 Aug 2016 10:21:27 +0000 (12:21 +0200)
committerFelix Fietkau <nbd@nbd.name>
Thu, 4 Aug 2016 21:10:41 +0000 (23:10 +0200)
Signed-off-by: Felix Fietkau <nbd@nbd.name>
package/kernel/mac80211/patches/335-ath9k-use-ieee80211_tx_status_noskb-where-possible.patch [new file with mode: 0644]

diff --git a/package/kernel/mac80211/patches/335-ath9k-use-ieee80211_tx_status_noskb-where-possible.patch b/package/kernel/mac80211/patches/335-ath9k-use-ieee80211_tx_status_noskb-where-possible.patch
new file mode 100644 (file)
index 0000000..dbb5b90
--- /dev/null
@@ -0,0 +1,305 @@
+From: Felix Fietkau <nbd@nbd.name>
+Date: Tue, 2 Aug 2016 12:12:18 +0200
+Subject: [PATCH] ath9k: use ieee80211_tx_status_noskb where possible
+
+It removes the need for undoing the padding changes to skb->data and it
+improves performance by eliminating one tx status lookup per MPDU in the
+status path. It is also useful for preparing a follow-up fix to better
+handle powersave filtering.
+
+Signed-off-by: Felix Fietkau <nbd@nbd.name>
+---
+
+--- a/drivers/net/wireless/ath/ath9k/xmit.c
++++ b/drivers/net/wireless/ath/ath9k/xmit.c
+@@ -50,9 +50,11 @@ static u16 bits_per_symbol[][2] = {
+ static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq,
+                              struct ath_atx_tid *tid, struct sk_buff *skb);
+ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
+-                          int tx_flags, struct ath_txq *txq);
++                          int tx_flags, struct ath_txq *txq,
++                          struct ieee80211_sta *sta);
+ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
+                               struct ath_txq *txq, struct list_head *bf_q,
++                              struct ieee80211_sta *sta,
+                               struct ath_tx_status *ts, int txok);
+ static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq,
+                            struct list_head *head, bool internal);
+@@ -77,6 +79,22 @@ enum {
+ /* Aggregation logic */
+ /*********************/
++static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
++{
++      struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
++      struct ieee80211_sta *sta = info->status.status_driver_data[0];
++
++      if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
++              ieee80211_tx_status(hw, skb);
++              return;
++      }
++
++      if (sta)
++              ieee80211_tx_status_noskb(hw, sta, info);
++
++      dev_kfree_skb(skb);
++}
++
+ void ath_txq_lock(struct ath_softc *sc, struct ath_txq *txq)
+       __acquires(&txq->axq_lock)
+ {
+@@ -92,6 +110,7 @@ void ath_txq_unlock(struct ath_softc *sc
+ void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq)
+       __releases(&txq->axq_lock)
+ {
++      struct ieee80211_hw *hw = sc->hw;
+       struct sk_buff_head q;
+       struct sk_buff *skb;
+@@ -100,7 +119,7 @@ void ath_txq_unlock_complete(struct ath_
+       spin_unlock_bh(&txq->axq_lock);
+       while ((skb = __skb_dequeue(&q)))
+-              ieee80211_tx_status(sc->hw, skb);
++              ath_tx_status(hw, skb);
+ }
+ static void ath_tx_queue_tid(struct ath_softc *sc, struct ath_txq *txq,
+@@ -268,7 +287,7 @@ static void ath_tx_flush_tid(struct ath_
+               }
+               list_add_tail(&bf->list, &bf_head);
+-              ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
++              ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
+       }
+       if (sendbar) {
+@@ -333,12 +352,12 @@ static void ath_tid_drain(struct ath_sof
+               bf = fi->bf;
+               if (!bf) {
+-                      ath_tx_complete(sc, skb, ATH_TX_ERROR, txq);
++                      ath_tx_complete(sc, skb, ATH_TX_ERROR, txq, NULL);
+                       continue;
+               }
+               list_add_tail(&bf->list, &bf_head);
+-              ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
++              ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
+       }
+ }
+@@ -441,12 +460,11 @@ static void ath_tx_count_frames(struct a
+ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq,
+                                struct ath_buf *bf, struct list_head *bf_q,
++                               struct ieee80211_sta *sta,
+                                struct ath_tx_status *ts, int txok)
+ {
+       struct ath_node *an = NULL;
+       struct sk_buff *skb;
+-      struct ieee80211_sta *sta;
+-      struct ieee80211_hw *hw = sc->hw;
+       struct ieee80211_hdr *hdr;
+       struct ieee80211_tx_info *tx_info;
+       struct ath_atx_tid *tid = NULL;
+@@ -475,12 +493,7 @@ static void ath_tx_complete_aggr(struct
+       for (i = 0; i < ts->ts_rateindex; i++)
+               retries += rates[i].count;
+-      rcu_read_lock();
+-
+-      sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2);
+       if (!sta) {
+-              rcu_read_unlock();
+-
+               INIT_LIST_HEAD(&bf_head);
+               while (bf) {
+                       bf_next = bf->bf_next;
+@@ -488,7 +501,7 @@ static void ath_tx_complete_aggr(struct
+                       if (!bf->bf_state.stale || bf_next != NULL)
+                               list_move_tail(&bf->list, &bf_head);
+-                      ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, 0);
++                      ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, ts, 0);
+                       bf = bf_next;
+               }
+@@ -598,7 +611,7 @@ static void ath_tx_complete_aggr(struct
+                                                               ts);
+                       }
+-                      ath_tx_complete_buf(sc, bf, txq, &bf_head, ts,
++                      ath_tx_complete_buf(sc, bf, txq, &bf_head, sta, ts,
+                               !txfail);
+               } else {
+                       if (tx_info->flags & IEEE80211_TX_STATUS_EOSP) {
+@@ -619,7 +632,8 @@ static void ath_tx_complete_aggr(struct
+                                       ath_tx_update_baw(sc, tid, seqno);
+                                       ath_tx_complete_buf(sc, bf, txq,
+-                                                          &bf_head, ts, 0);
++                                                          &bf_head, NULL, ts,
++                                                          0);
+                                       bar_index = max_t(int, bar_index,
+                                               ATH_BA_INDEX(seq_first, seqno));
+                                       break;
+@@ -663,8 +677,6 @@ static void ath_tx_complete_aggr(struct
+               ath_txq_lock(sc, txq);
+       }
+-      rcu_read_unlock();
+-
+       if (needreset)
+               ath9k_queue_reset(sc, RESET_TYPE_TX_ERROR);
+ }
+@@ -679,7 +691,10 @@ static void ath_tx_process_buffer(struct
+                                 struct ath_tx_status *ts, struct ath_buf *bf,
+                                 struct list_head *bf_head)
+ {
++      struct ieee80211_hw *hw = sc->hw;
+       struct ieee80211_tx_info *info;
++      struct ieee80211_sta *sta;
++      struct ieee80211_hdr *hdr;
+       bool txok, flush;
+       txok = !(ts->ts_status & ATH9K_TXERR_MASK);
+@@ -692,6 +707,10 @@ static void ath_tx_process_buffer(struct
+       ts->duration = ath9k_hw_get_duration(sc->sc_ah, bf->bf_desc,
+                                            ts->ts_rateindex);
++
++      hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data;
++      sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2);
++
+       if (!bf_isampdu(bf)) {
+               if (!flush) {
+                       info = IEEE80211_SKB_CB(bf->bf_mpdu);
+@@ -700,9 +719,9 @@ static void ath_tx_process_buffer(struct
+                       ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok);
+                       ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts);
+               }
+-              ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok);
++              ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok);
+       } else
+-              ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok);
++              ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, ts, txok);
+       if (!flush)
+               ath_txq_schedule(sc, txq);
+@@ -938,7 +957,7 @@ ath_tx_get_tid_subframe(struct ath_softc
+                       list_add(&bf->list, &bf_head);
+                       __skb_unlink(skb, *q);
+                       ath_tx_update_baw(sc, tid, seqno);
+-                      ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0);
++                      ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0);
+                       continue;
+               }
+@@ -1847,6 +1866,7 @@ static void ath_drain_txq_list(struct at
+  */
+ void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq)
+ {
++      rcu_read_lock();
+       ath_txq_lock(sc, txq);
+       if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) {
+@@ -1865,6 +1885,7 @@ void ath_draintxq(struct ath_softc *sc,
+       ath_drain_txq_list(sc, txq, &txq->axq_q);
+       ath_txq_unlock_complete(sc, txq);
++      rcu_read_unlock();
+ }
+ bool ath_drain_all_txq(struct ath_softc *sc)
+@@ -2487,7 +2508,8 @@ void ath_tx_cabq(struct ieee80211_hw *hw
+ /*****************/
+ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb,
+-                          int tx_flags, struct ath_txq *txq)
++                          int tx_flags, struct ath_txq *txq,
++                          struct ieee80211_sta *sta)
+ {
+       struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+       struct ath_common *common = ath9k_hw_common(sc->sc_ah);
+@@ -2507,15 +2529,17 @@ static void ath_tx_complete(struct ath_s
+                       tx_info->flags |= IEEE80211_TX_STAT_ACK;
+       }
+-      padpos = ieee80211_hdrlen(hdr->frame_control);
+-      padsize = padpos & 3;
+-      if (padsize && skb->len>padpos+padsize) {
+-              /*
+-               * Remove MAC header padding before giving the frame back to
+-               * mac80211.
+-               */
+-              memmove(skb->data + padsize, skb->data, padpos);
+-              skb_pull(skb, padsize);
++      if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
++              padpos = ieee80211_hdrlen(hdr->frame_control);
++              padsize = padpos & 3;
++              if (padsize && skb->len>padpos+padsize) {
++                      /*
++                       * Remove MAC header padding before giving the frame back to
++                       * mac80211.
++                       */
++                      memmove(skb->data + padsize, skb->data, padpos);
++                      skb_pull(skb, padsize);
++              }
+       }
+       spin_lock_irqsave(&sc->sc_pm_lock, flags);
+@@ -2530,12 +2554,14 @@ static void ath_tx_complete(struct ath_s
+       }
+       spin_unlock_irqrestore(&sc->sc_pm_lock, flags);
+-      __skb_queue_tail(&txq->complete_q, skb);
+       ath_txq_skb_done(sc, txq, skb);
++      tx_info->status.status_driver_data[0] = sta;
++      __skb_queue_tail(&txq->complete_q, skb);
+ }
+ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf,
+                               struct ath_txq *txq, struct list_head *bf_q,
++                              struct ieee80211_sta *sta,
+                               struct ath_tx_status *ts, int txok)
+ {
+       struct sk_buff *skb = bf->bf_mpdu;
+@@ -2563,7 +2589,7 @@ static void ath_tx_complete_buf(struct a
+                       complete(&sc->paprd_complete);
+       } else {
+               ath_debug_stat_tx(sc, bf, ts, txq, tx_flags);
+-              ath_tx_complete(sc, skb, tx_flags, txq);
++              ath_tx_complete(sc, skb, tx_flags, txq, sta);
+       }
+ skip_tx_complete:
+       /* At this point, skb (bf->bf_mpdu) is consumed...make sure we don't
+@@ -2715,10 +2741,12 @@ void ath_tx_tasklet(struct ath_softc *sc
+       u32 qcumask = ((1 << ATH9K_NUM_TX_QUEUES) - 1) & ah->intr_txqs;
+       int i;
++      rcu_read_lock();
+       for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) {
+               if (ATH_TXQ_SETUP(sc, i) && (qcumask & (1 << i)))
+                       ath_tx_processq(sc, &sc->tx.txq[i]);
+       }
++      rcu_read_unlock();
+ }
+ void ath_tx_edma_tasklet(struct ath_softc *sc)
+@@ -2732,6 +2760,7 @@ void ath_tx_edma_tasklet(struct ath_soft
+       struct list_head *fifo_list;
+       int status;
++      rcu_read_lock();
+       for (;;) {
+               if (test_bit(ATH_OP_HW_RESET, &common->op_flags))
+                       break;
+@@ -2802,6 +2831,7 @@ void ath_tx_edma_tasklet(struct ath_soft
+               ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head);
+               ath_txq_unlock_complete(sc, txq);
+       }
++      rcu_read_unlock();
+ }
+ /*****************/