mac80211: add A-MSDU tx support
[openwrt/openwrt.git] / package / kernel / mac80211 / patches / 322-mac80211-add-A-MSDU-tx-support.patch
1 From: Felix Fietkau <nbd@openwrt.org>
2 Date: Fri, 5 Feb 2016 01:38:51 +0100
3 Subject: [PATCH] mac80211: add A-MSDU tx support
4
5 Requires software tx queueing support. frag_list support (for zero-copy)
6 is optional.
7
8 Signed-off-by: Felix Fietkau <nbd@openwrt.org>
9 ---
10
11 --- a/include/net/mac80211.h
12 +++ b/include/net/mac80211.h
13 @@ -709,6 +709,7 @@ enum mac80211_tx_info_flags {
14 * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll
15 * frame (PS-Poll or uAPSD).
16 * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information
17 + * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
18 *
19 * These flags are used in tx_info->control.flags.
20 */
21 @@ -716,6 +717,7 @@ enum mac80211_tx_control_flags {
22 IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0),
23 IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1),
24 IEEE80211_TX_CTRL_RATE_INJECT = BIT(2),
25 + IEEE80211_TX_CTRL_AMSDU = BIT(3),
26 };
27
28 /*
29 @@ -1961,6 +1963,12 @@ struct ieee80211_txq {
30 * order and does not need to manage its own reorder buffer or BA session
31 * timeout.
32 *
33 + * @IEEE80211_HW_TX_AMSDU: Hardware (or driver) supports software aggregated
34 + * A-MSDU frames. Requires software tx queueing support.
35 + *
36 + * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list
37 + * skbs, needed for zero-copy software A-MSDU.
38 + *
39 * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
40 */
41 enum ieee80211_hw_flags {
42 @@ -1998,6 +2006,8 @@ enum ieee80211_hw_flags {
43 IEEE80211_HW_BEACON_TX_STATUS,
44 IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR,
45 IEEE80211_HW_SUPPORTS_REORDERING_BUFFER,
46 + IEEE80211_HW_TX_AMSDU,
47 + IEEE80211_HW_TX_FRAG_LIST,
48
49 /* keep last, obviously */
50 NUM_IEEE80211_HW_FLAGS
51 @@ -2070,6 +2080,9 @@ enum ieee80211_hw_flags {
52 * size is smaller (an example is LinkSys WRT120N with FW v1.0.07
53 * build 002 Jun 18 2012).
54 *
55 + * @max_tx_amsdu_subframes: maximum number of subframes used in software
56 + * A-MSDU aggregation
57 + *
58 * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
59 * (if %IEEE80211_HW_QUEUE_CONTROL is set)
60 *
61 @@ -2124,6 +2137,7 @@ struct ieee80211_hw {
62 u8 max_rate_tries;
63 u8 max_rx_aggregation_subframes;
64 u8 max_tx_aggregation_subframes;
65 + u8 max_tx_amsdu_subframes;
66 u8 offchannel_tx_hw_queue;
67 u8 radiotap_mcs_details;
68 u16 radiotap_vht_details;
69 --- a/net/mac80211/agg-tx.c
70 +++ b/net/mac80211/agg-tx.c
71 @@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct
72 size_t len)
73 {
74 struct tid_ampdu_tx *tid_tx;
75 + struct ieee80211_txq *txq;
76 u16 capab, tid;
77 u8 buf_size;
78 bool amsdu;
79 @@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct
80 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
81 buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
82
83 + txq = sta->sta.txq[tid];
84 + if (!amsdu && txq)
85 + set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags);
86 +
87 mutex_lock(&sta->ampdu_mlme.mtx);
88
89 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
90 --- a/net/mac80211/debugfs.c
91 +++ b/net/mac80211/debugfs.c
92 @@ -127,6 +127,8 @@ static const char *hw_flag_names[NUM_IEE
93 FLAG(BEACON_TX_STATUS),
94 FLAG(NEEDS_UNIQUE_STA_ADDR),
95 FLAG(SUPPORTS_REORDERING_BUFFER),
96 + FLAG(TX_AMSDU),
97 + FLAG(TX_FRAG_LIST),
98
99 /* keep last for the build bug below */
100 (void *)0x1
101 --- a/net/mac80211/ieee80211_i.h
102 +++ b/net/mac80211/ieee80211_i.h
103 @@ -799,6 +799,7 @@ struct mac80211_qos_map {
104 enum txq_info_flags {
105 IEEE80211_TXQ_STOP,
106 IEEE80211_TXQ_AMPDU,
107 + IEEE80211_TXQ_NO_AMSDU,
108 };
109
110 struct txq_info {
111 --- a/net/mac80211/tx.c
112 +++ b/net/mac80211/tx.c
113 @@ -1318,6 +1318,10 @@ struct sk_buff *ieee80211_tx_dequeue(str
114 out:
115 spin_unlock_bh(&txqi->queue.lock);
116
117 + if (skb && skb_has_frag_list(skb) &&
118 + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
119 + skb_linearize(skb);
120 +
121 return skb;
122 }
123 EXPORT_SYMBOL(ieee80211_tx_dequeue);
124 @@ -2757,6 +2761,149 @@ void ieee80211_clear_fast_xmit(struct st
125 kfree_rcu(fast_tx, rcu_head);
126 }
127
128 +/* Append zero bytes so subframe header + payload end 4-byte aligned. */
129 +static int ieee80211_amsdu_pad(struct sk_buff *skb, int subframe_len)
130 +{
131 + int pad = (4 - (subframe_len + (int)sizeof(struct ethhdr))) & 3;
132 +
133 + if (pad)
134 + memset(skb_put(skb, pad), 0, pad);
135 +
136 + return pad;
137 +}
138 +
139 +/* Make the queued @skb an A-MSDU holding its own payload as first subframe.
140 + * Returns true if @skb is an A-MSDU afterwards, false if resizing failed. */
141 +static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
142 + struct ieee80211_fast_tx *fast_tx,
143 + struct sk_buff *skb)
144 +{
145 + struct ieee80211_local *local = sdata->local;
146 + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
147 + struct ieee80211_hdr *hdr;
148 + struct ethhdr amsdu_hdr;
149 + int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header);
150 + int subframe_len = skb->len - hdr_len;
151 + void *data;
152 + u8 *qc;
153 +
154 + if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
155 + return true;
156 +
157 + if (skb_headroom(skb) < sizeof(amsdu_hdr) || skb_tailroom(skb) < 3) {
158 + I802_DEBUG_INC(local->tx_expand_skb_head);
159 + if (pskb_expand_head(skb, sizeof(amsdu_hdr), 3, GFP_ATOMIC)) {
160 + wiphy_debug(local->hw.wiphy,
161 + "failed to reallocate TX buffer\n");
162 + return false;
163 + }
164 + }
165 +
166 + /* The Length field covers the MSDU only, never the padding. */
167 + ieee80211_amsdu_pad(skb, subframe_len);
168 + amsdu_hdr.h_proto = cpu_to_be16(subframe_len);
169 + memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN);
170 + memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN);
171 +
172 + data = skb_push(skb, sizeof(amsdu_hdr));
173 + memmove(data, data + sizeof(amsdu_hdr), hdr_len);
174 + memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr));
175 +
176 + hdr = data;
177 + qc = ieee80211_get_qos_ctl(hdr);
178 + *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT;
179 + info->control.flags |= IEEE80211_TX_CTRL_AMSDU;
180 +
181 + return true;
182 +}
183 +
184 +/* Try to append @skb as another subframe to the frame at the tail of the
185 + * station's txq; returns true if @skb was aggregated, false if it was not. */
186 +static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
187 + struct sta_info *sta,
188 + struct ieee80211_fast_tx *fast_tx,
189 + struct sk_buff *skb)
190 +{
191 + struct ieee80211_local *local = sdata->local;
192 + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
193 + struct ieee80211_txq *txq = sta->sta.txq[tid];
194 + struct txq_info *txqi;
195 + struct sk_buff **frag_tail, *head;
196 + int subframe_len = skb->len - ETH_ALEN;
197 + int max_amsdu_len;
198 + __be16 len;
199 + void *data;
200 + bool ret = false;
201 + int n = 2; /* head plus the subframe being added */
202 +
203 + if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
204 + return false;
205 +
206 + if (!txq)
207 + return false;
208 +
209 + txqi = to_txq_info(txq);
210 + if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags))
211 + return false;
212 +
213 + /*
214 + * A-MPDU limits maximum MPDU size to 4095 bytes. Since aggregation
215 + * sessions are started/stopped without txq flush, use the limit here
216 + * to avoid having to de-aggregate later.
217 + */
218 + max_amsdu_len = min_t(int, sta->sta.max_amsdu_len, 4095);
219 +
220 + spin_lock_bh(&txqi->queue.lock);
221 +
222 + head = skb_peek_tail(&txqi->queue);
223 + if (!head)
224 + goto out;
225 +
226 + if (skb->len + head->len > max_amsdu_len)
227 + goto out;
228 +
229 + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
230 + goto out;
231 +
232 + frag_tail = &skb_shinfo(head)->frag_list;
233 + while (*frag_tail) {
234 + frag_tail = &(*frag_tail)->next;
235 + n++;
236 + }
237 +
238 + if (local->hw.max_tx_amsdu_subframes &&
239 + n > local->hw.max_tx_amsdu_subframes)
240 + goto out;
241 +
242 + if (skb_headroom(skb) < 8 || skb_tailroom(skb) < 3) {
243 + I802_DEBUG_INC(local->tx_expand_skb_head);
244 + if (pskb_expand_head(skb, 8, 3, GFP_ATOMIC)) {
245 + wiphy_debug(local->hw.wiphy,
246 + "failed to reallocate TX buffer\n");
247 + goto out;
248 + }
249 + }
250 +
251 + ieee80211_amsdu_pad(skb, subframe_len); /* padding is not part of Length */
252 + ret = true;
253 + data = skb_push(skb, ETH_ALEN + 2);
254 + memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN);
255 +
256 + data += 2 * ETH_ALEN;
257 + len = cpu_to_be16(subframe_len);
258 + memcpy(data, &len, 2);
259 + memcpy(data + 2, rfc1042_header, ETH_ALEN);
260 +
261 + head->len += skb->len;
262 + head->data_len += skb->len;
263 + *frag_tail = skb;
264 +
265 +out:
266 + spin_unlock_bh(&txqi->queue.lock);
267 +
268 + return ret;
269 +}
270 +
271 static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
272 struct net_device *dev, struct sta_info *sta,
273 struct ieee80211_fast_tx *fast_tx,
274 @@ -2811,6 +2958,10 @@ static bool ieee80211_xmit_fast(struct i
275
276 ieee80211_tx_stats(dev, skb->len + extra_head);
277
278 + if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
279 + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
280 + return true;
281 +
282 /* will not be crypto-handled beyond what we do here, so use false
283 * as the may-encrypt argument for the resize to not account for
284 * more room than we already have in 'extra_head'