e7bfb9c83dd406e41e523195fabd0a4611dc86b4
[openwrt/staging/yousong.git] / package / kernel / mac80211 / patches / 322-mac80211-add-A-MSDU-tx-support.patch
1 From: Felix Fietkau <nbd@openwrt.org>
2 Date: Fri, 5 Feb 2016 01:38:51 +0100
3 Subject: [PATCH] mac80211: add A-MSDU tx support
4
5 Requires software tx queueing support. frag_list support (for zero-copy)
6 is optional.
7
8 Signed-off-by: Felix Fietkau <nbd@openwrt.org>
9 ---
10
11 --- a/include/net/mac80211.h
12 +++ b/include/net/mac80211.h
13 @@ -709,6 +709,7 @@ enum mac80211_tx_info_flags {
14 * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll
15 * frame (PS-Poll or uAPSD).
16 * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information
17 + * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
18 *
19 * These flags are used in tx_info->control.flags.
20 */
21 @@ -716,6 +717,7 @@ enum mac80211_tx_control_flags {
22 IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0),
23 IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1),
24 IEEE80211_TX_CTRL_RATE_INJECT = BIT(2),
25 + IEEE80211_TX_CTRL_AMSDU = BIT(3),
26 };
27
28 /*
29 @@ -1728,6 +1730,7 @@ struct ieee80211_sta_rates {
30 * size is min(max_amsdu_len, 7935) bytes.
31 * Both additional HT limits must be enforced by the low level driver.
32 * This is defined by the spec (IEEE 802.11-2012 section 8.3.2.2 NOTE 2).
33 + * @max_rc_amsdu_len: Maximum A-MSDU size in bytes recommended by rate control.
34 * @txq: per-TID data TX queues (if driver uses the TXQ abstraction)
35 */
36 struct ieee80211_sta {
37 @@ -1748,6 +1751,7 @@ struct ieee80211_sta {
38 bool mfp;
39 u8 max_amsdu_subframes;
40 u16 max_amsdu_len;
41 + u16 max_rc_amsdu_len;
42
43 struct ieee80211_txq *txq[IEEE80211_NUM_TIDS];
44
45 @@ -1961,6 +1965,15 @@ struct ieee80211_txq {
46 * order and does not need to manage its own reorder buffer or BA session
47 * timeout.
48 *
49 + * @IEEE80211_HW_TX_AMSDU: Hardware (or driver) supports software aggregated
50 + * A-MSDU frames. Requires software tx queueing and fast-xmit support.
51 + * When not using minstrel/minstrel_ht rate control, the driver should
52 + * limit the maximum A-MSDU size based on the current tx rate by setting
53 + * max_rc_amsdu_len in struct ieee80211_sta.
54 + *
55 + * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list
56 + * skbs, needed for zero-copy software A-MSDU.
57 + *
58 * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
59 */
60 enum ieee80211_hw_flags {
61 @@ -1998,6 +2011,8 @@ enum ieee80211_hw_flags {
62 IEEE80211_HW_BEACON_TX_STATUS,
63 IEEE80211_HW_NEEDS_UNIQUE_STA_ADDR,
64 IEEE80211_HW_SUPPORTS_REORDERING_BUFFER,
65 + IEEE80211_HW_TX_AMSDU,
66 + IEEE80211_HW_TX_FRAG_LIST,
67
68 /* keep last, obviously */
69 NUM_IEEE80211_HW_FLAGS
70 @@ -2070,6 +2085,9 @@ enum ieee80211_hw_flags {
71 * size is smaller (an example is LinkSys WRT120N with FW v1.0.07
72 * build 002 Jun 18 2012).
73 *
74 + * @max_tx_fragments: maximum number of tx buffers per (A)-MSDU, sum
75 + * of 1 + skb_shinfo(skb)->nr_frags for each skb in the frag_list.
76 + *
77 * @offchannel_tx_hw_queue: HW queue ID to use for offchannel TX
78 * (if %IEEE80211_HW_QUEUE_CONTROL is set)
79 *
80 @@ -2124,6 +2142,7 @@ struct ieee80211_hw {
81 u8 max_rate_tries;
82 u8 max_rx_aggregation_subframes;
83 u8 max_tx_aggregation_subframes;
84 + u8 max_tx_fragments;
85 u8 offchannel_tx_hw_queue;
86 u8 radiotap_mcs_details;
87 u16 radiotap_vht_details;
88 --- a/net/mac80211/agg-tx.c
89 +++ b/net/mac80211/agg-tx.c
90 @@ -935,6 +935,7 @@ void ieee80211_process_addba_resp(struct
91 size_t len)
92 {
93 struct tid_ampdu_tx *tid_tx;
94 + struct ieee80211_txq *txq;
95 u16 capab, tid;
96 u8 buf_size;
97 bool amsdu;
98 @@ -945,6 +946,10 @@ void ieee80211_process_addba_resp(struct
99 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
100 buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes);
101
102 + txq = sta->sta.txq[tid];
103 + if (!amsdu && txq)
104 + set_bit(IEEE80211_TXQ_NO_AMSDU, &to_txq_info(txq)->flags);
105 +
106 mutex_lock(&sta->ampdu_mlme.mtx);
107
108 tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
109 --- a/net/mac80211/debugfs.c
110 +++ b/net/mac80211/debugfs.c
111 @@ -127,6 +127,8 @@ static const char *hw_flag_names[NUM_IEE
112 FLAG(BEACON_TX_STATUS),
113 FLAG(NEEDS_UNIQUE_STA_ADDR),
114 FLAG(SUPPORTS_REORDERING_BUFFER),
115 + FLAG(TX_AMSDU),
116 + FLAG(TX_FRAG_LIST),
117
118 /* keep last for the build bug below */
119 (void *)0x1
120 --- a/net/mac80211/ieee80211_i.h
121 +++ b/net/mac80211/ieee80211_i.h
122 @@ -799,6 +799,7 @@ struct mac80211_qos_map {
123 enum txq_info_flags {
124 IEEE80211_TXQ_STOP,
125 IEEE80211_TXQ_AMPDU,
126 + IEEE80211_TXQ_NO_AMSDU,
127 };
128
129 struct txq_info {
130 --- a/net/mac80211/tx.c
131 +++ b/net/mac80211/tx.c
132 @@ -1318,6 +1318,10 @@ struct sk_buff *ieee80211_tx_dequeue(str
133 out:
134 spin_unlock_bh(&txqi->queue.lock);
135
136 + if (skb && skb_has_frag_list(skb) &&
137 + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
138 + skb_linearize(skb);
139 +
140 return skb;
141 }
142 EXPORT_SYMBOL(ieee80211_tx_dequeue);
143 @@ -2757,6 +2761,163 @@ void ieee80211_clear_fast_xmit(struct st
144 kfree_rcu(fast_tx, rcu_head);
145 }
146
147 +static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local,
148 + struct sk_buff *skb, int headroom,
149 + int *subframe_len)
150 +{
151 + int amsdu_len = *subframe_len + sizeof(struct ethhdr); /* length plus one ethhdr */
152 + int padding = (4 - amsdu_len) & 3; /* bytes needed to reach a multiple of 4 */
153 +
154 + if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) {
155 + I802_DEBUG_INC(local->tx_expand_skb_head);
156 +
157 + if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) {
158 + wiphy_debug(local->hw.wiphy,
159 + "failed to reallocate TX buffer\n");
160 + return false; /* not enough room and it could not be grown */
161 + }
162 + }
163 +
164 + if (padding) {
165 + *subframe_len += padding; /* caller's length now includes the pad */
166 + memset(skb_put(skb, padding), 0, padding); /* append zeroed pad bytes */
167 + }
168 +
169 + return true; /* headroom available, pad (if any) appended */
170 +}
171 +
172 +static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
173 + struct ieee80211_fast_tx *fast_tx,
174 + struct sk_buff *skb)
175 +{
176 + struct ieee80211_local *local = sdata->local;
177 + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
178 + struct ieee80211_hdr *hdr;
179 + struct ethhdr amsdu_hdr;
180 + int hdr_len = fast_tx->hdr_len - sizeof(rfc1042_header);
181 + int subframe_len = skb->len - hdr_len;
182 + void *data;
183 + u8 *qc;
184 +
185 + if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE)
186 + return false; /* frames flagged as rate control probes are left alone */
187 +
188 + if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
189 + return true; /* already converted by an earlier call */
190 +
191 + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(amsdu_hdr),
192 + &subframe_len))
193 + return false;
194 +
195 + amsdu_hdr.h_proto = cpu_to_be16(subframe_len); /* h_proto slot holds the length */
196 + memcpy(amsdu_hdr.h_source, skb->data + fast_tx->sa_offs, ETH_ALEN);
197 + memcpy(amsdu_hdr.h_dest, skb->data + fast_tx->da_offs, ETH_ALEN);
198 +
199 + data = skb_push(skb, sizeof(amsdu_hdr));
200 + memmove(data, data + sizeof(amsdu_hdr), hdr_len); /* slide leading header to the new front */
201 + memcpy(data + hdr_len, &amsdu_hdr, sizeof(amsdu_hdr)); /* subframe header goes right behind it */
202 +
203 + hdr = data;
204 + qc = ieee80211_get_qos_ctl(hdr);
205 + *qc |= IEEE80211_QOS_CTL_A_MSDU_PRESENT; /* flag the QoS control field */
206 +
207 + info->control.flags |= IEEE80211_TX_CTRL_AMSDU; /* remembered for the early return above */
208 +
209 + return true;
210 +}
211 +
212 +static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
213 + struct sta_info *sta,
214 + struct ieee80211_fast_tx *fast_tx,
215 + struct sk_buff *skb)
216 +{
217 + struct ieee80211_local *local = sdata->local;
218 + u8 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
219 + struct ieee80211_txq *txq = sta->sta.txq[tid];
220 + struct txq_info *txqi;
221 + struct sk_buff **frag_tail, *head;
222 + int subframe_len = skb->len - ETH_ALEN;
223 + u8 max_subframes = sta->sta.max_amsdu_subframes;
224 + int max_frags = local->hw.max_tx_fragments;
225 + int max_amsdu_len = sta->sta.max_amsdu_len;
226 + __be16 len;
227 + void *data;
228 + bool ret = false;
229 + int n = 2, nfrags; /* n: head + skb, plus one per frag_list entry */
230 +
231 + if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
232 + return false;
233 +
234 + if (!txq)
235 + return false;
236 +
237 + txqi = to_txq_info(txq);
238 + if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags))
239 + return false;
240 +
241 + if (sta->sta.max_rc_amsdu_len)
242 + max_amsdu_len = min_t(int, max_amsdu_len,
243 + sta->sta.max_rc_amsdu_len);
244 +
245 + spin_lock_bh(&txqi->queue.lock); /* from here on, leave only via "out" */
246 +
247 + head = skb_peek_tail(&txqi->queue); /* aggregate onto the last queued frame */
248 + if (!head)
249 + goto out;
250 +
251 + if (skb->len + head->len > max_amsdu_len)
252 + goto out;
253 +
254 + /*
255 + * HT A-MPDU limits maximum MPDU size to 4095 bytes. Since aggregation
256 + * sessions are started/stopped without txq flush, use the limit here
257 + * to avoid having to de-aggregate later.
258 + */
259 + if (skb->len + head->len > 4095 &&
260 + !sta->sta.vht_cap.vht_supported)
261 + goto out;
262 +
263 + if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
264 + goto out;
265 +
266 + nfrags = 1 + skb_shinfo(skb)->nr_frags;
267 + nfrags += 1 + skb_shinfo(head)->nr_frags;
268 + frag_tail = &skb_shinfo(head)->frag_list;
269 + while (*frag_tail) { /* walk to the end of head's frag_list */
270 + nfrags += 1 + skb_shinfo(*frag_tail)->nr_frags;
271 + frag_tail = &(*frag_tail)->next;
272 + n++;
273 + }
274 +
275 + if (max_subframes && n > max_subframes) /* n already includes skb */
276 + goto out;
277 +
278 + if (max_frags && nfrags > max_frags)
279 + goto out;
280 +
281 + if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2,
282 + &subframe_len))
283 + goto out; /* queue lock is held: must not return directly */
284 +
285 + ret = true;
286 + data = skb_push(skb, ETH_ALEN + 2);
287 + memmove(data, data + ETH_ALEN + 2, 2 * ETH_ALEN); /* move both addresses to the front */
288 +
289 + data += 2 * ETH_ALEN;
290 + len = cpu_to_be16(subframe_len);
291 + memcpy(data, &len, 2); /* length field follows the addresses */
292 + memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header));
293 +
294 + head->len += skb->len;
295 + head->data_len += skb->len;
296 + *frag_tail = skb; /* append skb to head's frag_list */
297 +
298 +out:
299 + spin_unlock_bh(&txqi->queue.lock);
300 +
301 + return ret;
302 +}
303 +
304 static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
305 struct net_device *dev, struct sta_info *sta,
306 struct ieee80211_fast_tx *fast_tx,
307 @@ -2811,6 +2972,10 @@ static bool ieee80211_xmit_fast(struct i
308
309 ieee80211_tx_stats(dev, skb->len + extra_head);
310
311 + if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
312 + ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
313 + return true;
314 +
315 /* will not be crypto-handled beyond what we do here, so use false
316 * as the may-encrypt argument for the resize to not account for
317 * more room than we already have in 'extra_head'