1 From: Janusz Dziedzic <janusz.dziedzic@tieto.com>
2 Date: Fri, 19 Feb 2016 11:01:50 +0100
3 Subject: [PATCH] mac80211: add NEED_ALIGNED4_SKBS hw flag
5 HW/driver should set the NEEDS_ALIGNED4_SKBS flag if it
6 requires skbs aligned to four-byte boundaries.
7 This affects only the TX direction.
9 Padding is added after ieee80211_hdr, before IV/LLC.
11 Previously we had to do memmove(hdrlen) twice in the
12 driver: once before passing the frame to HW, and again
13 in TX completion (to be sure monitor will report
14 this TX frame correctly).
16 With this patch we can skip this memmove() and save CPU.
18 Currently this was tested with ath9k, both hw/sw crypt for
21 Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
24 --- a/include/net/mac80211.h
25 +++ b/include/net/mac80211.h
26 @@ -2043,6 +2043,9 @@ struct ieee80211_txq {
27 * The stack will not do fragmentation.
28 * The callback for @set_frag_threshold should be set as well.
30 + * @IEEE80211_HW_NEEDS_ALIGNED4_SKBS: Driver needs skbs aligned to four-byte
31 + * boundaries. Padding is added after ieee80211_hdr, before IV/LLC.
33 * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays
35 enum ieee80211_hw_flags {
36 @@ -2085,6 +2088,7 @@ enum ieee80211_hw_flags {
37 IEEE80211_HW_TX_FRAG_LIST,
38 IEEE80211_HW_REPORTS_LOW_ACK,
39 IEEE80211_HW_SUPPORTS_TX_FRAG,
40 + IEEE80211_HW_NEEDS_ALIGNED4_SKBS,
42 /* keep last, obviously */
43 NUM_IEEE80211_HW_FLAGS
44 --- a/net/mac80211/debugfs.c
45 +++ b/net/mac80211/debugfs.c
46 @@ -211,6 +211,7 @@ static const char *hw_flag_names[] = {
48 FLAG(REPORTS_LOW_ACK),
49 FLAG(SUPPORTS_TX_FRAG),
50 + FLAG(NEEDS_ALIGNED4_SKBS),
54 --- a/net/mac80211/ieee80211_i.h
55 +++ b/net/mac80211/ieee80211_i.h
56 @@ -1553,6 +1553,29 @@ ieee80211_vif_get_num_mcast_if(struct ie
60 +static inline unsigned int
61 +ieee80211_hdr_padsize(struct ieee80211_hw *hw, unsigned int hdrlen)
64 + * Since hdrlen is already aligned to a two-byte boundary,
65 + * a simple check with & 2 returns the correct padsize.
67 + if (ieee80211_hw_check(hw, NEEDS_ALIGNED4_SKBS))
72 +static inline unsigned int
73 +ieee80211_padded_hdrlen(struct ieee80211_hw *hw, __le16 fc)
75 + unsigned int hdrlen;
77 + hdrlen = ieee80211_hdrlen(fc);
78 + hdrlen += ieee80211_hdr_padsize(hw, hdrlen);
83 u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
84 struct ieee80211_rx_status *status,
85 unsigned int mpdu_len,
86 --- a/net/mac80211/sta_info.h
87 +++ b/net/mac80211/sta_info.h
88 @@ -282,7 +282,7 @@ struct ieee80211_fast_tx {
90 u8 sa_offs, da_offs, pn_offs;
92 - u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
93 + u8 hdr[30 + 2 + 2 + IEEE80211_FAST_XMIT_MAX_IV +
94 sizeof(rfc1042_header)] __aligned(2);
96 struct rcu_head rcu_head;
97 --- a/net/mac80211/status.c
98 +++ b/net/mac80211/status.c
99 @@ -693,9 +693,22 @@ void ieee80211_tx_monitor(struct ieee802
100 struct sk_buff *skb2;
101 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
102 struct ieee80211_sub_if_data *sdata;
103 + struct ieee80211_hdr *hdr = (void *)skb->data;
104 struct net_device *prev_dev = NULL;
105 + unsigned int hdrlen, padsize;
108 + /* Remove padding if it was added */
109 + if (ieee80211_hw_check(&local->hw, NEEDS_ALIGNED4_SKBS)) {
110 + hdrlen = ieee80211_hdrlen(hdr->frame_control);
111 + padsize = ieee80211_hdr_padsize(&local->hw, hdrlen);
113 + if (padsize && skb->len > hdrlen + padsize) {
114 + memmove(skb->data + padsize, skb->data, hdrlen);
115 + skb_pull(skb, padsize);
119 /* send frame to monitor interfaces now */
120 rtap_len = ieee80211_tx_radiotap_len(info);
121 if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) {
122 --- a/net/mac80211/tkip.c
123 +++ b/net/mac80211/tkip.c
124 @@ -201,10 +201,12 @@ void ieee80211_get_tkip_p2k(struct ieee8
126 struct ieee80211_key *key = (struct ieee80211_key *)
127 container_of(keyconf, struct ieee80211_key, conf);
128 + struct ieee80211_hw *hw = &key->local->hw;
129 const u8 *tk = &key->conf.key[NL80211_TKIP_DATA_OFFSET_ENCR_KEY];
130 struct tkip_ctx *ctx = &key->u.tkip.tx;
131 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
132 - const u8 *data = (u8 *)hdr + ieee80211_hdrlen(hdr->frame_control);
133 + const u8 *data = (u8 *)hdr + ieee80211_padded_hdrlen(hw,
134 + hdr->frame_control);
135 u32 iv32 = get_unaligned_le32(&data[4]);
136 u16 iv16 = data[2] | (data[0] << 8);
138 --- a/net/mac80211/tx.c
139 +++ b/net/mac80211/tx.c
140 @@ -1176,8 +1176,7 @@ ieee80211_tx_prepare(struct ieee80211_su
141 info->flags &= ~IEEE80211_TX_INTFL_NEED_TXPROCESSING;
143 hdr = (struct ieee80211_hdr *) skb->data;
145 - tx->hdrlen = ieee80211_hdrlen(hdr->frame_control);
146 + tx->hdrlen = ieee80211_padded_hdrlen(&local->hw, hdr->frame_control);
150 @@ -2152,7 +2151,7 @@ netdev_tx_t ieee80211_monitor_start_xmit
153 hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr);
154 - hdrlen = ieee80211_hdrlen(hdr->frame_control);
155 + hdrlen = ieee80211_padded_hdrlen(&local->hw, hdr->frame_control);
157 if (skb->len < len_rthdr + hdrlen)
159 @@ -2370,7 +2369,7 @@ static struct sk_buff *ieee80211_build_h
160 struct ieee80211_chanctx_conf *chanctx_conf;
161 struct ieee80211_sub_if_data *ap_sdata;
162 enum nl80211_band band;
168 @@ -2590,6 +2589,9 @@ static struct sk_buff *ieee80211_build_h
172 + /* Check aligned4 skb required */
173 + padsize = ieee80211_hdr_padsize(&sdata->local->hw, hdrlen);
176 * Drop unicast frames to unauthorised stations unless they are
177 * EAPOL frames from the local station.
178 @@ -2670,6 +2672,7 @@ static struct sk_buff *ieee80211_build_h
180 skb_pull(skb, skip_header_bytes);
181 head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
182 + head_need += padsize;
185 * So we need to modify the skb header and hence need a copy of
186 @@ -2702,6 +2705,9 @@ static struct sk_buff *ieee80211_build_h
187 memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
191 + memset(skb_push(skb, padsize), 0, padsize);
193 if (ieee80211_is_data_qos(fc)) {
196 @@ -2877,6 +2883,9 @@ void ieee80211_check_fast_xmit(struct st
197 fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
200 + /* Check aligned4 skb required */
201 + build.hdr_len += ieee80211_hdr_padsize(&local->hw, build.hdr_len);
203 /* We store the key here so there's no point in using rcu_dereference()
204 * but that's fine because the code that changes the pointers will call
205 * this function after doing so. For a single CPU that would be enough,
206 @@ -3464,7 +3473,7 @@ begin:
209 (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV))
210 - pn_offs = ieee80211_hdrlen(hdr->frame_control);
211 + pn_offs = tx.hdrlen;
213 ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs,
215 --- a/net/mac80211/util.c
216 +++ b/net/mac80211/util.c
217 @@ -1225,6 +1225,7 @@ void ieee80211_send_auth(struct ieee8021
220 struct ieee80211_local *local = sdata->local;
221 + struct ieee80211_hw *hw = &local->hw;
223 struct ieee80211_mgmt *mgmt;
225 @@ -1252,7 +1253,7 @@ void ieee80211_send_auth(struct ieee8021
226 memcpy(skb_put(skb, extra_len), extra, extra_len);
228 if (auth_alg == WLAN_AUTH_SHARED_KEY && transaction == 3) {
229 - hdrlen = ieee80211_hdrlen(mgmt->frame_control);
230 + hdrlen = ieee80211_padded_hdrlen(hw, mgmt->frame_control);
231 mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
232 err = ieee80211_wep_encrypt(local, skb, hdrlen, key,