1 From: Pablo Neira Ayuso <pablo@netfilter.org>
2 Date: Fri, 20 Nov 2020 13:49:20 +0100
3 Subject: [PATCH] netfilter: flowtable: add vlan support
5 Add the vlan id and protocol to the flow tuple to uniquely identify
6 flows from the receive path. For the transmit path, dev_hard_header() on
7 the vlan device pushes the headers. This patch includes support for two
8 VLAN headers (QinQ) from the ingress path.
10 Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
13 --- a/include/net/netfilter/nf_flow_table.h
14 +++ b/include/net/netfilter/nf_flow_table.h
15 @@ -95,6 +95,8 @@ enum flow_offload_xmit_type {
16 FLOW_OFFLOAD_XMIT_DIRECT,
19 +#define NF_FLOW_TABLE_ENCAP_MAX 2
21 struct flow_offload_tuple {
23 struct in_addr src_v4;
24 @@ -113,13 +115,17 @@ struct flow_offload_tuple {
31 + } encap[NF_FLOW_TABLE_ENCAP_MAX];
33 /* All members above are keys for lookups, see flow_offload_hash(). */
44 struct dst_entry *dst_cache;
45 @@ -174,6 +180,11 @@ struct nf_flow_route {
46 struct dst_entry *dst;
52 + } encap[NF_FLOW_TABLE_ENCAP_MAX];
57 --- a/net/netfilter/nf_flow_table_core.c
58 +++ b/net/netfilter/nf_flow_table_core.c
59 @@ -80,6 +80,7 @@ static int flow_offload_fill_route(struc
61 struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
62 struct dst_entry *dst = route->tuple[dir].dst;
65 switch (flow_tuple->l3proto) {
67 @@ -91,6 +92,12 @@ static int flow_offload_fill_route(struc
70 flow_tuple->iifidx = route->tuple[dir].in.ifindex;
71 + for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
72 + flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
73 + flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
76 + flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
78 switch (route->tuple[dir].xmit_type) {
79 case FLOW_OFFLOAD_XMIT_DIRECT:
80 --- a/net/netfilter/nf_flow_table_ip.c
81 +++ b/net/netfilter/nf_flow_table_ip.c
82 @@ -159,17 +159,38 @@ static bool ip_has_options(unsigned int
83 return thoff != sizeof(struct iphdr);
86 +static void nf_flow_tuple_encap(struct sk_buff *skb,
87 + struct flow_offload_tuple *tuple)
91 + if (skb_vlan_tag_present(skb)) {
92 + tuple->encap[i].id = skb_vlan_tag_get(skb);
93 + tuple->encap[i].proto = skb->vlan_proto;
96 + if (skb->protocol == htons(ETH_P_8021Q)) {
97 + struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb_mac_header(skb);
99 + tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
100 + tuple->encap[i].proto = skb->protocol;
104 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
105 struct flow_offload_tuple *tuple)
107 - unsigned int thoff, hdrsize;
108 + unsigned int thoff, hdrsize, offset = 0;
109 struct flow_ports *ports;
112 - if (!pskb_may_pull(skb, sizeof(*iph)))
113 + if (skb->protocol == htons(ETH_P_8021Q))
114 + offset += VLAN_HLEN;
116 + if (!pskb_may_pull(skb, sizeof(*iph) + offset))
120 + iph = (struct iphdr *)(skb_network_header(skb) + offset);
121 thoff = iph->ihl * 4;
123 if (ip_is_fragment(iph) ||
124 @@ -191,11 +212,11 @@ static int nf_flow_tuple_ip(struct sk_bu
127 thoff = iph->ihl * 4;
128 - if (!pskb_may_pull(skb, thoff + hdrsize))
129 + if (!pskb_may_pull(skb, thoff + hdrsize + offset))
133 - ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
134 + iph = (struct iphdr *)(skb_network_header(skb) + offset);
135 + ports = (struct flow_ports *)(skb_network_header(skb) + thoff + offset);
137 tuple->src_v4.s_addr = iph->saddr;
138 tuple->dst_v4.s_addr = iph->daddr;
139 @@ -204,6 +225,7 @@ static int nf_flow_tuple_ip(struct sk_bu
140 tuple->l3proto = AF_INET;
141 tuple->l4proto = iph->protocol;
142 tuple->iifidx = dev->ifindex;
143 + nf_flow_tuple_encap(skb, tuple);
147 @@ -248,6 +270,40 @@ static unsigned int nf_flow_xmit_xfrm(st
151 +static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto)
153 + if (skb->protocol == htons(ETH_P_8021Q)) {
154 + struct vlan_ethhdr *veth;
156 + veth = (struct vlan_ethhdr *)skb_mac_header(skb);
157 + if (veth->h_vlan_encapsulated_proto == proto)
164 +static void nf_flow_encap_pop(struct sk_buff *skb,
165 + struct flow_offload_tuple_rhash *tuplehash)
167 + struct vlan_hdr *vlan_hdr;
170 + for (i = 0; i < tuplehash->tuple.encap_num; i++) {
171 + if (skb_vlan_tag_present(skb)) {
172 + __vlan_hwaccel_clear_tag(skb);
175 + if (skb->protocol == htons(ETH_P_8021Q)) {
176 + vlan_hdr = (struct vlan_hdr *)skb->data;
177 + __skb_pull(skb, VLAN_HLEN);
178 + vlan_set_encap_proto(skb, vlan_hdr);
179 + skb_reset_network_header(skb);
185 static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
186 const struct flow_offload_tuple_rhash *tuplehash,
188 @@ -276,13 +332,15 @@ nf_flow_offload_ip_hook(void *priv, stru
189 enum flow_offload_tuple_dir dir;
190 struct flow_offload *flow;
191 struct net_device *outdev;
192 + unsigned int thoff, mtu;
194 - unsigned int thoff;
200 - if (skb->protocol != htons(ETH_P_IP))
201 + if (skb->protocol != htons(ETH_P_IP) &&
202 + !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP)))
205 if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
206 @@ -295,14 +353,19 @@ nf_flow_offload_ip_hook(void *priv, stru
207 dir = tuplehash->tuple.dir;
208 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
210 - if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
211 + mtu = flow->tuplehash[dir].tuple.mtu + offset;
212 + if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
215 - if (skb_try_make_writable(skb, sizeof(*iph)))
216 + if (skb->protocol == htons(ETH_P_8021Q))
217 + offset += VLAN_HLEN;
219 + if (skb_try_make_writable(skb, sizeof(*iph) + offset))
222 - thoff = ip_hdr(skb)->ihl * 4;
223 - if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
224 + iph = (struct iphdr *)(skb_network_header(skb) + offset);
225 + thoff = (iph->ihl * 4) + offset;
226 + if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
229 flow_offload_refresh(flow_table, flow);
230 @@ -312,6 +375,9 @@ nf_flow_offload_ip_hook(void *priv, stru
234 + nf_flow_encap_pop(skb, tuplehash);
237 if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
240 @@ -479,14 +545,17 @@ static int nf_flow_nat_ipv6(const struct
241 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
242 struct flow_offload_tuple *tuple)
244 - unsigned int thoff, hdrsize;
245 + unsigned int thoff, hdrsize, offset = 0;
246 struct flow_ports *ports;
247 struct ipv6hdr *ip6h;
249 - if (!pskb_may_pull(skb, sizeof(*ip6h)))
250 + if (skb->protocol == htons(ETH_P_8021Q))
251 + offset += VLAN_HLEN;
253 + if (!pskb_may_pull(skb, sizeof(*ip6h) + offset))
256 - ip6h = ipv6_hdr(skb);
257 + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
259 switch (ip6h->nexthdr) {
261 @@ -503,11 +572,11 @@ static int nf_flow_tuple_ipv6(struct sk_
264 thoff = sizeof(*ip6h);
265 - if (!pskb_may_pull(skb, thoff + hdrsize))
266 + if (!pskb_may_pull(skb, thoff + hdrsize + offset))
269 - ip6h = ipv6_hdr(skb);
270 - ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
271 + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
272 + ports = (struct flow_ports *)(skb_network_header(skb) + thoff + offset);
274 tuple->src_v6 = ip6h->saddr;
275 tuple->dst_v6 = ip6h->daddr;
276 @@ -516,6 +585,7 @@ static int nf_flow_tuple_ipv6(struct sk_
277 tuple->l3proto = AF_INET6;
278 tuple->l4proto = ip6h->nexthdr;
279 tuple->iifidx = dev->ifindex;
280 + nf_flow_tuple_encap(skb, tuple);
284 @@ -533,9 +603,12 @@ nf_flow_offload_ipv6_hook(void *priv, st
285 struct net_device *outdev;
286 struct ipv6hdr *ip6h;
292 - if (skb->protocol != htons(ETH_P_IPV6))
293 + if (skb->protocol != htons(ETH_P_IPV6) &&
294 + !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6)))
297 if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
298 @@ -548,11 +621,15 @@ nf_flow_offload_ipv6_hook(void *priv, st
299 dir = tuplehash->tuple.dir;
300 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
302 - if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
303 + mtu = flow->tuplehash[dir].tuple.mtu + offset;
304 + if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
307 - if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
309 + if (skb->protocol == htons(ETH_P_8021Q))
310 + offset += VLAN_HLEN;
312 + ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
313 + if (nf_flow_state_check(flow, ip6h->nexthdr, skb, sizeof(*ip6h)))
316 flow_offload_refresh(flow_table, flow);
317 @@ -562,6 +639,8 @@ nf_flow_offload_ipv6_hook(void *priv, st
321 + nf_flow_encap_pop(skb, tuplehash);
323 if (skb_try_make_writable(skb, sizeof(*ip6h)))
326 --- a/net/netfilter/nft_flow_offload.c
327 +++ b/net/netfilter/nft_flow_offload.c
328 @@ -66,6 +66,11 @@ static int nft_dev_fill_forward_path(con
329 struct nft_forward_info {
330 const struct net_device *indev;
331 const struct net_device *outdev;
335 + } encap[NF_FLOW_TABLE_ENCAP_MAX];
337 u8 h_source[ETH_ALEN];
339 enum flow_offload_xmit_type xmit_type;
340 @@ -84,9 +89,23 @@ static void nft_dev_path_info(const stru
341 path = &stack->path[i];
342 switch (path->type) {
343 case DEV_PATH_ETHERNET:
344 + case DEV_PATH_VLAN:
345 info->indev = path->dev;
346 if (is_zero_ether_addr(info->h_source))
347 memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
349 + if (path->type == DEV_PATH_ETHERNET)
352 + /* DEV_PATH_VLAN */
353 + if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
354 + info->indev = NULL;
357 + info->outdev = path->dev;
358 + info->encap[info->num_encaps].id = path->encap.id;
359 + info->encap[info->num_encaps].proto = path->encap.proto;
360 + info->num_encaps++;
362 case DEV_PATH_BRIDGE:
363 if (is_zero_ether_addr(info->h_source))
364 @@ -94,7 +113,6 @@ static void nft_dev_path_info(const stru
366 info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
368 - case DEV_PATH_VLAN:
372 @@ -130,6 +148,7 @@ static void nft_dev_forward_path(struct
373 struct net_device_path_stack stack;
374 struct nft_forward_info info = {};
375 unsigned char ha[ETH_ALEN];
378 if (nft_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
379 nft_dev_path_info(&stack, &info, ha);
380 @@ -138,6 +157,11 @@ static void nft_dev_forward_path(struct
383 route->tuple[!dir].in.ifindex = info.indev->ifindex;
384 + for (i = 0; i < info.num_encaps; i++) {
385 + route->tuple[!dir].in.encap[i].id = info.encap[i].id;
386 + route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
388 + route->tuple[!dir].in.num_encaps = info.num_encaps;
390 if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
391 memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);