1 From: Steven Barth <steven@midlink.org>
2 Subject: Add support for MAP-E FMRs (mesh mode)
4 MAP-E FMRs (draft-ietf-softwire-map-10) are rules for IPv4 communication
5 between MAP CEs (mesh mode) without the need to forward such data to a
6 border relay. This is similar to how 6rd works, but for IPv4 over IPv6.
8 Signed-off-by: Steven Barth <cyrus@openwrt.org>
10 include/net/ip6_tunnel.h | 13 ++
11 include/uapi/linux/if_tunnel.h | 13 ++
12 net/ipv6/ip6_tunnel.c | 276 +++++++++++++++++++++++++++++++++++++++--
13 3 files changed, 291 insertions(+), 11 deletions(-)
15 --- a/include/net/ip6_tunnel.h
16 +++ b/include/net/ip6_tunnel.h
18 /* determine capability on a per-packet basis */
19 #define IP6_TNL_F_CAP_PER_PACKET 0x40000
21 +/* IPv6 tunnel FMR */
22 +struct __ip6_tnl_fmr {
23 + struct __ip6_tnl_fmr *next; /* next fmr in list */
24 + struct in6_addr ip6_prefix;
25 + struct in_addr ip4_prefix;
27 + __u8 ip6_prefix_len;
28 + __u8 ip4_prefix_len;
33 struct __ip6_tnl_parm {
34 char name[IFNAMSIZ]; /* name of tunnel device */
35 int link; /* ifindex of underlying L2 interface */
36 @@ -29,6 +41,7 @@ struct __ip6_tnl_parm {
37 __u32 flags; /* tunnel flags */
38 struct in6_addr laddr; /* local tunnel end-point address */
39 struct in6_addr raddr; /* remote tunnel end-point address */
40 + struct __ip6_tnl_fmr *fmrs; /* FMRs */
44 --- a/include/uapi/linux/if_tunnel.h
45 +++ b/include/uapi/linux/if_tunnel.h
46 @@ -77,10 +77,23 @@ enum {
47 IFLA_IPTUN_ENCAP_DPORT,
48 IFLA_IPTUN_COLLECT_METADATA,
53 #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1)
56 + IFLA_IPTUN_FMR_UNSPEC,
57 + IFLA_IPTUN_FMR_IP6_PREFIX,
58 + IFLA_IPTUN_FMR_IP4_PREFIX,
59 + IFLA_IPTUN_FMR_IP6_PREFIX_LEN,
60 + IFLA_IPTUN_FMR_IP4_PREFIX_LEN,
61 + IFLA_IPTUN_FMR_EA_LEN,
62 + IFLA_IPTUN_FMR_OFFSET,
63 + __IFLA_IPTUN_FMR_MAX,
65 +#define IFLA_IPTUN_FMR_MAX (__IFLA_IPTUN_FMR_MAX - 1)
67 enum tunnel_encap_types {
70 --- a/net/ipv6/ip6_tunnel.c
71 +++ b/net/ipv6/ip6_tunnel.c
73 * linux/net/ipv6/sit.c and linux/net/ipv4/ipip.c
78 + * Steven Barth <cyrus@openwrt.org>: MAP-E FMR support
81 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
82 @@ -67,9 +70,9 @@ static bool log_ecn_error = true;
83 module_param(log_ecn_error, bool, 0644);
84 MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
86 -static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
87 +static u32 HASH(const struct in6_addr *addr)
89 - u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
90 + u32 hash = ipv6_addr_hash(addr);
92 return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT);
94 @@ -136,20 +139,29 @@ static struct net_device_stats *ip6_get_
95 static struct ip6_tnl *
96 ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_addr *local)
98 - unsigned int hash = HASH(remote, local);
99 + unsigned int hash = HASH(local);
101 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
103 + struct __ip6_tnl_fmr *fmr;
105 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
106 - if (ipv6_addr_equal(local, &t->parms.laddr) &&
107 - ipv6_addr_equal(remote, &t->parms.raddr) &&
108 - (t->dev->flags & IFF_UP))
109 + if (!ipv6_addr_equal(local, &t->parms.laddr) ||
110 + !(t->dev->flags & IFF_UP))
113 + if (ipv6_addr_equal(remote, &t->parms.raddr))
116 + for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
117 + if (ipv6_prefix_equal(remote, &fmr->ip6_prefix,
118 + fmr->ip6_prefix_len))
123 memset(&any, 0, sizeof(any));
124 - hash = HASH(&any, local);
125 + hash = HASH(local);
126 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
127 if (ipv6_addr_equal(local, &t->parms.laddr) &&
128 ipv6_addr_any(&t->parms.raddr) &&
129 @@ -157,7 +169,7 @@ ip6_tnl_lookup(struct net *net, const st
133 - hash = HASH(remote, &any);
135 for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) {
136 if (ipv6_addr_equal(remote, &t->parms.raddr) &&
137 ipv6_addr_any(&t->parms.laddr) &&
138 @@ -197,7 +209,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n,
140 if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
142 - h = HASH(remote, local);
145 return &ip6n->tnls[prio][h];
147 @@ -377,6 +389,12 @@ ip6_tnl_dev_uninit(struct net_device *de
148 struct net *net = t->net;
149 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
151 + while (t->parms.fmrs) {
152 + struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
153 + kfree(t->parms.fmrs);
154 + t->parms.fmrs = next;
157 if (dev == ip6n->fb_tnl_dev)
158 RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
160 @@ -766,6 +784,107 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
162 EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl);
165 + * ip4ip6_fmr_calc - calculate target / source IPv6-address based on FMR
166 + * @dest: destination IPv6 address buffer
167 + * @iph: IPv4 header of the packet; parsing is bounded by @end
169 + * @xmit: Calculate for xmit or rcv
171 +static void ip4ip6_fmr_calc(struct in6_addr *dest,
172 + const struct iphdr *iph, const uint8_t *end,
173 + const struct __ip6_tnl_fmr *fmr, bool xmit)
175 + int psidlen = fmr->ea_len - (32 - fmr->ip4_prefix_len);
177 + bool use_dest_addr;
178 + const struct iphdr *dsth = iph;
180 + if ((u8*)dsth >= end)
183 + /* find significant IP header */
184 + if (iph->protocol == IPPROTO_ICMP) {
185 + struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
186 + if (ih && ((u8*)&ih[1]) <= end && (
187 + ih->type == ICMP_DEST_UNREACH ||
188 + ih->type == ICMP_SOURCE_QUENCH ||
189 + ih->type == ICMP_TIME_EXCEEDED ||
190 + ih->type == ICMP_PARAMETERPROB ||
191 + ih->type == ICMP_REDIRECT))
192 + dsth = (const struct iphdr*)&ih[1];
195 + /* in xmit-path use dest address and port by default, and the source
196 + ones only for an ICMP error quoting a packet; vice versa in rcv-path */
197 + use_dest_addr = (xmit && dsth == iph) || (!xmit && dsth != iph);
200 + if (((u8*)&dsth[1]) <= end && (
201 + dsth->protocol == IPPROTO_UDP ||
202 + dsth->protocol == IPPROTO_TCP ||
203 + dsth->protocol == IPPROTO_SCTP ||
204 + dsth->protocol == IPPROTO_DCCP)) {
205 + /* for UDP, TCP, SCTP and DCCP source and dest port
206 + follow IPv4 header directly */
207 + portp = ((u8*)dsth) + dsth->ihl * 4;
210 + portp += sizeof(u16);
211 + } else if (iph->protocol == IPPROTO_ICMP) {
212 + struct icmphdr *ih = (struct icmphdr*)(((u8*)dsth) + dsth->ihl * 4);
214 + /* use icmp identifier as port */
215 + if (((u8*)&ih) <= end && (
216 + (use_dest_addr && (
217 + ih->type == ICMP_ECHOREPLY ||
218 + ih->type == ICMP_TIMESTAMPREPLY ||
219 + ih->type == ICMP_INFO_REPLY ||
220 + ih->type == ICMP_ADDRESSREPLY)) ||
221 + (!use_dest_addr && (
222 + ih->type == ICMP_ECHO ||
223 + ih->type == ICMP_TIMESTAMP ||
224 + ih->type == ICMP_INFO_REQUEST ||
225 + ih->type == ICMP_ADDRESS)
227 + portp = (u8*)&ih->un.echo.id;
230 + if ((portp && &portp[2] <= end) || psidlen == 0) {
231 + int frombyte = fmr->ip6_prefix_len / 8;
232 + int fromrem = fmr->ip6_prefix_len % 8;
233 + int bytes = sizeof(struct in6_addr) - frombyte;
234 + const u32 *addr = (use_dest_addr) ? &iph->daddr : &iph->saddr;
235 + u64 eabits = ((u64)ntohl(*addr)) << (32 + fmr->ip4_prefix_len);
238 + /* extract PSID from port and add it to eabits */
241 + psidbits = ((u16)portp[0]) << 8 | ((u16)portp[1]);
242 + psidbits >>= 16 - psidlen - fmr->offset;
243 + psidbits = (u16)(psidbits << (16 - psidlen));
244 + eabits |= ((u64)psidbits) << (48 - (fmr->ea_len - psidlen));
247 + /* rewrite destination address */
248 + *dest = fmr->ip6_prefix;
249 + memcpy(&dest->s6_addr[10], addr, sizeof(*addr));
250 + dest->s6_addr16[7] = htons(psidbits >> (16 - psidlen));
252 + if (bytes > sizeof(u64))
253 + bytes = sizeof(u64);
255 + /* insert eabits */
256 + memcpy(&t, &dest->s6_addr[frombyte], bytes);
257 + t = be64_to_cpu(t) & ~(((((u64)1) << fmr->ea_len) - 1)
258 + << (64 - fmr->ea_len - fromrem));
259 + t = cpu_to_be64(t | (eabits >> fromrem));
260 + memcpy(&dest->s6_addr[frombyte], &t, bytes);
265 static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
266 const struct tnl_ptk_info *tpi,
267 struct metadata_dst *tun_dst,
268 @@ -818,6 +937,27 @@ static int __ip6_tnl_rcv(struct ip6_tnl
269 skb_reset_network_header(skb);
270 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
272 + if (tpi->proto == htons(ETH_P_IP) && tunnel->parms.fmrs &&
273 + !ipv6_addr_equal(&ipv6h->saddr, &tunnel->parms.raddr)) {
274 + /* Packet didn't come from BR, so look up FMR */
275 + struct __ip6_tnl_fmr *fmr;
276 + struct in6_addr expected = tunnel->parms.raddr;
277 + for (fmr = tunnel->parms.fmrs; fmr; fmr = fmr->next)
278 + if (ipv6_prefix_equal(&ipv6h->saddr,
279 + &fmr->ip6_prefix, fmr->ip6_prefix_len))
282 + /* Check that IPv6 matches IPv4 source to prevent spoofing */
284 + ip4ip6_fmr_calc(&expected, ip_hdr(skb),
285 + skb_tail_pointer(skb), fmr, false);
287 + if (!ipv6_addr_equal(&ipv6h->saddr, &expected)) {
293 __skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
295 err = dscp_ecn_decapsulate(tunnel, ipv6h, skb);
296 @@ -958,6 +1098,7 @@ static void init_tel_txopt(struct ipv6_t
297 opt->ops.opt_nflen = 8;
302 * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
303 * @t: the outgoing tunnel device
304 @@ -1310,6 +1451,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str
306 struct ip6_tnl *t = netdev_priv(dev);
307 struct ipv6hdr *ipv6h;
308 + struct __ip6_tnl_fmr *fmr;
309 int encap_limit = -1;
312 @@ -1375,6 +1517,18 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, str
313 fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
314 dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
316 + /* try to find matching FMR */
317 + for (fmr = t->parms.fmrs; fmr; fmr = fmr->next) {
318 + unsigned mshift = 32 - fmr->ip4_prefix_len;
319 + if (ntohl(fmr->ip4_prefix.s_addr) >> mshift ==
320 + ntohl(ip_hdr(skb)->daddr) >> mshift)
324 + /* change dstaddr according to FMR */
326 + ip4ip6_fmr_calc(&fl6.daddr, ip_hdr(skb), skb_tail_pointer(skb), fmr, true);
328 if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
331 @@ -1504,6 +1658,14 @@ ip6_tnl_change(struct ip6_tnl *t, const
332 t->parms.link = p->link;
333 t->parms.proto = p->proto;
334 t->parms.fwmark = p->fwmark;
336 + while (t->parms.fmrs) {
337 + struct __ip6_tnl_fmr *next = t->parms.fmrs->next;
338 + kfree(t->parms.fmrs);
339 + t->parms.fmrs = next;
341 + t->parms.fmrs = p->fmrs;
343 dst_cache_reset(&t->dst_cache);
344 ip6_tnl_link_config(t);
346 @@ -1542,6 +1704,7 @@ ip6_tnl_parm_from_user(struct __ip6_tnl_
347 p->flowinfo = u->flowinfo;
351 memcpy(p->name, u->name, sizeof(u->name));
354 @@ -1926,6 +2089,15 @@ static int ip6_tnl_validate(struct nlatt
358 +static const struct nla_policy ip6_tnl_fmr_policy[IFLA_IPTUN_FMR_MAX + 1] = {
359 + [IFLA_IPTUN_FMR_IP6_PREFIX] = { .len = sizeof(struct in6_addr) },
360 + [IFLA_IPTUN_FMR_IP4_PREFIX] = { .len = sizeof(struct in_addr) },
361 + [IFLA_IPTUN_FMR_IP6_PREFIX_LEN] = { .type = NLA_U8 },
362 + [IFLA_IPTUN_FMR_IP4_PREFIX_LEN] = { .type = NLA_U8 },
363 + [IFLA_IPTUN_FMR_EA_LEN] = { .type = NLA_U8 },
364 + [IFLA_IPTUN_FMR_OFFSET] = { .type = NLA_U8 }
367 static void ip6_tnl_netlink_parms(struct nlattr *data[],
368 struct __ip6_tnl_parm *parms)
370 @@ -1963,6 +2135,46 @@ static void ip6_tnl_netlink_parms(struct
372 if (data[IFLA_IPTUN_FWMARK])
373 parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]);
375 + if (data[IFLA_IPTUN_FMRS]) {
377 + struct nlattr *fmr;
378 + nla_for_each_nested(fmr, data[IFLA_IPTUN_FMRS], rem) {
379 + struct nlattr *fmrd[IFLA_IPTUN_FMR_MAX + 1], *c;
380 + struct __ip6_tnl_fmr *nfmr;
382 + nla_parse_nested(fmrd, IFLA_IPTUN_FMR_MAX,
383 + fmr, ip6_tnl_fmr_policy, NULL);
385 + if (!(nfmr = kzalloc(sizeof(*nfmr), GFP_KERNEL)))
390 + if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX]))
391 + nla_memcpy(&nfmr->ip6_prefix, fmrd[IFLA_IPTUN_FMR_IP6_PREFIX],
392 + sizeof(nfmr->ip6_prefix));
394 + if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX]))
395 + nla_memcpy(&nfmr->ip4_prefix, fmrd[IFLA_IPTUN_FMR_IP4_PREFIX],
396 + sizeof(nfmr->ip4_prefix));
398 + if ((c = fmrd[IFLA_IPTUN_FMR_IP6_PREFIX_LEN]))
399 + nfmr->ip6_prefix_len = nla_get_u8(c);
401 + if ((c = fmrd[IFLA_IPTUN_FMR_IP4_PREFIX_LEN]))
402 + nfmr->ip4_prefix_len = nla_get_u8(c);
404 + if ((c = fmrd[IFLA_IPTUN_FMR_EA_LEN]))
405 + nfmr->ea_len = nla_get_u8(c);
407 + if ((c = fmrd[IFLA_IPTUN_FMR_OFFSET]))
408 + nfmr->offset = nla_get_u8(c);
410 + nfmr->next = parms->fmrs;
411 + parms->fmrs = nfmr;
416 static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[],
417 @@ -2078,6 +2290,12 @@ static void ip6_tnl_dellink(struct net_d
419 static size_t ip6_tnl_get_size(const struct net_device *dev)
421 + const struct ip6_tnl *t = netdev_priv(dev);
422 + struct __ip6_tnl_fmr *c;
424 + for (c = t->parms.fmrs; c; c = c->next)
428 /* IFLA_IPTUN_LINK */
430 @@ -2107,6 +2325,24 @@ static size_t ip6_tnl_get_size(const str
432 /* IFLA_IPTUN_FWMARK */
434 + /* IFLA_IPTUN_FMRS */
435 + nla_total_size(0) +
438 + nla_total_size(0) +
439 + /* IFLA_IPTUN_FMR_IP6_PREFIX */
440 + nla_total_size(sizeof(struct in6_addr)) +
441 + /* IFLA_IPTUN_FMR_IP4_PREFIX */
442 + nla_total_size(sizeof(struct in_addr)) +
443 + /* IFLA_IPTUN_FMR_EA_LEN */
444 + nla_total_size(1) +
445 + /* IFLA_IPTUN_FMR_IP6_PREFIX_LEN */
446 + nla_total_size(1) +
447 + /* IFLA_IPTUN_FMR_IP4_PREFIX_LEN */
448 + nla_total_size(1) +
449 + /* IFLA_IPTUN_FMR_OFFSET */
455 @@ -2114,6 +2350,9 @@ static int ip6_tnl_fill_info(struct sk_b
457 struct ip6_tnl *tunnel = netdev_priv(dev);
458 struct __ip6_tnl_parm *parm = &tunnel->parms;
459 + struct __ip6_tnl_fmr *c;
461 + struct nlattr *fmrs;
463 if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
464 nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) ||
465 @@ -2123,9 +2362,27 @@ static int ip6_tnl_fill_info(struct sk_b
466 nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
467 nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) ||
468 nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto) ||
469 - nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark))
470 + nla_put_u32(skb, IFLA_IPTUN_FWMARK, parm->fwmark) ||
471 + !(fmrs = nla_nest_start(skb, IFLA_IPTUN_FMRS)))
472 goto nla_put_failure;
474 + for (c = parm->fmrs; c; c = c->next) {
475 + struct nlattr *fmr = nla_nest_start(skb, ++fmrcnt);
477 + nla_put(skb, IFLA_IPTUN_FMR_IP6_PREFIX,
478 + sizeof(c->ip6_prefix), &c->ip6_prefix) ||
479 + nla_put(skb, IFLA_IPTUN_FMR_IP4_PREFIX,
480 + sizeof(c->ip4_prefix), &c->ip4_prefix) ||
481 + nla_put_u8(skb, IFLA_IPTUN_FMR_IP6_PREFIX_LEN, c->ip6_prefix_len) ||
482 + nla_put_u8(skb, IFLA_IPTUN_FMR_IP4_PREFIX_LEN, c->ip4_prefix_len) ||
483 + nla_put_u8(skb, IFLA_IPTUN_FMR_EA_LEN, c->ea_len) ||
484 + nla_put_u8(skb, IFLA_IPTUN_FMR_OFFSET, c->offset))
485 + goto nla_put_failure;
487 + nla_nest_end(skb, fmr);
489 + nla_nest_end(skb, fmrs);
491 if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) ||
492 nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) ||
493 nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) ||
494 @@ -2165,6 +2422,7 @@ static const struct nla_policy ip6_tnl_p
495 [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 },
496 [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG },
497 [IFLA_IPTUN_FWMARK] = { .type = NLA_U32 },
498 + [IFLA_IPTUN_FMRS] = { .type = NLA_NESTED },
501 static struct rtnl_link_ops ip6_link_ops __read_mostly = {