fix device claim on interface reload
[project/netifd.git] / system-linux.c
1 #define _GNU_SOURCE
2
3 #include <sys/socket.h>
4 #include <sys/ioctl.h>
5 #include <sys/stat.h>
6 #include <sys/syscall.h>
7
8 #include <linux/rtnetlink.h>
9 #include <linux/sockios.h>
10 #include <linux/if_vlan.h>
11 #include <linux/if_bridge.h>
12
13 #include <unistd.h>
14 #include <string.h>
15 #include <fcntl.h>
16 #include <glob.h>
17
18 #include <netlink/msg.h>
19 #include <netlink/attr.h>
20 #include <netlink/socket.h>
21 #include <libubox/uloop.h>
22
23 #include "netifd.h"
24 #include "device.h"
25 #include "system.h"
26
27 static int sock_ioctl = -1;
28 static struct nl_sock *sock_rtnl = NULL;
29 static struct nl_sock *sock_rtnl_event = NULL;
30
31 static void handler_rtnl_event(struct uloop_fd *u, unsigned int events);
32 static int cb_rtnl_event(struct nl_msg *msg, void *arg);
33 static struct uloop_fd rtnl_event = {.cb = handler_rtnl_event};
34 static struct nl_cb *nl_cb_rtnl_event;
35
36 int system_init(void)
37 {
38 sock_ioctl = socket(AF_LOCAL, SOCK_DGRAM, 0);
39 fcntl(sock_ioctl, F_SETFD, fcntl(sock_ioctl, F_GETFD) | FD_CLOEXEC);
40
41 // Prepare socket for routing / address control
42 sock_rtnl = nl_socket_alloc();
43 if (!sock_rtnl)
44 return -1;
45
46 if (nl_connect(sock_rtnl, NETLINK_ROUTE))
47 goto error_free_sock;
48
49 // Prepare socket for link events
50 nl_cb_rtnl_event = nl_cb_alloc(NL_CB_DEFAULT);
51 if (!nl_cb_rtnl_event)
52 goto error_free_sock;
53
54 nl_cb_set(nl_cb_rtnl_event, NL_CB_VALID, NL_CB_CUSTOM,
55 cb_rtnl_event, NULL);
56
57 sock_rtnl_event = nl_socket_alloc();
58 if (!sock_rtnl_event)
59 goto error_free_cb;
60
61 if (nl_connect(sock_rtnl_event, NETLINK_ROUTE))
62 goto error_free_event;
63
64 // Receive network link events form kernel
65 nl_socket_add_membership(sock_rtnl_event, RTNLGRP_LINK);
66
67 rtnl_event.fd = nl_socket_get_fd(sock_rtnl_event);
68 uloop_fd_add(&rtnl_event, ULOOP_READ | ULOOP_EDGE_TRIGGER);
69
70 return 0;
71
72 error_free_event:
73 nl_socket_free(sock_rtnl_event);
74 sock_rtnl_event = NULL;
75 error_free_cb:
76 nl_cb_put(nl_cb_rtnl_event);
77 nl_cb_rtnl_event = NULL;
78 error_free_sock:
79 nl_socket_free(sock_rtnl);
80 sock_rtnl = NULL;
81 return -1;
82 }
83
84 // If socket is ready for reading parse netlink events
85 static void handler_rtnl_event(struct uloop_fd *u, unsigned int events)
86 {
87 nl_recvmsgs(sock_rtnl_event, nl_cb_rtnl_event);
88 }
89
/*
 * Write the string val (without its terminating NUL) to the file at path.
 * The file is opened write-only and is neither created nor truncated.
 * Failures to open or write are silently ignored.
 */
static void system_set_sysctl(const char *path, const char *val)
{
	int handle = open(path, O_WRONLY);

	if (handle >= 0) {
		write(handle, val, strlen(val));
		close(handle);
	}
}
101
/*
 * Write val to a per-device sysctl style file.
 *
 * path:   printf-style template containing a single %s placeholder
 * device: interface name substituted for the placeholder
 * val:    string written to the resulting file
 *
 * Nothing is written when the expanded path does not fit the buffer.
 */
static void system_set_dev_sysctl(const char *path, const char *device, const char *val)
{
	static char buf[256];
	int n;

	/* The placeholder is the device name, not the value being written. */
	n = snprintf(buf, sizeof(buf), path, device);
	if (n < 0 || (size_t) n >= sizeof(buf))
		return;

	system_set_sysctl(buf, val);
}
109
110 static void system_set_disable_ipv6(struct device *dev, const char *val)
111 {
112 system_set_dev_sysctl("/proc/sys/net/ipv6/conf/%s/disable_ipv6", dev->ifname, val);
113 }
114
115 // Evaluate netlink messages
116 static int cb_rtnl_event(struct nl_msg *msg, void *arg)
117 {
118 struct nlmsghdr *nh = nlmsg_hdr(msg);
119 struct ifinfomsg *ifi = NLMSG_DATA(nh);
120 struct nlattr *nla[__IFLA_MAX];
121
122 if (nh->nlmsg_type != RTM_DELLINK && nh->nlmsg_type != RTM_NEWLINK)
123 goto out;
124
125 nlmsg_parse(nh, sizeof(*ifi), nla, __IFLA_MAX - 1, NULL);
126 if (!nla[IFLA_IFNAME])
127 goto out;
128
129 struct device *dev = device_get(RTA_DATA(nla[IFLA_IFNAME]), false);
130 if (!dev)
131 goto out;
132
133 dev->ifindex = ifi->ifi_index;
134 device_set_present(dev, (nh->nlmsg_type == RTM_NEWLINK));
135
136 out:
137 return 0;
138 }
139
140 static int system_rtnl_call(struct nl_msg *msg)
141 {
142 int s = -(nl_send_auto_complete(sock_rtnl, msg)
143 || nl_wait_for_ack(sock_rtnl));
144 nlmsg_free(msg);
145 return s;
146 }
147
148 int system_bridge_delbr(struct device *bridge)
149 {
150 return ioctl(sock_ioctl, SIOCBRDELBR, bridge->ifname);
151 }
152
153 static int system_bridge_if(const char *bridge, struct device *dev, int cmd, void *data)
154 {
155 struct ifreq ifr;
156 if (dev)
157 ifr.ifr_ifindex = dev->ifindex;
158 else
159 ifr.ifr_data = data;
160 strncpy(ifr.ifr_name, bridge, sizeof(ifr.ifr_name));
161 return ioctl(sock_ioctl, cmd, &ifr);
162 }
163
164 int system_bridge_addif(struct device *bridge, struct device *dev)
165 {
166 system_set_disable_ipv6(dev, "1");
167 return system_bridge_if(bridge->ifname, dev, SIOCBRADDIF, NULL);
168 }
169
170 int system_bridge_delif(struct device *bridge, struct device *dev)
171 {
172 system_set_disable_ipv6(dev, "0");
173 return system_bridge_if(bridge->ifname, dev, SIOCBRDELIF, NULL);
174 }
175
/*
 * Report whether /sys/devices/virtual/net/<name>/bridge exists.
 * buf (buflen bytes) is used as scratch space for the path and holds
 * it on return.
 */
static bool system_is_bridge(const char *name, char *buf, int buflen)
{
	struct stat info;

	snprintf(buf, buflen, "/sys/devices/virtual/net/%s/bridge", name);

	return stat(buf, &info) >= 0;
}
186
/*
 * Find the bridge that the interface called name is a port of.
 *
 * Looks for /sys/devices/virtual/net/<bridge>/brif/<name>/bridge, reads
 * that symlink into buf and returns a pointer (into buf) to the last
 * path component of the link target. Returns NULL when no match exists,
 * the link cannot be read, or the target contains no '/'.
 *
 * buf/buflen: caller supplied scratch buffer; it also backs the result.
 */
static char *system_get_bridge(const char *name, char *buf, int buflen)
{
	char *path;
	ssize_t len;
	glob_t gl;

	if (buflen <= 0)
		return NULL;

	/* Zeroed so that globfree() is safe on every path below. */
	memset(&gl, 0, sizeof(gl));

	snprintf(buf, buflen, "/sys/devices/virtual/net/*/brif/%s/bridge", name);

	/* glob() reports failure (including "no match") with non-zero codes */
	if (glob(buf, GLOB_NOSORT, NULL, &gl) != 0 || gl.gl_pathc == 0) {
		globfree(&gl);
		return NULL;
	}

	/* Keep one byte for the terminator; readlink() does not add one. */
	len = readlink(gl.gl_pathv[0], buf, buflen - 1);
	globfree(&gl);
	if (len < 0)
		return NULL;

	buf[len] = 0;
	path = strrchr(buf, '/');
	if (!path)
		return NULL;

	return path + 1;
}
211
212 static int system_if_resolve(struct device *dev)
213 {
214 struct ifreq ifr;
215 strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name));
216 if (!ioctl(sock_ioctl, SIOCGIFINDEX, &ifr))
217 return ifr.ifr_ifindex;
218 else
219 return 0;
220 }
221
222 static int system_if_flags(const char *ifname, unsigned add, unsigned rem)
223 {
224 struct ifreq ifr;
225 strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
226 ioctl(sock_ioctl, SIOCGIFFLAGS, &ifr);
227 ifr.ifr_flags |= add;
228 ifr.ifr_flags &= ~rem;
229 return ioctl(sock_ioctl, SIOCSIFFLAGS, &ifr);
230 }
231
/* State shared between system_if_clear_entries() and cb_clear_event(). */
struct clear_data {
	struct device *dev;	/* device whose entries are being removed */
	struct nl_msg *msg;	/* dump request; its buffer is reused for delete requests */
	int type;		/* dump request type: RTM_GETADDR or RTM_GETROUTE */
	int af;			/* address family passed by the caller */
	int size;		/* size of the dump request's fixed header */
};
239
240
/* True when the address message following hdr refers to interface ifindex. */
static bool check_ifaddr(struct nlmsghdr *hdr, int ifindex)
{
	const struct ifaddrmsg *addr_hdr = NLMSG_DATA(hdr);

	if (addr_hdr->ifa_index != ifindex)
		return false;

	return true;
}
247
248 static bool check_route(struct nlmsghdr *hdr, int ifindex)
249 {
250 struct nlattr *tb[__RTA_MAX];
251
252 nlmsg_parse(hdr, sizeof(struct rtmsg), tb, __RTA_MAX - 1, NULL);
253 if (!tb[RTA_OIF])
254 return false;
255
256 return *(int *)RTA_DATA(tb[RTA_OIF]) == ifindex;
257 }
258
259 static int cb_clear_event(struct nl_msg *msg, void *arg)
260 {
261 struct clear_data *clr = arg;
262 struct nlmsghdr *hdr = nlmsg_hdr(msg);
263 bool (*cb)(struct nlmsghdr *, int ifindex);
264 int type;
265
266 switch(clr->type) {
267 case RTM_GETADDR:
268 type = RTM_DELADDR;
269 if (hdr->nlmsg_type != RTM_NEWADDR)
270 return NL_SKIP;
271
272 cb = check_ifaddr;
273 break;
274 case RTM_GETROUTE:
275 type = RTM_DELROUTE;
276 if (hdr->nlmsg_type != RTM_NEWROUTE)
277 return NL_SKIP;
278
279 cb = check_route;
280 break;
281 default:
282 return NL_SKIP;
283 }
284
285 if (!cb(hdr, clr->dev->ifindex))
286 return NL_SKIP;
287
288 D(SYSTEM, "Remove %s from device %s\n",
289 type == RTM_DELADDR ? "an address" : "a route",
290 clr->dev->ifname);
291 memcpy(nlmsg_hdr(clr->msg), hdr, hdr->nlmsg_len);
292 hdr = nlmsg_hdr(clr->msg);
293 hdr->nlmsg_type = type;
294 hdr->nlmsg_flags = NLM_F_REQUEST;
295
296 if (!nl_send_auto_complete(sock_rtnl, clr->msg))
297 nl_wait_for_ack(sock_rtnl);
298
299 return NL_SKIP;
300 }
301
302 static int
303 cb_finish_event(struct nl_msg *msg, void *arg)
304 {
305 int *pending = arg;
306 *pending = 0;
307 return NL_STOP;
308 }
309
310 static int
311 error_handler(struct sockaddr_nl *nla, struct nlmsgerr *err, void *arg)
312 {
313 int *pending = arg;
314 *pending = err->error;
315 return NL_STOP;
316 }
317
318 static void
319 system_if_clear_entries(struct device *dev, int type, int af)
320 {
321 struct clear_data clr;
322 struct nl_cb *cb = nl_cb_alloc(NL_CB_DEFAULT);
323 struct rtmsg rtm = {
324 .rtm_family = af,
325 .rtm_flags = RTM_F_CLONED,
326 };
327 int flags = NLM_F_DUMP;
328 int pending = 1;
329
330 clr.af = af;
331 clr.dev = dev;
332 clr.type = type;
333 switch (type) {
334 case RTM_GETADDR:
335 clr.size = sizeof(struct rtgenmsg);
336 break;
337 case RTM_GETROUTE:
338 clr.size = sizeof(struct rtmsg);
339 break;
340 default:
341 return;
342 }
343
344 if (!cb)
345 return;
346
347 clr.msg = nlmsg_alloc_simple(type, flags);
348 if (!clr.msg)
349 goto out;
350
351 nlmsg_append(clr.msg, &rtm, clr.size, 0);
352 nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, cb_clear_event, &clr);
353 nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, cb_finish_event, &pending);
354 nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &pending);
355
356 nl_send_auto_complete(sock_rtnl, clr.msg);
357 while (pending > 0)
358 nl_recvmsgs(sock_rtnl, cb);
359
360 nlmsg_free(clr.msg);
361 out:
362 nl_cb_put(cb);
363 }
364
365 /*
366 * Clear bridge (membership) state and bring down device
367 */
368 void system_if_clear_state(struct device *dev)
369 {
370 static char buf[256];
371 char *bridge;
372
373 if (dev->external)
374 return;
375
376 dev->ifindex = system_if_resolve(dev);
377 if (!dev->ifindex)
378 return;
379
380 system_if_flags(dev->ifname, 0, IFF_UP);
381
382 if (system_is_bridge(dev->ifname, buf, sizeof(buf))) {
383 D(SYSTEM, "Delete existing bridge named '%s'\n", dev->ifname);
384 system_bridge_delbr(dev);
385 return;
386 }
387
388 bridge = system_get_bridge(dev->ifname, buf, sizeof(buf));
389 if (bridge) {
390 D(SYSTEM, "Remove device '%s' from bridge '%s'\n", dev->ifname, bridge);
391 system_bridge_if(bridge, dev, SIOCBRDELIF, NULL);
392 }
393
394 system_if_clear_entries(dev, RTM_GETROUTE, AF_INET);
395 system_if_clear_entries(dev, RTM_GETADDR, AF_INET);
396 system_if_clear_entries(dev, RTM_GETROUTE, AF_INET6);
397 system_if_clear_entries(dev, RTM_GETADDR, AF_INET6);
398 system_set_disable_ipv6(dev, "0");
399 }
400
/* Scale a value in seconds by 100, the tick unit used for the bridge timer requests below. */
static inline unsigned long
sec_to_jiffies(int val)
{
	unsigned long seconds = (unsigned long) val;

	return seconds * 100;
}
406
407 int system_bridge_addbr(struct device *bridge, struct bridge_config *cfg)
408 {
409 unsigned long args[4] = {};
410
411 if (ioctl(sock_ioctl, SIOCBRADDBR, bridge->ifname) < 0)
412 return -1;
413
414 args[0] = BRCTL_SET_BRIDGE_STP_STATE;
415 args[1] = !!cfg->stp;
416 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
417
418 args[0] = BRCTL_SET_BRIDGE_FORWARD_DELAY;
419 args[1] = sec_to_jiffies(cfg->forward_delay);
420 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
421
422 system_set_dev_sysctl("/sys/devices/virtual/net/%s/bridge/multicast_snooping",
423 bridge->ifname, cfg->igmp_snoop ? "1" : "0");
424
425 if (cfg->flags & BRIDGE_OPT_AGEING_TIME) {
426 args[0] = BRCTL_SET_AGEING_TIME;
427 args[1] = sec_to_jiffies(cfg->ageing_time);
428 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
429 }
430
431 if (cfg->flags & BRIDGE_OPT_HELLO_TIME) {
432 args[0] = BRCTL_SET_BRIDGE_HELLO_TIME;
433 args[1] = sec_to_jiffies(cfg->hello_time);
434 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
435 }
436
437 if (cfg->flags & BRIDGE_OPT_MAX_AGE) {
438 args[0] = BRCTL_SET_BRIDGE_MAX_AGE;
439 args[1] = sec_to_jiffies(cfg->max_age);
440 system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
441 }
442
443 return 0;
444 }
445
446 static int system_vlan(struct device *dev, int id)
447 {
448 struct vlan_ioctl_args ifr = {
449 .cmd = SET_VLAN_NAME_TYPE_CMD,
450 .u.name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD,
451 };
452
453 ioctl(sock_ioctl, SIOCSIFVLAN, &ifr);
454
455 if (id < 0) {
456 ifr.cmd = DEL_VLAN_CMD;
457 ifr.u.VID = 0;
458 } else {
459 ifr.cmd = ADD_VLAN_CMD;
460 ifr.u.VID = id;
461 }
462 strncpy(ifr.device1, dev->ifname, sizeof(ifr.device1));
463 return ioctl(sock_ioctl, SIOCSIFVLAN, &ifr);
464 }
465
/* Issue the VLAN add request for id on dev; returns the system_vlan() result. */
int system_vlan_add(struct device *dev, int id)
{
	int ret = system_vlan(dev, id);

	return ret;
}
470
/* Issue the VLAN delete request for dev (system_vlan() with a negative id). */
int system_vlan_del(struct device *dev)
{
	int ret = system_vlan(dev, -1);

	return ret;
}
475
476 static void
477 system_if_apply_settings(struct device *dev)
478 {
479 struct ifreq ifr;
480
481 memset(&ifr, 0, sizeof(ifr));
482 strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name));
483 if (dev->flags & DEV_OPT_MTU) {
484 ifr.ifr_mtu = dev->mtu;
485 ioctl(sock_ioctl, SIOCSIFMTU, &ifr);
486 }
487 if (dev->flags & DEV_OPT_TXQUEUELEN) {
488 ifr.ifr_qlen = dev->txqueuelen;
489 ioctl(sock_ioctl, SIOCSIFTXQLEN, &ifr);
490 }
491 if (dev->flags & DEV_OPT_MACADDR) {
492 memcpy(&ifr.ifr_hwaddr, dev->macaddr, sizeof(dev->macaddr));
493 ioctl(sock_ioctl, SIOCSIFHWADDR, &ifr);
494 }
495
496 dev->ifindex = system_if_resolve(dev);
497 }
498
499 int system_if_up(struct device *dev)
500 {
501 system_if_apply_settings(dev);
502 return system_if_flags(dev->ifname, IFF_UP, 0);
503 }
504
505 int system_if_down(struct device *dev)
506 {
507 return system_if_flags(dev->ifname, 0, IFF_UP);
508 }
509
/*
 * Update the device's present state according to whether its interface
 * name currently resolves to an interface index. Always returns 0.
 */
int system_if_check(struct device *dev)
{
	/* system_if_resolve() returns 0, not a negative value, on failure */
	device_set_present(dev, (system_if_resolve(dev) > 0));
	return 0;
}
515
516 int system_if_dump_stats(struct device *dev, struct blob_buf *b)
517 {
518 const char *const counters[] = {
519 "collisions", "rx_frame_errors", "tx_compressed",
520 "multicast", "rx_length_errors", "tx_dropped",
521 "rx_bytes", "rx_missed_errors", "tx_errors",
522 "rx_compressed", "rx_over_errors", "tx_fifo_errors",
523 "rx_crc_errors", "rx_packets", "tx_heartbeat_errors",
524 "rx_dropped", "tx_aborted_errors", "tx_packets",
525 "rx_errors", "tx_bytes", "tx_window_errors",
526 "rx_fifo_errors", "tx_carrier_errors",
527 };
528 char buf[64];
529 int stats_dir;
530 int i, fd, len;
531
532 snprintf(buf, sizeof(buf), "/sys/class/net/%s/statistics", dev->ifname);
533 stats_dir = open(buf, O_DIRECTORY);
534 if (stats_dir < 0)
535 return -1;
536
537 for (i = 0; i < ARRAY_SIZE(counters); i++) {
538 fd = openat(stats_dir, counters[i], O_RDONLY);
539 if (fd < 0)
540 continue;
541
542 retry:
543 len = read(fd, buf, sizeof(buf));
544 if (len < 0) {
545 if (errno == EINTR)
546 goto retry;
547 continue;
548 }
549
550 buf[len] = 0;
551 blobmsg_add_u32(b, counters[i], strtoul(buf, NULL, 0));
552 close(fd);
553 }
554
555 close(stats_dir);
556 return 0;
557 }
558
559 static int system_addr(struct device *dev, struct device_addr *addr, int cmd)
560 {
561 int alen = ((addr->flags & DEVADDR_FAMILY) == DEVADDR_INET4) ? 4 : 16;
562 struct ifaddrmsg ifa = {
563 .ifa_family = (alen == 4) ? AF_INET : AF_INET6,
564 .ifa_prefixlen = addr->mask,
565 .ifa_index = dev->ifindex,
566 };
567
568 struct nl_msg *msg;
569
570 dev = addr->device;
571 if (dev) {
572 if (!dev->ifindex)
573 return -1;
574
575 ifa.ifa_index = dev->ifindex;
576 }
577
578 msg = nlmsg_alloc_simple(cmd, 0);
579 if (!msg)
580 return -1;
581
582 nlmsg_append(msg, &ifa, sizeof(ifa), 0);
583 nla_put(msg, IFA_LOCAL, alen, &addr->addr);
584 return system_rtnl_call(msg);
585 }
586
587 int system_add_address(struct device *dev, struct device_addr *addr)
588 {
589 return system_addr(dev, addr, RTM_NEWADDR);
590 }
591
592 int system_del_address(struct device *dev, struct device_addr *addr)
593 {
594 return system_addr(dev, addr, RTM_DELADDR);
595 }
596
597 static int system_rt(struct device *dev, struct device_route *route, int cmd)
598 {
599 int alen = ((route->flags & DEVADDR_FAMILY) == DEVADDR_INET4) ? 4 : 16;
600 bool have_gw;
601 unsigned int flags = 0;
602 int ifindex = dev->ifindex;
603
604 if (alen == 4)
605 have_gw = !!route->nexthop.in.s_addr;
606 else
607 have_gw = route->nexthop.in6.s6_addr32[0] ||
608 route->nexthop.in6.s6_addr32[1] ||
609 route->nexthop.in6.s6_addr32[2] ||
610 route->nexthop.in6.s6_addr32[3];
611
612 unsigned char scope = (cmd == RTM_DELROUTE) ? RT_SCOPE_NOWHERE :
613 (have_gw) ? RT_SCOPE_UNIVERSE : RT_SCOPE_LINK;
614
615 struct rtmsg rtm = {
616 .rtm_family = (alen == 4) ? AF_INET : AF_INET6,
617 .rtm_dst_len = route->mask,
618 .rtm_table = RT_TABLE_MAIN,
619 .rtm_protocol = RTPROT_BOOT,
620 .rtm_scope = scope,
621 .rtm_type = (cmd == RTM_DELROUTE) ? 0: RTN_UNICAST,
622 };
623 struct nl_msg *msg;
624
625 if (cmd == RTM_NEWROUTE)
626 flags |= NLM_F_CREATE | NLM_F_REPLACE;
627
628 dev = route->device;
629 if (dev) {
630 if (!dev->ifindex)
631 return -1;
632
633 ifindex = dev->ifindex;
634 }
635
636 msg = nlmsg_alloc_simple(cmd, flags);
637 if (!msg)
638 return -1;
639
640 nlmsg_append(msg, &rtm, sizeof(rtm), 0);
641
642 if (route->mask)
643 nla_put(msg, RTA_DST, alen, &route->addr);
644
645 if (have_gw)
646 nla_put(msg, RTA_GATEWAY, alen, &route->nexthop);
647
648 if (route->flags & DEVADDR_DEVICE)
649 nla_put_u32(msg, RTA_OIF, ifindex);
650
651 return system_rtnl_call(msg);
652 }
653
654 int system_add_route(struct device *dev, struct device_route *route)
655 {
656 return system_rt(dev, route, RTM_NEWROUTE);
657 }
658
659 int system_del_route(struct device *dev, struct device_route *route)
660 {
661 return system_rt(dev, route, RTM_DELROUTE);
662 }
663
/*
 * Write "-1" to the IPv4 and IPv6 route flush files under /proc/sys/net.
 * Files that cannot be opened are skipped. Always returns 0.
 */
int system_flush_routes(void)
{
	static const char *const names[] = {
		"/proc/sys/net/ipv4/route/flush",
		"/proc/sys/net/ipv6/route/flush"
	};
	unsigned int idx;

	/* system_set_sysctl() does the same open / write / close sequence. */
	for (idx = 0; idx < ARRAY_SIZE(names); idx++)
		system_set_sysctl(names[idx], "-1");

	return 0;
}
682
/*
 * Return the seconds part of CLOCK_MONOTONIC, obtained through the raw
 * clock_gettime system call. If that fails, fall back to the seconds
 * reported by gettimeofday(); if that fails as well, return 0.
 */
time_t system_get_rtime(void)
{
	struct timespec mono;
	struct timeval wall;

	if (!syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &mono))
		return mono.tv_sec;

	if (!gettimeofday(&wall, NULL))
		return wall.tv_sec;

	return 0;
}