interface: allow renaming interface when moving to jail netns
[project/netifd.git] / system-linux.c
index 9694c6f68d1f5ceda63dcfcc9928ac6df86115e8..04b9bdfc486e3888ee45c715bac05b9f5d63b209 100644 (file)
 #include <net/if.h>
 #include <net/if_arp.h>
 
+#include <limits.h>
 #include <arpa/inet.h>
 #include <netinet/ether.h>
 #include <netinet/in.h>
 
 #include <linux/rtnetlink.h>
+#include <linux/neighbour.h>
 #include <linux/sockios.h>
 #include <linux/ip.h>
 #include <linux/if_addr.h>
@@ -44,6 +46,8 @@
 #include <linux/veth.h>
 #include <linux/version.h>
 
+#include <sched.h>
+
 #ifndef RTN_FAILED_POLICY
 #define RTN_FAILED_POLICY 12
 #endif
@@ -138,8 +142,10 @@ create_socket(int protocol, int groups)
        if (groups)
                nl_join_groups(sock, groups);
 
-       if (nl_connect(sock, protocol))
+       if (nl_connect(sock, protocol)) {
+               nl_socket_free(sock);
                return NULL;
+       }
 
        return sock;
 }
@@ -181,6 +187,21 @@ create_event_socket(struct event_socket *ev, int protocol,
        return true;
 }
 
+/*
+ * Set up the event socket used for hotplug messages.
+ *
+ * Calls create_raw_event_socket() with 1 as its third argument and
+ * ULOOP_ERROR_CB as its last, then sets the netlink receive buffer to
+ * 65535 bytes and records that size in ev->bufsize.
+ *
+ * Returns true on success, false if the socket could not be created or the
+ * buffer size could not be applied.
+ */
+static bool
+create_hotplug_event_socket(struct event_socket *ev, int protocol,
+                           void (*cb)(struct uloop_fd *u, unsigned int events))
+{
+       bool created = create_raw_event_socket(ev, protocol, 1, cb, ULOOP_ERROR_CB);
+
+       if (!created)
+               return false;
+
+       /* Start with a 65K rx buffer; ev->bufsize tracks the current size */
+       ev->bufsize = 65535;
+
+       return nl_socket_set_buffer_size(ev->sock, ev->bufsize, 0) == 0;
+}
+
 static bool
 system_rtn_aton(const char *src, unsigned int *dst)
 {
@@ -249,8 +270,8 @@ int system_init(void)
        if (!create_event_socket(&rtnl_event, NETLINK_ROUTE, cb_rtnl_event))
                return -1;
 
-       if (!create_raw_event_socket(&hotplug_event, NETLINK_KOBJECT_UEVENT, 1,
-                                       handle_hotplug_event, 0))
+       if (!create_hotplug_event_socket(&hotplug_event, NETLINK_KOBJECT_UEVENT,
+                                        handle_hotplug_event))
                return -1;
 
        /* Receive network link events form kernel */
@@ -403,6 +424,36 @@ static void system_bridge_set_startup_query_interval(struct device *dev, const c
                              dev->ifname, val);
 }
 
+/* Hand @val to system_set_dev_sysctl() for the bridge "stp_state" attribute of @dev. */
+static void system_bridge_set_stp_state(struct device *dev, const char *val)
+{
+       static const char attr_path[] = "/sys/devices/virtual/net/%s/bridge/stp_state";
+
+       system_set_dev_sysctl(attr_path, dev->ifname, val);
+}
+
+/* Hand @val to system_set_dev_sysctl() for the bridge "forward_delay" attribute of @dev. */
+static void system_bridge_set_forward_delay(struct device *dev, const char *val)
+{
+       static const char attr_path[] = "/sys/devices/virtual/net/%s/bridge/forward_delay";
+
+       system_set_dev_sysctl(attr_path, dev->ifname, val);
+}
+
+/* Hand @val to system_set_dev_sysctl() for the bridge "priority" attribute of @dev. */
+static void system_bridge_set_priority(struct device *dev, const char *val)
+{
+       static const char attr_path[] = "/sys/devices/virtual/net/%s/bridge/priority";
+
+       system_set_dev_sysctl(attr_path, dev->ifname, val);
+}
+
+/* Hand @val to system_set_dev_sysctl() for the bridge "ageing_time" attribute of @dev. */
+static void system_bridge_set_ageing_time(struct device *dev, const char *val)
+{
+       static const char attr_path[] = "/sys/devices/virtual/net/%s/bridge/ageing_time";
+
+       system_set_dev_sysctl(attr_path, dev->ifname, val);
+}
+
+/* Hand @val to system_set_dev_sysctl() for the bridge "hello_time" attribute of @dev. */
+static void system_bridge_set_hello_time(struct device *dev, const char *val)
+{
+       static const char attr_path[] = "/sys/devices/virtual/net/%s/bridge/hello_time";
+
+       system_set_dev_sysctl(attr_path, dev->ifname, val);
+}
+
+/* Hand @val to system_set_dev_sysctl() for the bridge "max_age" attribute of @dev. */
+static void system_bridge_set_max_age(struct device *dev, const char *val)
+{
+       static const char attr_path[] = "/sys/devices/virtual/net/%s/bridge/max_age";
+
+       system_set_dev_sysctl(attr_path, dev->ifname, val);
+}
+
 static void system_bridge_set_learning(struct device *dev, const char *val)
 {
        system_set_dev_sysctl("/sys/class/net/%s/brport/learning", dev->ifname, val);
@@ -630,13 +681,39 @@ handle_hotplug_event(struct uloop_fd *u, unsigned int events)
        struct sockaddr_nl nla;
        unsigned char *buf = NULL;
        int size;
+       int err;
+       socklen_t errlen = sizeof(err);
+
+       if (!u->error) {
+               while ((size = nl_recv(ev->sock, &nla, &buf, NULL)) > 0) {
+                       if (nla.nl_pid == 0)
+                               handle_hotplug_msg((char *) buf, size);
 
-       while ((size = nl_recv(ev->sock, &nla, &buf, NULL)) > 0) {
-               if (nla.nl_pid == 0)
-                       handle_hotplug_msg((char *) buf, size);
+                       free(buf);
+               }
+               return;
+       }
+
+       if (getsockopt(u->fd, SOL_SOCKET, SO_ERROR, (void *)&err, &errlen))
+               goto abort;
+
+       switch(err) {
+       case ENOBUFS:
+               /* Increase rx buffer size on netlink socket */
+               ev->bufsize *= 2;
+               if (nl_socket_set_buffer_size(ev->sock, ev->bufsize, 0))
+                       goto abort;
+               break;
 
-               free(buf);
+       default:
+               goto abort;
        }
+       u->error = false;
+       return;
+
+abort:
+       uloop_fd_delete(&ev->uloop);
+       return;
 }
 
 static int system_rtnl_call(struct nl_msg *msg)
@@ -766,16 +843,21 @@ int system_bridge_delif(struct device *bridge, struct device *dev)
        return system_bridge_if(bridge->ifname, dev, SIOCBRDELIF, NULL);
 }
 
-int system_if_resolve(struct device *dev)
+static int system_ifname_resolve(const char *ifname)
 {
        struct ifreq ifr;
-       strncpy(ifr.ifr_name, dev->ifname, sizeof(ifr.ifr_name) - 1);
+       strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name) - 1);
        if (!ioctl(sock_ioctl, SIOCGIFINDEX, &ifr))
                return ifr.ifr_ifindex;
        else
                return 0;
 }
 
+/*
+ * Resolve the interface name of @dev to an interface index.
+ * Returns whatever system_ifname_resolve() yields (0 when the lookup fails).
+ */
+int system_if_resolve(struct device *dev)
+{
+       const char *ifname = dev->ifname;
+
+       return system_ifname_resolve(ifname);
+}
+
 static int system_if_flags(const char *ifname, unsigned add, unsigned rem)
 {
        struct ifreq ifr;
@@ -832,7 +914,7 @@ static int cb_clear_event(struct nl_msg *msg, void *arg)
        struct clear_data *clr = arg;
        struct nlmsghdr *hdr = nlmsg_hdr(msg);
        bool (*cb)(struct nlmsghdr *, int ifindex);
-       int type;
+       int type, ret;
 
        switch(clr->type) {
        case RTM_GETADDR:
@@ -869,13 +951,23 @@ static int cb_clear_event(struct nl_msg *msg, void *arg)
                D(SYSTEM, "Remove %s from device %s\n",
                  type == RTM_DELADDR ? "an address" : "a route",
                  clr->dev->ifname);
+
        memcpy(nlmsg_hdr(clr->msg), hdr, hdr->nlmsg_len);
        hdr = nlmsg_hdr(clr->msg);
        hdr->nlmsg_type = type;
        hdr->nlmsg_flags = NLM_F_REQUEST;
 
        nl_socket_disable_auto_ack(sock_rtnl);
-       nl_send_auto_complete(sock_rtnl, clr->msg);
+       ret = nl_send_auto_complete(sock_rtnl, clr->msg);
+       if (ret < 0) {
+               if (type == RTM_DELRULE)
+                       D(SYSTEM, "Error deleting a rule: %d\n", ret);
+               else
+                       D(SYSTEM, "Error deleting %s from device '%s': %d\n",
+                               type == RTM_DELADDR ? "an address" : "a route",
+                               clr->dev->ifname, ret);
+       }
+
        nl_socket_enable_auto_ack(sock_rtnl);
 
        return NL_SKIP;
@@ -901,7 +993,7 @@ static void
 system_if_clear_entries(struct device *dev, int type, int af)
 {
        struct clear_data clr;
-       struct nl_cb *cb = nl_cb_alloc(NL_CB_DEFAULT);
+       struct nl_cb *cb;
        struct rtmsg rtm = {
                .rtm_family = af,
                .rtm_flags = RTM_F_CLONED,
@@ -924,6 +1016,7 @@ system_if_clear_entries(struct device *dev, int type, int af)
                return;
        }
 
+       cb = nl_cb_alloc(NL_CB_DEFAULT);
        if (!cb)
                return;
 
@@ -936,10 +1029,13 @@ system_if_clear_entries(struct device *dev, int type, int af)
        nl_cb_set(cb, NL_CB_FINISH, NL_CB_CUSTOM, cb_finish_event, &pending);
        nl_cb_err(cb, NL_CB_CUSTOM, error_handler, &pending);
 
-       nl_send_auto_complete(sock_rtnl, clr.msg);
+       if (nl_send_auto_complete(sock_rtnl, clr.msg) < 0)
+               goto free;
+
        while (pending > 0)
                nl_recvmsgs(sock_rtnl, cb);
 
+free:
        nlmsg_free(clr.msg);
 out:
        nl_cb_put(cb);
@@ -952,8 +1048,8 @@ void system_if_clear_state(struct device *dev)
 {
        static char buf[256];
        char *bridge;
-
        device_set_ifindex(dev, system_if_resolve(dev));
+
        if (dev->external || !dev->ifindex)
                return;
 
@@ -975,6 +1071,8 @@ void system_if_clear_state(struct device *dev)
        system_if_clear_entries(dev, RTM_GETADDR, AF_INET);
        system_if_clear_entries(dev, RTM_GETROUTE, AF_INET6);
        system_if_clear_entries(dev, RTM_GETADDR, AF_INET6);
+       system_if_clear_entries(dev, RTM_GETNEIGH, AF_INET);
+       system_if_clear_entries(dev, RTM_GETNEIGH, AF_INET6);
        system_set_disable_ipv6(dev, "0");
 }
 
@@ -1061,41 +1159,33 @@ static void system_bridge_conf_multicast(struct device *bridge,
 int system_bridge_addbr(struct device *bridge, struct bridge_config *cfg)
 {
        char buf[64];
-       unsigned long args[4] = {};
 
        if (ioctl(sock_ioctl, SIOCBRADDBR, bridge->ifname) < 0)
                return -1;
 
-       args[0] = BRCTL_SET_BRIDGE_STP_STATE;
-       args[1] = !!cfg->stp;
-       system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
+       system_bridge_set_stp_state(bridge, cfg->stp ? "1" : "0");
 
-       args[0] = BRCTL_SET_BRIDGE_FORWARD_DELAY;
-       args[1] = sec_to_jiffies(cfg->forward_delay);
-       system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
+       snprintf(buf, sizeof(buf), "%lu", sec_to_jiffies(cfg->forward_delay));
+       system_bridge_set_forward_delay(bridge, buf);
 
        system_bridge_conf_multicast(bridge, cfg, buf, sizeof(buf));
 
-       args[0] = BRCTL_SET_BRIDGE_PRIORITY;
-       args[1] = cfg->priority;
-       system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
+       snprintf(buf, sizeof(buf), "%d", cfg->priority);
+       system_bridge_set_priority(bridge, buf);
 
        if (cfg->flags & BRIDGE_OPT_AGEING_TIME) {
-               args[0] = BRCTL_SET_AGEING_TIME;
-               args[1] = sec_to_jiffies(cfg->ageing_time);
-               system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
+               snprintf(buf, sizeof(buf), "%lu", sec_to_jiffies(cfg->ageing_time));
+               system_bridge_set_ageing_time(bridge, buf);
        }
 
        if (cfg->flags & BRIDGE_OPT_HELLO_TIME) {
-               args[0] = BRCTL_SET_BRIDGE_HELLO_TIME;
-               args[1] = sec_to_jiffies(cfg->hello_time);
-               system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
+               snprintf(buf, sizeof(buf), "%lu", sec_to_jiffies(cfg->hello_time));
+               system_bridge_set_hello_time(bridge, buf);
        }
 
        if (cfg->flags & BRIDGE_OPT_MAX_AGE) {
-               args[0] = BRCTL_SET_BRIDGE_MAX_AGE;
-               args[1] = sec_to_jiffies(cfg->max_age);
-               system_bridge_if(bridge->ifname, NULL, SIOCDEVPRIVATE, &args);
+               snprintf(buf, sizeof(buf), "%lu", sec_to_jiffies(cfg->max_age));
+               system_bridge_set_max_age(bridge, buf);
        }
 
        return 0;
@@ -1161,6 +1251,27 @@ nla_put_failure:
        return -ENOMEM;
 }
 
+/*
+ * Move the link called @ifname into the network namespace referred to by
+ * @netns_fd, renaming it to @target_ifname on the way if that is non-NULL.
+ *
+ * Returns the result of system_rtnl_call() once the request has been built,
+ * -1 if @ifname does not resolve to an interface index or the message cannot
+ * be allocated, and -ENOMEM if the header or an attribute cannot be added.
+ */
+int system_link_netns_move(const char *ifname, int netns_fd, const char *target_ifname)
+{
+       struct nl_msg *msg;
+       struct ifinfomsg iim = {
+               .ifi_family = AF_UNSPEC,
+       };
+
+       /*
+        * Refuse to continue when the source link cannot be resolved: a
+        * request carrying ifi_index == 0 together with IFLA_IFNAME is
+        * matched by name, so an unrelated interface that already happens to
+        * be called @target_ifname could be moved instead.
+        */
+       iim.ifi_index = system_ifname_resolve(ifname);
+       if (!iim.ifi_index)
+               return -1;
+
+       msg = nlmsg_alloc_simple(RTM_NEWLINK, NLM_F_REQUEST);
+       if (!msg)
+               return -1;
+
+       if (nlmsg_append(msg, &iim, sizeof(iim), 0))
+               goto nla_put_failure;
+
+       if (target_ifname && nla_put_string(msg, IFLA_IFNAME, target_ifname))
+               goto nla_put_failure;
+
+       if (nla_put_u32(msg, IFLA_NET_NS_FD, netns_fd))
+               goto nla_put_failure;
+
+       return system_rtnl_call(msg);
+
+nla_put_failure:
+       /* Never send a partially built request; release it here instead */
+       nlmsg_free(msg);
+       return -ENOMEM;
+}
+
 static int system_link_del(const char *ifname)
 {
        struct nl_msg *msg;
@@ -1184,6 +1295,20 @@ int system_macvlan_del(struct device *macvlan)
        return system_link_del(macvlan->ifname);
 }
 
+/*
+ * Open the network namespace file of the process with PID @target_ns.
+ *
+ * Returns the read-only file descriptor obtained from open() on
+ * /proc/<pid>/ns/net, or open()'s error return if that fails.
+ */
+int system_netns_open(const pid_t target_ns)
+{
+       char pid_net_path[PATH_MAX];
+
+       /* pid_t is a signed type, so print it with a signed conversion */
+       snprintf(pid_net_path, sizeof(pid_net_path), "/proc/%d/ns/net", (int)target_ns);
+
+       return open(pid_net_path, O_RDONLY);
+}
+
+/*
+ * Join the network namespace referred to by @netns_fd.
+ * Returns the result of setns() called with CLONE_NEWNET.
+ */
+int system_netns_set(int netns_fd)
+{
+       int rc = setns(netns_fd, CLONE_NEWNET);
+
+       return rc;
+}
+
 int system_veth_add(struct device *veth, struct veth_config *cfg)
 {
        struct nl_msg *msg;
@@ -1590,7 +1715,10 @@ int system_if_check(struct device *dev)
        nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, cb_if_check_ack, &chk);
        nl_cb_err(cb, NL_CB_CUSTOM, cb_if_check_error, &chk);
 
-       nl_send_auto_complete(sock_rtnl, msg);
+       ret = nl_send_auto_complete(sock_rtnl, msg);
+       if (ret < 0)
+               goto free;
+
        while (chk.pending > 0)
                nl_recvmsgs(sock_rtnl, cb);
 
@@ -1867,6 +1995,46 @@ int system_del_address(struct device *dev, struct device_addr *addr)
        return system_addr(dev, addr, RTM_DELADDR);
 }
 
+/*
+ * Build a neighbour request of type @cmd for @neighbor on @dev and pass it
+ * to system_rtnl_call().
+ *
+ * The address family (and with it the NDA_DST length of 4 or 16 bytes) is
+ * taken from the DEVADDR_FAMILY bits of neighbor->flags. The entry is marked
+ * NUD_PERMANENT; NTF_PROXY / NTF_ROUTER are set from neighbor->proxy and
+ * neighbor->router. NDA_LLADDR is only attached when DEVNEIGH_MAC is set.
+ * For RTM_NEWNEIGH the request carries NLM_F_CREATE | NLM_F_REPLACE.
+ *
+ * Returns -1 if the message cannot be allocated, otherwise the result of
+ * system_rtnl_call().
+ */
+static int system_neigh(struct device *dev, struct device_neighbor *neighbor, int cmd)
+{
+       bool is_v4 = (neighbor->flags & DEVADDR_FAMILY) == DEVADDR_INET4;
+       int addr_len = is_v4 ? 4 : 16;
+       unsigned int nl_flags = (cmd == RTM_NEWNEIGH) ? (NLM_F_CREATE | NLM_F_REPLACE) : 0;
+       struct nl_msg *msg;
+       struct ndmsg ndm = {
+               .ndm_family = is_v4 ? AF_INET : AF_INET6,
+               .ndm_ifindex = dev->ifindex,
+               .ndm_state = NUD_PERMANENT,
+       };
+
+       if (neighbor->proxy)
+               ndm.ndm_flags |= NTF_PROXY;
+       if (neighbor->router)
+               ndm.ndm_flags |= NTF_ROUTER;
+
+       msg = nlmsg_alloc_simple(cmd, nl_flags);
+       if (!msg)
+               return -1;
+
+       nlmsg_append(msg, &ndm, sizeof(ndm), 0);
+       nla_put(msg, NDA_DST, addr_len, &neighbor->addr);
+
+       /* The link-layer address is optional */
+       if (neighbor->flags & DEVNEIGH_MAC)
+               nla_put(msg, NDA_LLADDR, sizeof(neighbor->macaddr), &neighbor->macaddr);
+
+       return system_rtnl_call(msg);
+}
+
+/* Issue an RTM_NEWNEIGH request for @neighbor on @dev via system_neigh(). */
+int system_add_neighbor(struct device *dev, struct device_neighbor *neighbor)
+{
+       int ret = system_neigh(dev, neighbor, RTM_NEWNEIGH);
+
+       return ret;
+}
+
+/* Issue an RTM_DELNEIGH request for @neighbor on @dev via system_neigh(). */
+int system_del_neighbor(struct device *dev, struct device_neighbor *neighbor)
+{
+       int ret = system_neigh(dev, neighbor, RTM_DELNEIGH);
+
+       return ret;
+}
+
 static int system_rt(struct device *dev, struct device_route *route, int cmd)
 {
        int alen = ((route->flags & DEVADDR_FAMILY) == DEVADDR_INET4) ? 4 : 16;
@@ -2261,7 +2429,7 @@ time_t system_get_rtime(void)
        struct timespec ts;
        struct timeval tv;
 
-       if (syscall(__NR_clock_gettime, CLOCK_MONOTONIC, &ts) == 0)
+       if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
                return ts.tv_sec;
 
        if (gettimeofday(&tv, NULL) == 0)
@@ -2768,6 +2936,63 @@ failure:
 }
 #endif
 
+#ifdef IFLA_XFRM_MAX
+/*
+ * Build an RTM_NEWLINK request (NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_CREATE)
+ * for a link named @name of link kind @kind and pass it to
+ * system_rtnl_call().
+ *
+ * IFLA_XFRM_LINK is added when @link is non-zero. IFLA_XFRM_IF_ID is added
+ * when tb[TUNNEL_ATTR_DATA] is present and holds a non-zero XFRM_DATA_IF_ID.
+ *
+ * Returns -1 if the message cannot be allocated, -ENOMEM if a nested
+ * attribute cannot be opened (the message is freed in that case), otherwise
+ * the result of system_rtnl_call().
+ */
+static int system_add_xfrm_tunnel(const char *name, const char *kind,
+                                const unsigned int link, struct blob_attr **tb)
+{
+       struct ifinfomsg ifi = { .ifi_family = AF_UNSPEC, };
+       struct nlattr *linkinfo, *infodata;
+       struct blob_attr *data_attr;
+       struct nl_msg *msg;
+
+       msg = nlmsg_alloc_simple(RTM_NEWLINK, NLM_F_REQUEST | NLM_F_REPLACE | NLM_F_CREATE);
+       if (!msg)
+               return -1;
+
+       nlmsg_append(msg, &ifi, sizeof(ifi), 0);
+       nla_put_string(msg, IFLA_IFNAME, name);
+
+       linkinfo = nla_nest_start(msg, IFLA_LINKINFO);
+       if (!linkinfo)
+               goto failure;
+
+       nla_put_string(msg, IFLA_INFO_KIND, kind);
+
+       infodata = nla_nest_start(msg, IFLA_INFO_DATA);
+       if (!infodata)
+               goto failure;
+
+       if (link)
+               nla_put_u32(msg, IFLA_XFRM_LINK, link);
+
+       data_attr = tb[TUNNEL_ATTR_DATA];
+       if (data_attr) {
+               struct blob_attr *tb_data[__XFRM_DATA_ATTR_MAX];
+               struct blob_attr *id_attr;
+               uint32_t if_id;
+
+               blobmsg_parse(xfrm_data_attr_list.params, __XFRM_DATA_ATTR_MAX, tb_data,
+                       blobmsg_data(data_attr), blobmsg_len(data_attr));
+
+               /* An absent or zero if_id leaves IFLA_XFRM_IF_ID unset */
+               id_attr = tb_data[XFRM_DATA_IF_ID];
+               if_id = id_attr ? blobmsg_get_u32(id_attr) : 0;
+               if (if_id)
+                       nla_put_u32(msg, IFLA_XFRM_IF_ID, if_id);
+       }
+
+       nla_nest_end(msg, infodata);
+       nla_nest_end(msg, linkinfo);
+
+       return system_rtnl_call(msg);
+
+failure:
+       nlmsg_free(msg);
+       return -ENOMEM;
+}
+#endif
+
 #ifdef IFLA_VXLAN_MAX
 static int system_add_vxlan(const char *name, const unsigned int link, struct blob_attr **tb, bool v6)
 {
@@ -3053,7 +3278,8 @@ static int __system_del_ip_tunnel(const char *name, struct blob_attr **tb)
        if (!strcmp(str, "greip") || !strcmp(str, "gretapip") ||
            !strcmp(str, "greip6") || !strcmp(str, "gretapip6") ||
            !strcmp(str, "vtiip") || !strcmp(str, "vtiip6") ||
-           !strcmp(str, "vxlan") || !strcmp(str, "vxlan6"))
+           !strcmp(str, "vxlan") || !strcmp(str, "vxlan6") ||
+           !strcmp(str, "xfrm"))
                return system_link_del(name);
        else
                return tunnel_ioctl(name, SIOCDELTUNNEL, NULL);
@@ -3150,6 +3376,10 @@ int system_add_ip_tunnel(const char *name, struct blob_attr *attr)
        } else if (!strcmp(str, "vtiip6")) {
                return system_add_vti_tunnel(name, "vti6", link, tb, true);
 #endif
+#ifdef IFLA_XFRM_MAX
+       } else if (!strcmp(str, "xfrm")) {
+               return system_add_xfrm_tunnel(name, "xfrm", link, tb);
+#endif
 #ifdef IFLA_VXLAN_MAX
        } else if(!strcmp(str, "vxlan")) {
                return system_add_vxlan(name, link, tb, false);