/*
* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
*/
+#define _GNU_SOURCE
#include <sys/types.h>
#include <sys/socket.h>
-#include <sys/wait.h>
#include <sys/ioctl.h>
#include <net/if_arp.h>
#include <net/if.h>
+#include <netinet/if_ether.h>
#include <unistd.h>
#include <errno.h>
+#include <netlink/msg.h>
+#include <netlink/attr.h>
+#include <netlink/socket.h>
+
+#include <linux/rtnetlink.h>
+#include <linux/pkt_cls.h>
+
#include <libubox/vlist.h>
#include <libubox/avl-cmp.h>
#include <libubox/uloop.h>
static VLIST_TREE(devices, avl_strcmp, interface_update_cb, true, false);
static VLIST_TREE(interfaces, avl_strcmp, interface_update_cb, true, false);
static int socket_fd;
+static struct nl_sock *rtnl_sock;
-#define APPEND(_buf, _ofs, _format, ...) _ofs += snprintf(_buf + _ofs, sizeof(_buf) - _ofs, _format, ##__VA_ARGS__)
+/*
+ * Append formatted text to the char array _buf at offset _ofs and advance
+ * _ofs by the length reported by snprintf(). _buf must be a real array,
+ * because sizeof(_buf) is used as its capacity.
+ *
+ * snprintf() reports the untruncated length, so after a truncated write
+ * _ofs can reach or exceed sizeof(_buf). In that state the expression
+ * "sizeof(_buf) - _ofs" would wrap around as an unsigned value and the next
+ * write would land outside the buffer, so further appends become no-ops.
+ * The macro stays a single expression so existing uses keep compiling.
+ */
+#define APPEND(_buf, _ofs, _format, ...) \
+	((_ofs) += ((_ofs) >= 0 && (size_t)(_ofs) < sizeof(_buf)) ? \
+		snprintf((_buf) + (_ofs), sizeof(_buf) - (_ofs), _format, ##__VA_ARGS__) : 0)
return ifname;
}
-static int run_cmd(char *cmd, bool ignore)
+/*
+ * Format a "tc qdisc <add|del> dev '<dev>' <type>" command line into buf.
+ *
+ * buf:  destination buffer
+ * len:  size of buf in bytes
+ * dev:  interface name, placed inside single quotes
+ * add:  true emits "add", false emits "del"
+ * type: remainder of the command, appended verbatim
+ *
+ * Returns the number of characters stored in buf, not counting the
+ * terminating NUL. Unlike the raw snprintf() result this never exceeds
+ * len - 1, so the value is always a valid offset into buf for callers that
+ * append further text.
+ */
+static int
+prepare_qdisc_cmd(char *buf, int len, const char *dev, bool add, const char *type)
{
- char *argv[] = { "sh", "-c", cmd, NULL };
- bool first = true;
- int status = -1;
- char buf[512];
- int fds[2];
- FILE *f;
- int pid;
-
- if (pipe(fds))
- return -1;
-
- pid = fork();
- if (!pid) {
- close(fds[0]);
- if (fds[1] != STDOUT_FILENO)
- dup2(fds[1], STDOUT_FILENO);
- if (fds[1] != STDERR_FILENO)
- dup2(fds[1], STDERR_FILENO);
- if (fds[1] > STDERR_FILENO)
- close(fds[1]);
- execv("/bin/sh", argv);
- exit(1);
- }
-
- if (pid < 0)
- return -1;
-
- close(fds[1]);
- f = fdopen(fds[0], "r");
- if (!f) {
- close(fds[0]);
- goto out;
- }
-
- while (fgets(buf, sizeof(buf), f) != NULL) {
- if (!strlen(buf))
- break;
- if (ignore)
- continue;
- if (first) {
- ULOG_WARN("Command: %s\n", cmd);
- first = false;
- }
- ULOG_WARN("%s%s", buf, strchr(buf, '\n') ? "" : "\n");
- }
-
- fclose(f);
-
-out:
- while (waitpid(pid, &status, 0) < 0)
- if (errno != EINTR)
- break;
-
- return status;
+	int n;
+
+	n = snprintf(buf, len, "tc qdisc %s dev '%s' %s",
+		     add ? "add" : "del", dev, type);
+	if (n < 0) {
+		/* formatting failed: leave an empty string behind */
+		if (len > 0)
+			buf[0] = '\0';
+		return 0;
+	}
+
+	/* truncated: report what actually fits, not what was asked for */
+	if (n >= len)
+		return len > 0 ? len - 1 : 0;
+
+	return n;
}
+/*
+ * Format a "tc filter <add|del> dev '<dev>' <egress|ingress> prio <prio>"
+ * command line into buf.
+ *
+ * buf:    destination buffer
+ * len:    size of buf in bytes
+ * dev:    interface name, placed inside single quotes
+ * prio:   filter priority
+ * add:    true emits "add", false emits "del"
+ * egress: true selects "egress", false selects "ingress"
+ *
+ * Returns the number of characters stored in buf, not counting the
+ * terminating NUL. Unlike the raw snprintf() result this never exceeds
+ * len - 1, so the value is always a valid offset into buf for callers that
+ * append further text.
+ */
static int
-prepare_tc_cmd(char *buf, int len, const char *type, const char *cmd,
- const char *dev, const char *extra)
+prepare_filter_cmd(char *buf, int len, const char *dev, int prio, bool add, bool egress)
{
- return snprintf(buf, len, "tc %s %s dev '%s' %s", type, cmd, dev, extra);
+	int n;
+
+	n = snprintf(buf, len, "tc filter %s dev '%s' %sgress prio %d",
+		     add ? "add" : "del", dev, egress ? "e" : "in", prio);
+	if (n < 0) {
+		/* formatting failed: leave an empty string behind */
+		if (len > 0)
+			buf[0] = '\0';
+		return 0;
+	}
+
+	/* truncated: report what actually fits, not what was asked for */
+	if (n >= len)
+		return len > 0 ? len - 1 : 0;
+
+	return n;
}
+/*
+ * Install a "bpf" tc filter on the clsact hook of an interface through the
+ * rtnetlink socket.
+ *
+ * ifname: interface to attach to
+ * prio:   filter priority, placed in the upper 16 bits of tcm_info
+ * egress: true attaches to the clsact egress hook, false to the ingress hook
+ * eth:    false requests the QOSIFY_IP_ONLY program variant
+ *
+ * The program is looked up with qosify_get_program(), which is expected to
+ * return a name suffix and store the program fd in prog_fd; a NULL return
+ * is treated as "no program available".
+ *
+ * Returns -1 if no program is available or the request cannot be built, the
+ * negative result of nl_send_auto_complete() if sending fails, and otherwise
+ * the result of nl_wait_for_ack().
+ */
static int
-cmd_del_qdisc(const char *ifname, const char *type)
+cmd_add_bpf_filter(const char *ifname, int prio, bool egress, bool eth)
{
- char buf[64];
+	struct tcmsg tcmsg = {
+		.tcm_family = AF_UNSPEC,
+		.tcm_ifindex = if_nametoindex(ifname),
+	};
+	struct nl_msg *msg;
+	struct nlattr *opts;
+	const char *suffix;
+	int prog_fd = -1;
+	char name[32];
+	int ret;
+
+	suffix = qosify_get_program(!egress * QOSIFY_INGRESS + !eth * QOSIFY_IP_ONLY, &prog_fd);
+	if (!suffix)
+		return -1;
+
+	snprintf(name, sizeof(name), "qosify_%s", suffix);
+
+	if (egress)
+		tcmsg.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_EGRESS);
+	else
+		tcmsg.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
- prepare_tc_cmd(buf, sizeof(buf), "qdisc", "del", ifname, type);
+	tcmsg.tcm_info = TC_H_MAKE(prio << 16, htons(ETH_P_ALL));
- return run_cmd(buf, true);
+	msg = nlmsg_alloc_simple(RTM_NEWTFILTER, NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
+	if (!msg)
+		return -1;
+
+	if (nlmsg_append(msg, &tcmsg, sizeof(tcmsg), NLMSG_ALIGNTO) < 0 ||
+	    nla_put_string(msg, TCA_KIND, "bpf") < 0)
+		goto err_free;
+
+	opts = nla_nest_start(msg, TCA_OPTIONS);
+	if (!opts)
+		goto err_free;
+
+	if (nla_put_u32(msg, TCA_BPF_FD, prog_fd) < 0 ||
+	    nla_put_string(msg, TCA_BPF_NAME, name) < 0 ||
+	    nla_put_u32(msg, TCA_BPF_FLAGS, TCA_BPF_FLAG_ACT_DIRECT) < 0 ||
+	    nla_put_u32(msg, TCA_BPF_FLAGS_GEN, TCA_CLS_FLAGS_SKIP_HW) < 0)
+		goto err_free;
+
+	nla_nest_end(msg, opts);
+
+	ret = nl_send_auto_complete(rtnl_sock, msg);
+	nlmsg_free(msg);
+
+	/* nothing was sent, so there is no ack to wait for */
+	if (ret < 0)
+		return ret;
+
+	return nl_wait_for_ack(rtnl_sock);
+
+err_free:
+	nlmsg_free(msg);
+	return -1;
}
static int
char buf[512];
int ofs;
- cmd_del_qdisc(ifname, "root");
+ ofs = prepare_qdisc_cmd(buf, sizeof(buf), ifname, true, "clsact");
+ qosify_run_cmd(buf, true);
- ofs = prepare_tc_cmd(buf, sizeof(buf), "qdisc", "add", ifname, "root handle 1: cake");
+ ofs = prepare_qdisc_cmd(buf, sizeof(buf), ifname, true, "root cake");
if (bw)
APPEND(buf, ofs, " bandwidth %s", bw);
cfg->common_opts ? cfg->common_opts : "",
dir_opts ? dir_opts : "");
- run_cmd(buf, false);
-
- ofs = prepare_tc_cmd(buf, sizeof(buf), "filter", "add", ifname, "parent 1: bpf");
- APPEND(buf, ofs, " object-pinned /sys/fs/bpf/qosify_%sgress_%s verbose direct-action",
- egress ? "e" : "in",
- eth ? "eth" : "ip");
-
- return run_cmd(buf, false);
+ return qosify_run_cmd(buf, false);
}
+/*
+ * Install the ingress-side filters for iface, using six consecutive filter
+ * priorities starting at QOSIFY_PRIO_BASE:
+ *  - the BPF classifier on the ingress hook (cmd_add_bpf_filter),
+ *  - four u32 filters matching source port 53 that redirect to
+ *    QOSIFY_DNS_IFNAME,
+ *  - when iface->config.ingress is set, a match-all filter redirecting to
+ *    the interface's ifb device, which is created, given a qdisc via
+ *    cmd_add_qdisc() and brought up first.
+ *
+ * Results of the intermediate commands are ignored. Returns 0 when
+ * iface->config.ingress is not set, otherwise the result of the final
+ * qosify_run_cmd() call.
+ */
static int
-cmd_del_ingress(struct qosify_iface *iface)
+cmd_add_ingress(struct qosify_iface *iface, bool eth)
{
+ const char *ifbdev = interface_ifb_name(iface);
char buf[256];
+ int prio = QOSIFY_PRIO_BASE;
+ int ofs;
- cmd_del_qdisc(iface->ifname, "handle ffff: ingress");
- snprintf(buf, sizeof(buf), "ip link del '%s'", interface_ifb_name(iface));
+ cmd_add_bpf_filter(iface->ifname, prio++, false, eth);
- return run_cmd(buf, true);
-}
+ /* source port 53, untagged IPv4 */
+ ofs = prepare_filter_cmd(buf, sizeof(buf), iface->ifname, prio++, true, false);
+ APPEND(buf, ofs, " protocol ip u32 match ip sport 53 0xffff "
+ "flowid 1:1 action mirred egress redirect dev " QOSIFY_DNS_IFNAME);
+ qosify_run_cmd(buf, false);
+ /* source port 53, IPv4 behind a 4-byte 802.1Q tag */
+ ofs = prepare_filter_cmd(buf, sizeof(buf), iface->ifname, prio++, true, false);
+ APPEND(buf, ofs, " protocol 802.1Q u32 offset plus 4 match ip sport 53 0xffff "
+ "flowid 1:1 action mirred egress redirect dev " QOSIFY_DNS_IFNAME);
+ qosify_run_cmd(buf, false);
-static int
-cmd_add_ingress(struct qosify_iface *iface, bool eth)
-{
- const char *ifbdev = interface_ifb_name(iface);
- char buf[256];
- int ofs;
+ /* source port 53, untagged IPv6 */
+ ofs = prepare_filter_cmd(buf, sizeof(buf), iface->ifname, prio++, true, false);
+ APPEND(buf, ofs, " protocol ipv6 u32 match ip6 sport 53 0xffff "
+ "flowid 1:1 action mirred egress redirect dev " QOSIFY_DNS_IFNAME);
+ qosify_run_cmd(buf, false);
+
+ /*
+ * NOTE(review): the IPv4 variant with "offset plus 4" above uses
+ * "protocol 802.1Q", while this one keeps "protocol ipv6" -- confirm
+ * this is intended for VLAN-tagged IPv6 traffic.
+ */
+ ofs = prepare_filter_cmd(buf, sizeof(buf), iface->ifname, prio++, true, false);
+ APPEND(buf, ofs, " protocol ipv6 u32 offset plus 4 match ip6 sport 53 0xffff "
+ "flowid 1:1 action mirred egress redirect dev " QOSIFY_DNS_IFNAME);
+ qosify_run_cmd(buf, false);
- cmd_del_ingress(iface);
- ofs = prepare_tc_cmd(buf, sizeof(buf), "qdisc", "add", iface->ifname, " handle ffff: ingress");
- run_cmd(buf, false);
+ /* the ifb device and its redirect are only set up when ingress is configured */
+ if (!iface->config.ingress)
+ return 0;
snprintf(buf, sizeof(buf), "ip link add '%s' type ifb", ifbdev);
- run_cmd(buf, false);
+ qosify_run_cmd(buf, false);
cmd_add_qdisc(iface, ifbdev, false, eth);
snprintf(buf, sizeof(buf), "ip link set dev '%s' up", ifbdev);
- run_cmd(buf, false);
+ qosify_run_cmd(buf, false);
+
+ /* match-all filter: redirect everything arriving on the interface to the ifb device */
+ ofs = prepare_filter_cmd(buf, sizeof(buf), iface->ifname, prio++, true, false);
+ APPEND(buf, ofs, " protocol all u32 match u32 0 0 flowid 1:1"
+ " action mirred egress redirect dev '%s'", ifbdev);
+ return qosify_run_cmd(buf, false);
+}
+
+/*
+ * Set up the egress side of iface: add the qdisc on the interface itself and
+ * attach the BPF classifier to its egress hook at QOSIFY_PRIO_BASE.
+ *
+ * Does nothing and returns 0 when iface->config.egress is not set; otherwise
+ * returns the result of cmd_add_bpf_filter(). The result of cmd_add_qdisc()
+ * is ignored.
+ */
+static int cmd_add_egress(struct qosify_iface *iface, bool eth)
+{
+	int ret = 0;
+
+	if (iface->config.egress) {
+		cmd_add_qdisc(iface, iface->ifname, true, eth);
+		ret = cmd_add_bpf_filter(iface->ifname, QOSIFY_PRIO_BASE, true, eth);
+	}
+
+	return ret;
+}
- ofs = prepare_tc_cmd(buf, sizeof(buf), "filter", "add", iface->ifname, " parent ffff:");
- APPEND(buf, ofs, " protocol all prio 10 u32 match u32 0 0 "
- "flowid 1:1 action mirred egress redirect dev '%s'", ifbdev);
- return run_cmd(buf, false);
+/*
+ * Tear down the tc state of iface: delete the root qdisc, the ingress
+ * filters at priorities QOSIFY_PRIO_BASE .. QOSIFY_PRIO_BASE + 5, the egress
+ * filter at QOSIFY_PRIO_BASE, and finally the interface's ifb device.
+ *
+ * Every command is run with the second qosify_run_cmd() argument set to
+ * true and its result is not checked.
+ */
+static void
+interface_clear_qdisc(struct qosify_iface *iface)
+{
+	char cmd[64];
+	int prio;
+
+	prepare_qdisc_cmd(cmd, sizeof(cmd), iface->ifname, false, "root");
+	qosify_run_cmd(cmd, true);
+
+	/* six ingress priorities, counted up from the base */
+	for (prio = QOSIFY_PRIO_BASE; prio < QOSIFY_PRIO_BASE + 6; prio++) {
+		prepare_filter_cmd(cmd, sizeof(cmd), iface->ifname, prio, false, false);
+		qosify_run_cmd(cmd, true);
+	}
+
+	/* egress uses the base priority only */
+	prepare_filter_cmd(cmd, sizeof(cmd), iface->ifname, QOSIFY_PRIO_BASE, false, true);
+	qosify_run_cmd(cmd, true);
+
+	snprintf(cmd, sizeof(cmd), "ip link del '%s'", interface_ifb_name(iface));
+	qosify_run_cmd(cmd, true);
}
static void
eth = ifr.ifr_hwaddr.sa_family == ARPHRD_ETHER;
- if (iface->config.egress)
- cmd_add_qdisc(iface, iface->ifname, true, eth);
- if (iface->config.ingress)
- cmd_add_ingress(iface, eth);
+ interface_clear_qdisc(iface);
+ cmd_add_egress(iface, eth);
+ cmd_add_ingress(iface, eth);
iface->active = true;
}
ULOG_INFO("stop interface %s\n", iface->ifname);
iface->active = false;
- if (iface->config.egress)
- cmd_del_qdisc(iface->ifname, "root");
- if (iface->config.ingress)
- cmd_del_ingress(iface);
+ interface_clear_qdisc(iface);
}
static void
blobmsg_close_table(b, c);
}
+/*
+ * Netlink error callback: log the error code of a failed request together
+ * with the message string from the error attributes, when one is attached.
+ *
+ * Nothing is logged when the NLM_F_ACK_TLVS flag is absent. If the flag is
+ * set but no NLMSGERR_ATTR_MSG attribute is found, "(unknown)" is logged
+ * instead. Always returns NL_STOP. The nla and arg parameters are unused.
+ */
+static int
+qosify_nl_error_cb(struct sockaddr_nl *nla, struct nlmsgerr *err,
+		   void *arg)
+{
+	/* the netlink header is located directly in front of the error payload */
+	struct nlmsghdr *hdr = (struct nlmsghdr *) err - 1;
+	struct nlattr *tb[NLMSGERR_ATTR_MAX + 1];
+	const char *text = "(unknown)";
+	int remaining;
+	int skip;
+
+	if (!(hdr->nlmsg_flags & NLM_F_ACK_TLVS))
+		return NL_STOP;
+
+	/* outer header + error code + header of the echoed request */
+	skip = sizeof(*hdr) + sizeof(int) + sizeof(*hdr);
+
+	/* without NLM_F_CAPPED the echoed request's payload follows as well */
+	if (!(hdr->nlmsg_flags & NLM_F_CAPPED))
+		skip += err->msg.nlmsg_len - sizeof(*hdr);
+
+	remaining = (int) hdr->nlmsg_len - skip;
+
+	nla_parse(tb, NLMSGERR_ATTR_MAX,
+		  (struct nlattr *) ((unsigned char *) hdr + skip),
+		  remaining, NULL);
+	if (tb[NLMSGERR_ATTR_MSG])
+		text = nla_data(tb[NLMSGERR_ATTR_MSG]);
+
+	ULOG_ERR("Netlink error(%d): %s\n", err->error, text);
+
+	return NL_STOP;
+}
+
+/*
+ * Create the sockets used by the interface code: an AF_UNIX datagram socket
+ * stored in socket_fd and a NETLINK_ROUTE socket stored in rtnl_sock.
+ *
+ * qosify_nl_error_cb is installed as the netlink error callback.
+ * NETLINK_EXT_ACK and NETLINK_CAP_ACK are requested on a best-effort basis;
+ * a setsockopt() failure is ignored, and the error callback returns without
+ * logging when a reply lacks NLM_F_ACK_TLVS.
+ *
+ * Returns 0 on success. On failure returns -1 with everything created so
+ * far released again, socket_fd set to -1 and rtnl_sock set to NULL.
+ */
int qosify_iface_init(void)
{
+	int fd, opt;
+
socket_fd = socket(AF_UNIX, SOCK_DGRAM, 0);
-if (socket < 0)
+	/* test the descriptor, not the address of the socket() function */
+	if (socket_fd < 0)
return -1;
+	rtnl_sock = nl_socket_alloc();
+	if (!rtnl_sock)
+		goto err_close;
+
+	if (nl_connect(rtnl_sock, NETLINK_ROUTE))
+		goto err_free;
+
+	nl_cb_err(nl_socket_get_cb(rtnl_sock), NL_CB_CUSTOM,
+		  qosify_nl_error_cb, NULL);
+
+	fd = nl_socket_get_fd(rtnl_sock);
+	opt = 1;
+	setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &opt, sizeof(opt));
+
+	opt = 1;
+	setsockopt(fd, SOL_NETLINK, NETLINK_CAP_ACK, &opt, sizeof(opt));
+
return 0;
+
+err_free:
+	nl_socket_free(rtnl_sock);
+	rtnl_sock = NULL;
+err_close:
+	close(socket_fd);
+	socket_fd = -1;
+	return -1;
}
interface_stop(iface);
vlist_for_each_element(&devices, iface, node)
interface_stop(iface);
+
+ nl_socket_free(rtnl_sock);
}