/*
 * Per-flow tracking state; used as the value type of flow_map.
 * This change widens pkt_count and bulk_timeout to __u32 and removes the
 * cached per-flow dscp field.
 */
struct flow_bucket {
/* timestamp of the last counter restart, taken from cur_time() */
__u32 last_update;
/* packet-length average, updated through ewma() */
__u32 pkt_len_avg;
- __u16 pkt_count;
- __u8 dscp;
- __u8 bulk_timeout;
/* packets (GSO segments counted individually) seen since the last restart */
+ __u32 pkt_count;
/* non-zero while the flow is treated as bulk */
+ __u32 bulk_timeout;
};
/*
 * Map "config" with a single entry. Its type/key/value attributes are not
 * visible in this excerpt.
 */
struct {
__uint(max_entries, 1);
} config SEC(".maps");
/*
 * tcp_ports: pinned array map with 1 << 16 entries, keyed by __u32.
 * It is now declared directly instead of through the former port_array_t
 * typedef, and the value type shrinks from struct qosify_dscp_val to a
 * single __u8.
 */
-typedef struct {
+struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(pinning, 1);
__type(key, __u32);
- __type(value, struct qosify_dscp_val);
+ __type(value, __u8);
__uint(max_entries, 1 << 16);
-} port_array_t;
+} tcp_ports SEC(".maps");
+
/*
 * udp_ports: same layout as tcp_ports (pinned array, __u32 key, __u8 value,
 * 1 << 16 entries), now spelled out as its own definition.
 */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(pinning, 1);
+ __type(key, __u32);
+ __type(value, __u8);
+ __uint(max_entries, 1 << 16);
+} udp_ports SEC(".maps");
/*
 * flow_map: pinned LRU hash keyed by a __u32 (check_flow() uses the result of
 * bpf_get_hash_recalc()) holding one struct flow_bucket per flow.
 * The value is now declared with __type() instead of a raw value_size.
 */
struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__uint(pinning, 1);
__type(key, __u32);
- __uint(value_size, sizeof(struct flow_bucket));
+ __type(value, struct flow_bucket);
__uint(max_entries, QOSIFY_FLOW_BUCKETS);
} flow_map SEC(".maps");
/* the typedef-based port map instances go away; both maps are defined directly above */
-port_array_t tcp_ports SEC(".maps");
-port_array_t udp_ports SEC(".maps");
-
/*
 * ipv6_map: pinned hash map created with BPF_F_NO_PREALLOC.
 * Key/value/max_entries attributes are not visible in this excerpt.
 */
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(pinning, 1);
__uint(map_flags, BPF_F_NO_PREALLOC);
} ipv6_map SEC(".maps");
/*
 * class_map: new pinned array of struct qosify_class, sized to hold
 * QOSIFY_MAX_CLASS_ENTRIES + QOSIFY_DEFAULT_CLASS_ENTRIES entries.
 * dscp_lookup_class() indexes it with the value bits of a dscp byte.
 */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(pinning, 1);
+ __type(key, __u32);
+ __type(value, struct qosify_class);
+ __uint(max_entries, QOSIFY_MAX_CLASS_ENTRIES +
+ QOSIFY_DEFAULT_CLASS_ENTRIES);
+} class_map SEC(".maps");
+
/*
 * Return a pointer to the global struct qosify_config.
 * NOTE(review): most of the body is elided in this excerpt; presumably it
 * looks up `key` (0) in the config map - confirm against the full file.
 */
static struct qosify_config *get_config(void)
{
__u32 key = 0;
}
/*
 * IPv4 DS-field helper. It now receives the skb and the header offset and
 * derives the header pointer itself via skb_ptr(), instead of taking a
 * pre-validated struct iphdr pointer from the caller.
 * NOTE(review): only the start of the body is visible in this excerpt; the
 * actual field/checksum update is not shown.
 */
static __always_inline void
-ipv4_change_dsfield(struct iphdr *iph, __u8 mask, __u8 value, bool force)
+ipv4_change_dsfield(struct __sk_buff *skb, __u32 offset,
+ __u8 mask, __u8 value, bool force)
{
- __u32 check = bpf_ntohs(iph->check);
+ struct iphdr *iph;
+ __u32 check;
__u8 dsfield;
+ iph = skb_ptr(skb, offset);
/* give up when skb_check() rejects the end of the header (iph + 1) */
+ if (skb_check(skb, iph + 1))
+ return;
+
/* the checksum is read only after the header has been validated */
+ check = bpf_ntohs(iph->check);
/* bits selected by mask are already set in tos: keep them unless forced */
if ((iph->tos & mask) && !force)
return;
}
/*
 * IPv6 counterpart of ipv4_change_dsfield(): takes the skb and header offset,
 * derives the header pointer via skb_ptr() and validates it with skb_check()
 * before touching it.
 * NOTE(review): the remainder of the body (including the closing brace) is
 * not visible in this excerpt.
 */
static __always_inline void
-ipv6_change_dsfield(struct ipv6hdr *ipv6h, __u8 mask, __u8 value, bool force)
+ipv6_change_dsfield(struct __sk_buff *skb, __u32 offset,
+ __u8 mask, __u8 value, bool force)
{
- __u16 *p = (__u16 *)ipv6h;
+ struct ipv6hdr *ipv6h;
+ __u16 *p;
__u16 val;
+ ipv6h = skb_ptr(skb, offset);
/* give up when skb_check() rejects the end of the header (ipv6h + 1) */
+ if (skb_check(skb, ipv6h + 1))
+ return;
+
/* the first 16-bit word of the header is inspected below */
+ p = (__u16 *)ipv6h;
/* masked bits already set: keep them unless forced */
if (((*p >> 4) & mask) && !force)
return;
/*
 * Derive a classification byte from the L4 header found at `offset`.
 * ICMP/ICMPv6 take config->dscp_icmp; otherwise a port key is looked up in
 * tcp_ports/udp_ports. *out_val is written only when a value was found, so
 * the caller's previous content survives every early return.
 * The maps and config now hold plain __u8 values, so the former dscp_val()
 * ingress/egress selection is gone from this function.
 */
static void
parse_l4proto(struct qosify_config *config, struct __sk_buff *skb,
- __u32 offset, __u8 proto, __u8 *dscp_out, bool ingress)
+ __u32 offset, __u8 proto, bool ingress,
+ __u8 *out_val)
{
struct udphdr *udp;
__u32 src, dest, key;
- struct qosify_dscp_val *value;
+ __u8 *value;
udp = skb_ptr(skb, offset);
/* nothing to do when skb_check() rejects the header up to the len field */
if (skb_check(skb, &udp->len))
return;
/* ICMP and ICMPv6 use the fixed value from the config */
if (config && (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6)) {
- *dscp_out = dscp_val(&config->dscp_icmp, ingress);
+ *out_val = config->dscp_icmp;
return;
}
/* read both ports once, then use the source on ingress and the destination otherwise */
+ src = READ_ONCE(udp->source);
+ dest = READ_ONCE(udp->dest);
if (ingress)
- key = udp->source;
+ key = src;
else
- key = udp->dest;
+ key = dest;
if (proto == IPPROTO_TCP) {
value = bpf_map_lookup_elem(&tcp_ports, &key);
/* NOTE(review): the else branch separating the two lookups appears to be elided in this excerpt */
value = bpf_map_lookup_elem(&udp_ports, &key);
}
- if (!value)
- return;
-
- *dscp_out = dscp_val(value, ingress);
/* a missing map entry leaves *out_val untouched */
+ if (value)
+ *out_val = *value;
}
/*
 * Bulk-flow detection, split out of the former check_flow().
 * The caller now performs the flow_map lookup and passes `flow` in; the
 * per-flow dscp cache and the priority (average packet length) handling are
 * removed from this function.
 * Packets are counted per flow (each GSO segment counts once). When
 * pkt_count exceeds config->bulk_trigger_pps the flow's bulk_timeout is
 * armed, and while bulk_timeout is non-zero *out_val is set to
 * config->dscp_bulk.
 */
static __always_inline void
-check_flow(struct qosify_config *config, struct __sk_buff *skb,
- uint8_t *dscp, bool ingress)
+check_flow_bulk(struct qosify_flow_config *config, struct __sk_buff *skb,
+ struct flow_bucket *flow, __u8 *out_val)
{
- struct flow_bucket flow_data;
- struct flow_bucket *flow;
/* set when the timeout was armed in this call, so it is not aged right away */
+ bool trigger = false;
__s32 delta;
- __u32 hash;
__u32 time;
+ int segs = 1;
- if (!(*dscp & QOSIFY_DSCP_DEFAULT_FLAG))
- return;
-
- if (!config)
- return;
-
- if (!config->bulk_trigger_pps &&
- !config->prio_max_avg_pkt_len)
/* bulk detection is off when no trigger rate is configured */
+ if (!config->bulk_trigger_pps)
return;
time = cur_time();
- hash = bpf_get_hash_recalc(skb);
- flow = bpf_map_lookup_elem(&flow_map, &hash);
- if (!flow) {
- memset(&flow_data, 0, sizeof(flow_data));
- bpf_map_update_elem(&flow_map, &hash, &flow_data, BPF_ANY);
- flow = bpf_map_lookup_elem(&flow_map, &hash);
- if (!flow)
- return;
- }
-
/* a zero last_update (e.g. a freshly zeroed bucket) starts the flow from scratch */
if (!flow->last_update)
goto reset;
/* NOTE(review): the assignment of delta is not visible in this excerpt */
if ((u32)delta > FLOW_TIMEOUT)
goto reset;
/* count every GSO segment as one packet */
+ if (skb->gso_segs)
+ segs = skb->gso_segs;
+ flow->pkt_count += segs;
/* over the configured rate: (re)arm the bulk timeout */
+ if (flow->pkt_count > config->bulk_trigger_pps) {
+ flow->bulk_timeout = config->bulk_trigger_timeout + 1;
+ trigger = true;
+ }
+
/* check interval elapsed: age the timeout unless just armed, then restart counting */
if (delta >= FLOW_CHECK_INTERVAL) {
- if (flow->bulk_timeout) {
+ if (flow->bulk_timeout && !trigger)
flow->bulk_timeout--;
- if (!flow->bulk_timeout)
- flow->dscp = 0xff;
- }
goto clear;
}
- if (flow->pkt_count < 0xffff)
- flow->pkt_count++;
-
- if (config->bulk_trigger_pps &&
- flow->pkt_count > config->bulk_trigger_pps) {
- flow->dscp = dscp_val(&config->dscp_bulk, ingress);
- flow->bulk_timeout = config->bulk_trigger_timeout;
- }
-
-out:
- if (config->prio_max_avg_pkt_len &&
- flow->dscp != dscp_val(&config->dscp_bulk, ingress)) {
- if (ewma(&flow->pkt_len_avg, skb->len) <
- config->prio_max_avg_pkt_len)
- flow->dscp = dscp_val(&config->dscp_prio, ingress);
- else
- flow->dscp = 0xff;
- }
-
- if (flow->dscp != 0xff)
- *dscp = flow->dscp;
-
- return;
+ goto out;
/* reset additionally discards the packet-length average */
reset:
- flow->dscp = 0xff;
flow->pkt_len_avg = 0;
/* clear restarts the packet counter and the interval timestamp */
clear:
flow->pkt_count = 1;
flow->last_update = time;
/* every path ends here: report bulk while the timeout is still running */
+out:
+ if (flow->bulk_timeout)
+ *out_val = config->dscp_bulk;
+}
- goto out;
/*
 * Priority detection for flows with small packets.
 * Does nothing while the flow is marked bulk (bulk_timeout non-zero) or when
 * config->prio_max_avg_pkt_len is 0. Otherwise feeds the per-segment packet
 * length into ewma() and sets *out_val to config->dscp_prio when the result
 * is at or below the configured maximum. The removed code in the former
 * check_flow() compared with '<'; this version uses '<='.
 */
+static __always_inline void
+check_flow_prio(struct qosify_flow_config *config, struct __sk_buff *skb,
+ struct flow_bucket *flow, __u8 *out_val)
+{
+ int cur_len = skb->len;
+
+ if (flow->bulk_timeout)
+ return;
+
+ if (!config->prio_max_avg_pkt_len)
+ return;
+
/* for GSO packets use the length per segment rather than the aggregate */
+ if (skb->gso_segs > 1)
+ cur_len /= skb->gso_segs;
+
+ if (ewma(&flow->pkt_len_avg, cur_len) <= config->prio_max_avg_pkt_len)
+ *out_val = config->dscp_prio;
}
/*
 * Look up (or create) the flow_map entry for this skb's hash and run the
 * bulk and priority checks on it, in that order. Either check may overwrite
 * *out_val. Returns without doing anything when config is NULL or the flow
 * entry cannot be obtained.
 */
static __always_inline void
-parse_ipv4(struct __sk_buff *skb, __u32 *offset, bool ingress)
+check_flow(struct qosify_flow_config *config, struct __sk_buff *skb,
+ __u8 *out_val)
+{
+ struct flow_bucket flow_data;
+ struct flow_bucket *flow;
+ __u32 hash;
+
+ if (!config)
+ return;
+
+ hash = bpf_get_hash_recalc(skb);
+ flow = bpf_map_lookup_elem(&flow_map, &hash);
/* on a miss, insert a zeroed bucket and look it up again to get a map pointer */
+ if (!flow) {
+ memset(&flow_data, 0, sizeof(flow_data));
+ bpf_map_update_elem(&flow_map, &hash, &flow_data, BPF_ANY);
+ flow = bpf_map_lookup_elem(&flow_map, &hash);
+ if (!flow)
+ return;
+ }
+
+ check_flow_bulk(config, skb, flow, out_val);
+ check_flow_prio(config, skb, flow, out_val);
+}
+
/*
 * Parse the IPv4 header at *offset and advance *offset past it.
 * Calls parse_l4proto() to fill *out_val from the L4 header, then returns
 * the ipv4_map entry for the source address (ingress) or destination address
 * (egress). Returns NULL when the header cannot be validated or pulled, or
 * when the map has no entry.
 * The config is now supplied by the caller, and the IP-map override, the
 * flow check and the DS-field rewrite no longer happen here.
 */
+static __always_inline struct qosify_ip_map_val *
+parse_ipv4(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
+ bool ingress, __u8 *out_val)
{
- struct qosify_config *config;
- struct qosify_ip_map_val *ip_val;
- struct qosify_dscp_val *value;
- const __u32 zero_port = 0;
struct iphdr *iph;
- __u8 dscp = 0xff;
__u8 ipproto;
int hdr_len;
void *key;
- bool force;
-
- config = get_config();
iph = skb_ptr(skb, *offset);
if (skb_check(skb, iph + 1))
- return;
+ return NULL;
/* ihl counts 32-bit words */
hdr_len = iph->ihl * 4;
/* make the IP header plus a UDP-sized L4 header directly accessible */
if (bpf_skb_pull_data(skb, *offset + hdr_len + sizeof(struct udphdr)))
- return;
+ return NULL;
/* the header pointer is fetched and validated again after the pull */
iph = skb_ptr(skb, *offset);
*offset += hdr_len;
if (skb_check(skb, (void *)(iph + 1)))
- return;
+ return NULL;
ipproto = iph->protocol;
- parse_l4proto(config, skb, *offset, ipproto, &dscp, ingress);
+ parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);
if (ingress)
key = &iph->saddr;
else
key = &iph->daddr;
- ip_val = bpf_map_lookup_elem(&ipv4_map, key);
- if (ip_val) {
- if (!ip_val->seen)
- ip_val->seen = 1;
- dscp = dscp_val(&ip_val->dscp, ingress);
- } else if (dscp == 0xff) {
- /* use udp port 0 entry as fallback for non-tcp/udp */
- value = bpf_map_lookup_elem(&udp_ports, &zero_port);
- if (value)
- dscp = dscp_val(value, ingress);
- }
-
- check_flow(config, skb, &dscp, ingress);
-
- force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
- dscp &= GENMASK(5, 0);
-
- ipv4_change_dsfield(iph, INET_ECN_MASK, dscp << 2, force);
+ return bpf_map_lookup_elem(&ipv4_map, key);
}
/*
 * Parse the IPv6 header at *offset and advance *offset past the fixed header.
 * Calls parse_l4proto() to fill *out_val from the L4 header, then returns
 * the ipv6_map entry for the selected address, or NULL when the header
 * cannot be pulled/validated or the map has no entry.
 * As with parse_ipv4(), the config comes from the caller and the IP-map
 * override, flow check and DS-field rewrite are no longer done here.
 */
-static __always_inline void
-parse_ipv6(struct __sk_buff *skb, __u32 *offset, bool ingress)
+static __always_inline struct qosify_ip_map_val *
+parse_ipv6(struct qosify_config *config, struct __sk_buff *skb, __u32 *offset,
+ bool ingress, __u8 *out_val)
{
- struct qosify_config *config;
- struct qosify_ip_map_val *ip_val;
- struct qosify_dscp_val *value;
- const __u32 zero_port = 0;
struct ipv6hdr *iph;
- __u8 dscp = 0;
__u8 ipproto;
void *key;
- bool force;
-
- config = get_config();
/* make the fixed IPv6 header plus a UDP-sized L4 header directly accessible */
if (bpf_skb_pull_data(skb, *offset + sizeof(*iph) + sizeof(struct udphdr)))
- return;
+ return NULL;
iph = skb_ptr(skb, *offset);
*offset += sizeof(*iph);
if (skb_check(skb, (void *)(iph + 1)))
- return;
+ return NULL;
ipproto = iph->nexthdr;
/* NOTE(review): the ingress branch body appears to be elided in this excerpt */
if (ingress)
else
key = &iph->daddr;
- parse_l4proto(config, skb, *offset, ipproto, &dscp, ingress);
+ parse_l4proto(config, skb, *offset, ipproto, ingress, out_val);
- ip_val = bpf_map_lookup_elem(&ipv6_map, key);
- if (ip_val) {
- if (!ip_val->seen)
- ip_val->seen = 1;
- dscp = dscp_val(&ip_val->dscp, ingress);
- } else if (dscp == 0xff) {
- /* use udp port 0 entry as fallback for non-tcp/udp */
- value = bpf_map_lookup_elem(&udp_ports, &zero_port);
- if (value)
- dscp = dscp_val(value, ingress);
- }
+ return bpf_map_lookup_elem(&ipv6_map, key);
+}
- check_flow(config, skb, &dscp, ingress);
/*
 * Resolve a class reference stored in *dscp.
 * If QOSIFY_DSCP_CLASS_FLAG is not set, *dscp and *out_class are left alone
 * and 0 is returned. Otherwise the value bits index class_map; a missing
 * entry or one without QOSIFY_CLASS_FLAG_PRESENT yields -1. On success *dscp
 * becomes dscp_val(&class->val, ingress) with the original fallback flag
 * carried over, *out_class points at the class, and 0 is returned.
 */
+static __always_inline int
+dscp_lookup_class(uint8_t *dscp, bool ingress, struct qosify_class **out_class)
+{
+ struct qosify_class *class;
+ __u8 fallback_flag;
+ __u32 key;
- force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
- dscp &= GENMASK(5, 0);
+ if (!(*dscp & QOSIFY_DSCP_CLASS_FLAG))
+ return 0;
+
/* remember the fallback flag; it is re-applied to the resolved value below */
+ fallback_flag = *dscp & QOSIFY_DSCP_FALLBACK_FLAG;
+ key = *dscp & QOSIFY_DSCP_VALUE_MASK;
+ class = bpf_map_lookup_elem(&class_map, &key);
+ if (!class)
+ return -1;
- ipv6_change_dsfield(iph, INET_ECN_MASK, dscp << 2, force);
+ if (!(class->flags & QOSIFY_CLASS_FLAG_PRESENT))
+ return -1;
+
+ *dscp = dscp_val(&class->val, ingress);
+ *dscp |= fallback_flag;
+ *out_class = class;
+
+ return 0;
}
SEC("classifier")
int classify(struct __sk_buff *skb)
{
bool ingress = module_flags & QOSIFY_INGRESS;
+ struct qosify_config *config;
+ struct qosify_class *class = NULL;
+ struct qosify_ip_map_val *ip_val;
__u32 offset = 0;
+ __u32 iph_offset;
+ void *iph;
+ __u8 dscp;
+ bool force;
int type;
+ config = get_config();
+ if (!config)
+ return TC_ACT_UNSPEC;
+
if (module_flags & QOSIFY_IP_ONLY)
type = skb->protocol;
else
type = parse_ethernet(skb, &offset);
+ iph_offset = offset;
+ if (type == bpf_htons(ETH_P_IP))
+ ip_val = parse_ipv4(config, skb, &offset, ingress, &dscp);
+ else if (type == bpf_htons(ETH_P_IPV6))
+ ip_val = parse_ipv6(config, skb, &offset, ingress, &dscp);
+ else
+ return TC_ACT_UNSPEC;
+
+ if (ip_val) {
+ if (!ip_val->seen)
+ ip_val->seen = 1;
+ dscp = ip_val->dscp;
+ }
+
+ if (dscp_lookup_class(&dscp, ingress, &class))
+ return TC_ACT_UNSPEC;
+
+ if (class) {
+ check_flow(&class->config, skb, &dscp);
+
+ if (dscp_lookup_class(&dscp, ingress, &class))
+ return TC_ACT_UNSPEC;
+ }
+
+ dscp &= GENMASK(5, 0);
+ dscp <<= 2;
+ force = !(dscp & QOSIFY_DSCP_FALLBACK_FLAG);
+
if (type == bpf_htons(ETH_P_IP))
- parse_ipv4(skb, &offset, ingress);
+ ipv4_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
else if (type == bpf_htons(ETH_P_IPV6))
- parse_ipv6(skb, &offset, ingress);
+ ipv6_change_dsfield(skb, iph_offset, INET_ECN_MASK, dscp, force);
- return TC_ACT_OK;
+ return TC_ACT_UNSPEC;
}
/* License string placed in the "license" section of the object file. */
char _license[] SEC("license") = "GPL";