1 // SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
 */
5 #define KBUILD_MODNAME "foo"
6 #include <uapi/linux/bpf.h>
7 #include <uapi/linux/if_ether.h>
8 #include <uapi/linux/if_packet.h>
9 #include <uapi/linux/ip.h>
10 #include <uapi/linux/ipv6.h>
11 #include <uapi/linux/in.h>
12 #include <uapi/linux/tcp.h>
13 #include <uapi/linux/udp.h>
14 #include <uapi/linux/filter.h>
15 #include <uapi/linux/pkt_cls.h>
18 #include <bpf/bpf_helpers.h>
19 #include <bpf/bpf_endian.h>
20 #include "qosify-bpf.h"
22 #define INET_ECN_MASK 3
24 #define FLOW_CHECK_INTERVAL ((u32)((1000000000ULL) >> 24))
25 #define FLOW_TIMEOUT ((u32)((30ULL * 1000000000ULL) >> 24))
26 #define FLOW_BULK_TIMEOUT 5
/*
 * Runtime feature flags (QOSIFY_INGRESS, QOSIFY_IP_ONLY, ...) patched in by
 * the loader before the program is attached.  "const volatile" places the
 * variable in .rodata so the verifier treats the loader-set value as constant.
 * Fix: storage-class specifier "static" must come first; placing it after the
 * qualifiers is obsolescent (C11 6.11.5) and triggers compiler warnings.
 */
static const volatile uint32_t module_flags = 0;
40 __uint(type
, BPF_MAP_TYPE_ARRAY
);
43 __type(value
, struct qosify_config
);
44 __uint(max_entries
, 1);
45 } config
SEC(".maps");
48 __uint(type
, BPF_MAP_TYPE_ARRAY
);
52 __uint(max_entries
, 1 << 16);
53 } tcp_ports
SEC(".maps");
56 __uint(type
, BPF_MAP_TYPE_ARRAY
);
60 __uint(max_entries
, 1 << 16);
61 } udp_ports
SEC(".maps");
64 __uint(type
, BPF_MAP_TYPE_LRU_HASH
);
67 __type(value
, struct flow_bucket
);
68 __uint(max_entries
, QOSIFY_FLOW_BUCKETS
);
69 } flow_map
SEC(".maps");
72 __uint(type
, BPF_MAP_TYPE_HASH
);
74 __uint(key_size
, sizeof(struct in_addr
));
75 __type(value
, struct qosify_ip_map_val
);
76 __uint(max_entries
, 100000);
77 __uint(map_flags
, BPF_F_NO_PREALLOC
);
78 } ipv4_map
SEC(".maps");
81 __uint(type
, BPF_MAP_TYPE_HASH
);
83 __uint(key_size
, sizeof(struct in6_addr
));
84 __type(value
, struct qosify_ip_map_val
);
85 __uint(max_entries
, 100000);
86 __uint(map_flags
, BPF_F_NO_PREALLOC
);
87 } ipv6_map
SEC(".maps");
90 __uint(type
, BPF_MAP_TYPE_ARRAY
);
93 __type(value
, struct qosify_class
);
94 __uint(max_entries
, QOSIFY_MAX_CLASS_ENTRIES
+
95 QOSIFY_DEFAULT_CLASS_ENTRIES
);
96 } class_map
SEC(".maps");
98 static struct qosify_config
*get_config(void)
102 return bpf_map_lookup_elem(&config
, &key
);
105 static __always_inline
int proto_is_vlan(__u16 h_proto
)
107 return !!(h_proto
== bpf_htons(ETH_P_8021Q
) ||
108 h_proto
== bpf_htons(ETH_P_8021AD
));
111 static __always_inline
int proto_is_ip(__u16 h_proto
)
113 return !!(h_proto
== bpf_htons(ETH_P_IP
) ||
114 h_proto
== bpf_htons(ETH_P_IPV6
));
117 static __always_inline
void *skb_ptr(struct __sk_buff
*skb
, __u32 offset
)
119 void *start
= (void *)(unsigned long long)skb
->data
;
121 return start
+ offset
;
124 static __always_inline
void *skb_end_ptr(struct __sk_buff
*skb
)
126 return (void *)(unsigned long long)skb
->data_end
;
129 static __always_inline
int skb_check(struct __sk_buff
*skb
, void *ptr
)
131 if (ptr
> skb_end_ptr(skb
))
137 static __always_inline __u32
cur_time(void)
139 __u32 val
= bpf_ktime_get_ns() >> 24;
147 static __always_inline __u32
ewma(__u32
*avg
, __u32 val
)
150 *avg
= (*avg
* 3) / 4 + (val
<< EWMA_SHIFT
) / 4;
152 *avg
= val
<< EWMA_SHIFT
;
154 return *avg
>> EWMA_SHIFT
;
157 static __always_inline __u8
dscp_val(struct qosify_dscp_val
*val
, bool ingress
)
159 __u8 ival
= val
->ingress
;
160 __u8 eval
= val
->egress
;
162 return ingress
? ival
: eval
;
165 static __always_inline
void
166 ipv4_change_dsfield(struct __sk_buff
*skb
, __u32 offset
,
167 __u8 mask
, __u8 value
, bool force
)
173 iph
= skb_ptr(skb
, offset
);
174 if (skb_check(skb
, iph
+ 1))
177 check
= bpf_ntohs(iph
->check
);
178 if ((iph
->tos
& mask
) && !force
)
181 dsfield
= (iph
->tos
& mask
) | value
;
182 if (iph
->tos
== dsfield
)
186 if ((check
+ 1) >> 16)
187 check
= (check
+ 1) & 0xffff;
189 check
+= check
>> 16;
190 iph
->check
= bpf_htons(check
);
194 static __always_inline
void
195 ipv6_change_dsfield(struct __sk_buff
*skb
, __u32 offset
,
196 __u8 mask
, __u8 value
, bool force
)
198 struct ipv6hdr
*ipv6h
;
202 ipv6h
= skb_ptr(skb
, offset
);
203 if (skb_check(skb
, ipv6h
+ 1))
207 if (((*p
>> 4) & mask
) && !force
)
210 val
= (*p
& bpf_htons((((__u16
)mask
<< 4) | 0xf00f))) | bpf_htons((__u16
)value
<< 4);
217 static __always_inline
int
218 parse_ethernet(struct __sk_buff
*skb
, __u32
*offset
)
224 eth
= skb_ptr(skb
, *offset
);
225 if (skb_check(skb
, eth
+ 1))
228 h_proto
= eth
->h_proto
;
229 *offset
+= sizeof(*eth
);
232 for (i
= 0; i
< 2; i
++) {
233 struct vlan_hdr
*vlh
= skb_ptr(skb
, *offset
);
235 if (!proto_is_vlan(h_proto
))
238 if (skb_check(skb
, vlh
+ 1))
241 h_proto
= vlh
->h_vlan_encapsulated_proto
;
242 *offset
+= sizeof(*vlh
);
249 parse_l4proto(struct qosify_config
*config
, struct __sk_buff
*skb
,
250 __u32 offset
, __u8 proto
, bool ingress
,
254 __u32 src
, dest
, key
;
257 udp
= skb_ptr(skb
, offset
);
258 if (skb_check(skb
, &udp
->len
))
261 if (config
&& (proto
== IPPROTO_ICMP
|| proto
== IPPROTO_ICMPV6
)) {
262 *out_val
= config
->dscp_icmp
;
266 src
= READ_ONCE(udp
->source
);
267 dest
= READ_ONCE(udp
->dest
);
273 if (proto
== IPPROTO_TCP
) {
274 value
= bpf_map_lookup_elem(&tcp_ports
, &key
);
276 if (proto
!= IPPROTO_UDP
)
279 value
= bpf_map_lookup_elem(&udp_ports
, &key
);
286 static __always_inline
void
287 check_flow_bulk(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
288 struct flow_bucket
*flow
, __u8
*out_val
)
290 bool trigger
= false;
295 if (!config
->bulk_trigger_pps
)
299 if (!flow
->last_update
)
302 delta
= time
- flow
->last_update
;
303 if ((u32
)delta
> FLOW_TIMEOUT
)
307 segs
= skb
->gso_segs
;
308 flow
->pkt_count
+= segs
;
309 if (flow
->pkt_count
> config
->bulk_trigger_pps
) {
310 flow
->bulk_timeout
= config
->bulk_trigger_timeout
+ 1;
314 if (delta
>= FLOW_CHECK_INTERVAL
) {
315 if (flow
->bulk_timeout
&& !trigger
)
316 flow
->bulk_timeout
--;
324 flow
->pkt_len_avg
= 0;
327 flow
->last_update
= time
;
329 if (flow
->bulk_timeout
)
330 *out_val
= config
->dscp_bulk
;
333 static __always_inline
void
334 check_flow_prio(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
335 struct flow_bucket
*flow
, __u8
*out_val
)
337 int cur_len
= skb
->len
;
339 if (flow
->bulk_timeout
)
342 if (!config
->prio_max_avg_pkt_len
)
345 if (skb
->gso_segs
> 1)
346 cur_len
/= skb
->gso_segs
;
348 if (ewma(&flow
->pkt_len_avg
, cur_len
) <= config
->prio_max_avg_pkt_len
)
349 *out_val
= config
->dscp_prio
;
352 static __always_inline
void
353 check_flow(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
356 struct flow_bucket flow_data
;
357 struct flow_bucket
*flow
;
363 hash
= bpf_get_hash_recalc(skb
);
364 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
366 memset(&flow_data
, 0, sizeof(flow_data
));
367 bpf_map_update_elem(&flow_map
, &hash
, &flow_data
, BPF_ANY
);
368 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
373 check_flow_bulk(config
, skb
, flow
, out_val
);
374 check_flow_prio(config
, skb
, flow
, out_val
);
377 static __always_inline
struct qosify_ip_map_val
*
378 parse_ipv4(struct qosify_config
*config
, struct __sk_buff
*skb
, __u32
*offset
,
379 bool ingress
, __u8
*out_val
)
386 iph
= skb_ptr(skb
, *offset
);
387 if (skb_check(skb
, iph
+ 1))
390 hdr_len
= iph
->ihl
* 4;
391 if (bpf_skb_pull_data(skb
, *offset
+ hdr_len
+ sizeof(struct udphdr
)))
394 iph
= skb_ptr(skb
, *offset
);
397 if (skb_check(skb
, (void *)(iph
+ 1)))
400 ipproto
= iph
->protocol
;
401 parse_l4proto(config
, skb
, *offset
, ipproto
, ingress
, out_val
);
408 return bpf_map_lookup_elem(&ipv4_map
, key
);
411 static __always_inline
struct qosify_ip_map_val
*
412 parse_ipv6(struct qosify_config
*config
, struct __sk_buff
*skb
, __u32
*offset
,
413 bool ingress
, __u8
*out_val
)
419 if (bpf_skb_pull_data(skb
, *offset
+ sizeof(*iph
) + sizeof(struct udphdr
)))
422 iph
= skb_ptr(skb
, *offset
);
423 *offset
+= sizeof(*iph
);
425 if (skb_check(skb
, (void *)(iph
+ 1)))
428 ipproto
= iph
->nexthdr
;
434 parse_l4proto(config
, skb
, *offset
, ipproto
, ingress
, out_val
);
436 return bpf_map_lookup_elem(&ipv6_map
, key
);
439 static __always_inline
int
440 dscp_lookup_class(uint8_t *dscp
, bool ingress
, struct qosify_class
**out_class
)
442 struct qosify_class
*class;
446 if (!(*dscp
& QOSIFY_DSCP_CLASS_FLAG
))
449 fallback_flag
= *dscp
& QOSIFY_DSCP_FALLBACK_FLAG
;
450 key
= *dscp
& QOSIFY_DSCP_VALUE_MASK
;
451 class = bpf_map_lookup_elem(&class_map
, &key
);
455 if (!(class->flags
& QOSIFY_CLASS_FLAG_PRESENT
))
458 *dscp
= dscp_val(&class->val
, ingress
);
459 *dscp
|= fallback_flag
;
466 int classify(struct __sk_buff
*skb
)
468 bool ingress
= module_flags
& QOSIFY_INGRESS
;
469 struct qosify_config
*config
;
470 struct qosify_class
*class = NULL
;
471 struct qosify_ip_map_val
*ip_val
;
479 config
= get_config();
481 return TC_ACT_UNSPEC
;
483 if (module_flags
& QOSIFY_IP_ONLY
)
484 type
= skb
->protocol
;
486 type
= parse_ethernet(skb
, &offset
);
489 if (type
== bpf_htons(ETH_P_IP
))
490 ip_val
= parse_ipv4(config
, skb
, &offset
, ingress
, &dscp
);
491 else if (type
== bpf_htons(ETH_P_IPV6
))
492 ip_val
= parse_ipv6(config
, skb
, &offset
, ingress
, &dscp
);
494 return TC_ACT_UNSPEC
;
502 if (dscp_lookup_class(&dscp
, ingress
, &class))
503 return TC_ACT_UNSPEC
;
506 check_flow(&class->config
, skb
, &dscp
);
508 if (dscp_lookup_class(&dscp
, ingress
, &class))
509 return TC_ACT_UNSPEC
;
512 dscp
&= GENMASK(5, 0);
514 force
= !(dscp
& QOSIFY_DSCP_FALLBACK_FLAG
);
516 if (type
== bpf_htons(ETH_P_IP
))
517 ipv4_change_dsfield(skb
, iph_offset
, INET_ECN_MASK
, dscp
, force
);
518 else if (type
== bpf_htons(ETH_P_IPV6
))
519 ipv6_change_dsfield(skb
, iph_offset
, INET_ECN_MASK
, dscp
, force
);
521 return TC_ACT_UNSPEC
;
524 char _license
[] SEC("license") = "GPL";