1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
5 #define KBUILD_MODNAME "foo"
6 #include <uapi/linux/bpf.h>
7 #include <uapi/linux/if_ether.h>
8 #include <uapi/linux/if_packet.h>
9 #include <uapi/linux/ip.h>
10 #include <uapi/linux/ipv6.h>
11 #include <uapi/linux/in.h>
12 #include <uapi/linux/tcp.h>
13 #include <uapi/linux/udp.h>
14 #include <uapi/linux/filter.h>
15 #include <uapi/linux/pkt_cls.h>
18 #include <bpf/bpf_helpers.h>
19 #include <bpf/bpf_endian.h>
20 #include "qosify-bpf.h"
/* Low two bits of the TOS / traffic-class byte: the ECN field, which must
 * be preserved when rewriting DSCP (see ipv4/ipv6_change_dsfield). */
#define INET_ECN_MASK 3

/* Flow timing constants, expressed in the same coarse time units as
 * cur_time(): nanoseconds >> 24, i.e. roughly 16.8 ms per tick. */
#define FLOW_CHECK_INTERVAL ((u32)((1000000000ULL) >> 24))
#define FLOW_TIMEOUT ((u32)((30ULL * 1000000000ULL) >> 24))
#define FLOW_BULK_TIMEOUT 5
/*
 * Module behavior flags (QOSIFY_INGRESS, QOSIFY_IP_ONLY — see classify()).
 * const volatile keeps the value in .rodata; presumably the loader patches
 * it before program load — TODO confirm against the userspace loader.
 *
 * Fix: moved the storage-class specifier `static` to the front of the
 * declaration; any other placement is an obsolescent feature (C11 6.11.5)
 * and trips -Wold-style-declaration.
 */
static const volatile uint32_t module_flags = 0;
40 __uint(type
, BPF_MAP_TYPE_ARRAY
);
43 __type(value
, struct qosify_config
);
44 __uint(max_entries
, 1);
45 } config
SEC(".maps");
48 __uint(type
, BPF_MAP_TYPE_ARRAY
);
52 __uint(max_entries
, 1 << 16);
56 __uint(type
, BPF_MAP_TYPE_LRU_HASH
);
59 __type(value
, struct flow_bucket
);
60 __uint(max_entries
, QOSIFY_FLOW_BUCKETS
);
61 } flow_map
SEC(".maps");
63 port_array_t tcp_ports
SEC(".maps");
64 port_array_t udp_ports
SEC(".maps");
67 __uint(type
, BPF_MAP_TYPE_HASH
);
69 __uint(key_size
, sizeof(struct in_addr
));
70 __type(value
, struct qosify_ip_map_val
);
71 __uint(max_entries
, 100000);
72 __uint(map_flags
, BPF_F_NO_PREALLOC
);
73 } ipv4_map
SEC(".maps");
76 __uint(type
, BPF_MAP_TYPE_HASH
);
78 __uint(key_size
, sizeof(struct in6_addr
));
79 __type(value
, struct qosify_ip_map_val
);
80 __uint(max_entries
, 100000);
81 __uint(map_flags
, BPF_F_NO_PREALLOC
);
82 } ipv6_map
SEC(".maps");
85 __uint(type
, BPF_MAP_TYPE_ARRAY
);
88 __type(value
, struct qosify_class
);
89 __uint(max_entries
, QOSIFY_MAX_CLASS_ENTRIES
+
90 QOSIFY_DEFAULT_CLASS_ENTRIES
);
91 } class_map
SEC(".maps");
93 static struct qosify_config
*get_config(void)
97 return bpf_map_lookup_elem(&config
, &key
);
100 static __always_inline
int proto_is_vlan(__u16 h_proto
)
102 return !!(h_proto
== bpf_htons(ETH_P_8021Q
) ||
103 h_proto
== bpf_htons(ETH_P_8021AD
));
106 static __always_inline
int proto_is_ip(__u16 h_proto
)
108 return !!(h_proto
== bpf_htons(ETH_P_IP
) ||
109 h_proto
== bpf_htons(ETH_P_IPV6
));
112 static __always_inline
void *skb_ptr(struct __sk_buff
*skb
, __u32 offset
)
114 void *start
= (void *)(unsigned long long)skb
->data
;
116 return start
+ offset
;
119 static __always_inline
void *skb_end_ptr(struct __sk_buff
*skb
)
121 return (void *)(unsigned long long)skb
->data_end
;
124 static __always_inline
int skb_check(struct __sk_buff
*skb
, void *ptr
)
126 if (ptr
> skb_end_ptr(skb
))
132 static __always_inline __u32
cur_time(void)
134 __u32 val
= bpf_ktime_get_ns() >> 24;
142 static __always_inline __u32
ewma(__u32
*avg
, __u32 val
)
145 *avg
= (*avg
* 3) / 4 + (val
<< EWMA_SHIFT
) / 4;
147 *avg
= val
<< EWMA_SHIFT
;
149 return *avg
>> EWMA_SHIFT
;
152 static __always_inline __u8
dscp_val(struct qosify_dscp_val
*val
, bool ingress
)
154 __u8 ival
= val
->ingress
;
155 __u8 eval
= val
->egress
;
157 return ingress
? ival
: eval
;
160 static __always_inline
void
161 ipv4_change_dsfield(struct __sk_buff
*skb
, __u32 offset
,
162 __u8 mask
, __u8 value
, bool force
)
168 iph
= skb_ptr(skb
, offset
);
169 if (skb_check(skb
, iph
+ 1))
172 check
= bpf_ntohs(iph
->check
);
173 if ((iph
->tos
& mask
) && !force
)
176 dsfield
= (iph
->tos
& mask
) | value
;
177 if (iph
->tos
== dsfield
)
181 if ((check
+ 1) >> 16)
182 check
= (check
+ 1) & 0xffff;
184 check
+= check
>> 16;
185 iph
->check
= bpf_htons(check
);
189 static __always_inline
void
190 ipv6_change_dsfield(struct __sk_buff
*skb
, __u32 offset
,
191 __u8 mask
, __u8 value
, bool force
)
193 struct ipv6hdr
*ipv6h
;
197 ipv6h
= skb_ptr(skb
, offset
);
198 if (skb_check(skb
, ipv6h
+ 1))
202 if (((*p
>> 4) & mask
) && !force
)
205 val
= (*p
& bpf_htons((((__u16
)mask
<< 4) | 0xf00f))) | bpf_htons((__u16
)value
<< 4);
212 static __always_inline
int
213 parse_ethernet(struct __sk_buff
*skb
, __u32
*offset
)
219 eth
= skb_ptr(skb
, *offset
);
220 if (skb_check(skb
, eth
+ 1))
223 h_proto
= eth
->h_proto
;
224 *offset
+= sizeof(*eth
);
227 for (i
= 0; i
< 2; i
++) {
228 struct vlan_hdr
*vlh
= skb_ptr(skb
, *offset
);
230 if (!proto_is_vlan(h_proto
))
233 if (skb_check(skb
, vlh
+ 1))
236 h_proto
= vlh
->h_vlan_encapsulated_proto
;
237 *offset
+= sizeof(*vlh
);
244 parse_l4proto(struct qosify_config
*config
, struct __sk_buff
*skb
,
245 __u32 offset
, __u8 proto
, bool ingress
,
249 __u32 src
, dest
, key
;
252 udp
= skb_ptr(skb
, offset
);
253 if (skb_check(skb
, &udp
->len
))
256 if (config
&& (proto
== IPPROTO_ICMP
|| proto
== IPPROTO_ICMPV6
)) {
257 *out_val
= config
->dscp_icmp
;
266 if (proto
== IPPROTO_TCP
) {
267 value
= bpf_map_lookup_elem(&tcp_ports
, &key
);
269 if (proto
!= IPPROTO_UDP
)
272 value
= bpf_map_lookup_elem(&udp_ports
, &key
);
279 static __always_inline
void
280 check_flow_bulk(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
281 struct flow_bucket
*flow
, __u8
*out_val
)
283 bool trigger
= false;
288 if (!config
->bulk_trigger_pps
)
292 if (!flow
->last_update
)
295 delta
= time
- flow
->last_update
;
296 if ((u32
)delta
> FLOW_TIMEOUT
)
300 segs
= skb
->gso_segs
;
301 flow
->pkt_count
+= segs
;
302 if (flow
->pkt_count
> config
->bulk_trigger_pps
) {
303 flow
->bulk_timeout
= config
->bulk_trigger_timeout
+ 1;
307 if (delta
>= FLOW_CHECK_INTERVAL
) {
308 if (flow
->bulk_timeout
&& !trigger
)
309 flow
->bulk_timeout
--;
317 flow
->pkt_len_avg
= 0;
320 flow
->last_update
= time
;
322 if (flow
->bulk_timeout
)
323 *out_val
= config
->dscp_bulk
;
326 static __always_inline
void
327 check_flow_prio(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
328 struct flow_bucket
*flow
, __u8
*out_val
)
330 int cur_len
= skb
->len
;
332 if (flow
->bulk_timeout
)
335 if (!config
->prio_max_avg_pkt_len
)
338 if (skb
->gso_segs
> 1)
339 cur_len
/= skb
->gso_segs
;
341 if (ewma(&flow
->pkt_len_avg
, cur_len
) <= config
->prio_max_avg_pkt_len
)
342 *out_val
= config
->dscp_prio
;
345 static __always_inline
void
346 check_flow(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
349 struct flow_bucket flow_data
;
350 struct flow_bucket
*flow
;
356 hash
= bpf_get_hash_recalc(skb
);
357 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
359 memset(&flow_data
, 0, sizeof(flow_data
));
360 bpf_map_update_elem(&flow_map
, &hash
, &flow_data
, BPF_ANY
);
361 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
366 check_flow_bulk(config
, skb
, flow
, out_val
);
367 check_flow_prio(config
, skb
, flow
, out_val
);
370 static __always_inline
struct qosify_ip_map_val
*
371 parse_ipv4(struct qosify_config
*config
, struct __sk_buff
*skb
, __u32
*offset
,
372 bool ingress
, __u8
*out_val
)
379 iph
= skb_ptr(skb
, *offset
);
380 if (skb_check(skb
, iph
+ 1))
383 hdr_len
= iph
->ihl
* 4;
384 if (bpf_skb_pull_data(skb
, *offset
+ hdr_len
+ sizeof(struct udphdr
)))
387 iph
= skb_ptr(skb
, *offset
);
390 if (skb_check(skb
, (void *)(iph
+ 1)))
393 ipproto
= iph
->protocol
;
394 parse_l4proto(config
, skb
, *offset
, ipproto
, ingress
, out_val
);
401 return bpf_map_lookup_elem(&ipv4_map
, key
);
404 static __always_inline
struct qosify_ip_map_val
*
405 parse_ipv6(struct qosify_config
*config
, struct __sk_buff
*skb
, __u32
*offset
,
406 bool ingress
, __u8
*out_val
)
412 if (bpf_skb_pull_data(skb
, *offset
+ sizeof(*iph
) + sizeof(struct udphdr
)))
415 iph
= skb_ptr(skb
, *offset
);
416 *offset
+= sizeof(*iph
);
418 if (skb_check(skb
, (void *)(iph
+ 1)))
421 ipproto
= iph
->nexthdr
;
427 parse_l4proto(config
, skb
, *offset
, ipproto
, ingress
, out_val
);
429 return bpf_map_lookup_elem(&ipv6_map
, key
);
432 static __always_inline
int
433 dscp_lookup_class(uint8_t *dscp
, bool ingress
, struct qosify_class
**out_class
)
435 struct qosify_class
*class;
439 if (!(*dscp
& QOSIFY_DSCP_CLASS_FLAG
))
442 fallback_flag
= *dscp
& QOSIFY_DSCP_FALLBACK_FLAG
;
443 key
= *dscp
& QOSIFY_DSCP_VALUE_MASK
;
444 class = bpf_map_lookup_elem(&class_map
, &key
);
448 if (!(class->flags
& QOSIFY_CLASS_FLAG_PRESENT
))
451 *dscp
= dscp_val(&class->val
, ingress
);
452 *dscp
|= fallback_flag
;
459 int classify(struct __sk_buff
*skb
)
461 bool ingress
= module_flags
& QOSIFY_INGRESS
;
462 struct qosify_config
*config
;
463 struct qosify_class
*class = NULL
;
464 struct qosify_ip_map_val
*ip_val
;
472 config
= get_config();
476 if (module_flags
& QOSIFY_IP_ONLY
)
477 type
= skb
->protocol
;
479 type
= parse_ethernet(skb
, &offset
);
482 if (type
== bpf_htons(ETH_P_IP
))
483 ip_val
= parse_ipv4(config
, skb
, &offset
, ingress
, &dscp
);
484 else if (type
== bpf_htons(ETH_P_IPV6
))
485 ip_val
= parse_ipv6(config
, skb
, &offset
, ingress
, &dscp
);
495 if (dscp_lookup_class(&dscp
, ingress
, &class))
499 check_flow(&class->config
, skb
, &dscp
);
501 if (dscp_lookup_class(&dscp
, ingress
, &class))
505 dscp
&= GENMASK(5, 0);
507 force
= !(dscp
& QOSIFY_DSCP_FALLBACK_FLAG
);
509 if (type
== bpf_htons(ETH_P_IP
))
510 ipv4_change_dsfield(skb
, iph_offset
, INET_ECN_MASK
, dscp
, force
);
511 else if (type
== bpf_htons(ETH_P_IPV6
))
512 ipv6_change_dsfield(skb
, iph_offset
, INET_ECN_MASK
, dscp
, force
);
517 char _license
[] SEC("license") = "GPL";