1 // SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
 */
5 #define KBUILD_MODNAME "foo"
6 #include <uapi/linux/bpf.h>
7 #include <uapi/linux/if_ether.h>
8 #include <uapi/linux/if_packet.h>
9 #include <uapi/linux/ip.h>
10 #include <uapi/linux/ipv6.h>
11 #include <uapi/linux/in.h>
12 #include <uapi/linux/tcp.h>
13 #include <uapi/linux/udp.h>
14 #include <uapi/linux/filter.h>
15 #include <uapi/linux/pkt_cls.h>
18 #include <bpf/bpf_helpers.h>
19 #include <bpf/bpf_endian.h>
20 #include "qosify-bpf.h"
22 #define INET_ECN_MASK 3
24 #define FLOW_CHECK_INTERVAL ((u32)((1000000000ULL) >> 24))
25 #define FLOW_TIMEOUT ((u32)((30ULL * 1000000000ULL) >> 24))
26 #define FLOW_BULK_TIMEOUT 5
/* Feature flags (QOSIFY_INGRESS, QOSIFY_IP_ONLY, ...) patched in by the
 * loader before attach; "const volatile" makes libbpf place this in
 * .rodata so it can be rewritten at load time.
 * Fix: storage-class specifier goes first ("const volatile static" is
 * valid but deprecated placement in C).
 */
static const volatile uint32_t module_flags = 0;
41 __uint(type
, BPF_MAP_TYPE_ARRAY
);
44 __type(value
, struct qosify_config
);
45 __uint(max_entries
, 1);
46 } config
SEC(".maps");
49 __uint(type
, BPF_MAP_TYPE_ARRAY
);
52 __type(value
, struct qosify_dscp_val
);
53 __uint(max_entries
, 1 << 16);
57 __uint(type
, BPF_MAP_TYPE_LRU_HASH
);
60 __uint(value_size
, sizeof(struct flow_bucket
));
61 __uint(max_entries
, QOSIFY_FLOW_BUCKETS
);
62 } flow_map
SEC(".maps");
64 port_array_t tcp_ports
SEC(".maps");
65 port_array_t udp_ports
SEC(".maps");
68 __uint(type
, BPF_MAP_TYPE_HASH
);
70 __uint(key_size
, sizeof(struct in_addr
));
71 __type(value
, struct qosify_ip_map_val
);
72 __uint(max_entries
, 100000);
73 __uint(map_flags
, BPF_F_NO_PREALLOC
);
74 } ipv4_map
SEC(".maps");
77 __uint(type
, BPF_MAP_TYPE_HASH
);
79 __uint(key_size
, sizeof(struct in6_addr
));
80 __type(value
, struct qosify_ip_map_val
);
81 __uint(max_entries
, 100000);
82 __uint(map_flags
, BPF_F_NO_PREALLOC
);
83 } ipv6_map
SEC(".maps");
85 static struct qosify_config
*get_config(void)
89 return bpf_map_lookup_elem(&config
, &key
);
92 static __always_inline
int proto_is_vlan(__u16 h_proto
)
94 return !!(h_proto
== bpf_htons(ETH_P_8021Q
) ||
95 h_proto
== bpf_htons(ETH_P_8021AD
));
98 static __always_inline
int proto_is_ip(__u16 h_proto
)
100 return !!(h_proto
== bpf_htons(ETH_P_IP
) ||
101 h_proto
== bpf_htons(ETH_P_IPV6
));
104 static __always_inline
void *skb_ptr(struct __sk_buff
*skb
, __u32 offset
)
106 void *start
= (void *)(unsigned long long)skb
->data
;
108 return start
+ offset
;
111 static __always_inline
void *skb_end_ptr(struct __sk_buff
*skb
)
113 return (void *)(unsigned long long)skb
->data_end
;
116 static __always_inline
int skb_check(struct __sk_buff
*skb
, void *ptr
)
118 if (ptr
> skb_end_ptr(skb
))
124 static __always_inline __u32
cur_time(void)
126 __u32 val
= bpf_ktime_get_ns() >> 24;
134 static __always_inline __u32
ewma(__u32
*avg
, __u32 val
)
137 *avg
= (*avg
* 3) / 4 + (val
<< EWMA_SHIFT
) / 4;
139 *avg
= val
<< EWMA_SHIFT
;
141 return *avg
>> EWMA_SHIFT
;
144 static __always_inline __u8
dscp_val(struct qosify_dscp_val
*val
, bool ingress
)
146 __u8 ival
= val
->ingress
;
147 __u8 eval
= val
->egress
;
149 return ingress
? ival
: eval
;
152 static __always_inline
void
153 ipv4_change_dsfield(struct iphdr
*iph
, __u8 mask
, __u8 value
, bool force
)
155 __u32 check
= bpf_ntohs(iph
->check
);
158 if ((iph
->tos
& mask
) && !force
)
161 dsfield
= (iph
->tos
& mask
) | value
;
162 if (iph
->tos
== dsfield
)
166 if ((check
+ 1) >> 16)
167 check
= (check
+ 1) & 0xffff;
169 check
+= check
>> 16;
170 iph
->check
= bpf_htons(check
);
174 static __always_inline
void
175 ipv6_change_dsfield(struct ipv6hdr
*ipv6h
, __u8 mask
, __u8 value
, bool force
)
177 __u16
*p
= (__u16
*)ipv6h
;
180 if (((*p
>> 4) & mask
) && !force
)
183 val
= (*p
& bpf_htons((((__u16
)mask
<< 4) | 0xf00f))) | bpf_htons((__u16
)value
<< 4);
190 static __always_inline
int
191 parse_ethernet(struct __sk_buff
*skb
, __u32
*offset
)
197 eth
= skb_ptr(skb
, *offset
);
198 if (skb_check(skb
, eth
+ 1))
201 h_proto
= eth
->h_proto
;
202 *offset
+= sizeof(*eth
);
205 for (i
= 0; i
< 2; i
++) {
206 struct vlan_hdr
*vlh
= skb_ptr(skb
, *offset
);
208 if (!proto_is_vlan(h_proto
))
211 if (skb_check(skb
, vlh
+ 1))
214 h_proto
= vlh
->h_vlan_encapsulated_proto
;
215 *offset
+= sizeof(*vlh
);
222 parse_l4proto(struct qosify_config
*config
, struct __sk_buff
*skb
,
223 __u32 offset
, __u8 proto
, __u8
*dscp_out
, bool ingress
)
226 __u32 src
, dest
, key
;
227 struct qosify_dscp_val
*value
;
229 udp
= skb_ptr(skb
, offset
);
230 if (skb_check(skb
, &udp
->len
))
233 if (config
&& (proto
== IPPROTO_ICMP
|| proto
== IPPROTO_ICMPV6
)) {
234 *dscp_out
= dscp_val(&config
->dscp_icmp
, ingress
);
243 if (proto
== IPPROTO_TCP
) {
244 value
= bpf_map_lookup_elem(&tcp_ports
, &key
);
246 if (proto
!= IPPROTO_UDP
)
249 value
= bpf_map_lookup_elem(&udp_ports
, &key
);
255 *dscp_out
= dscp_val(value
, ingress
);
258 static __always_inline
void
259 check_flow(struct qosify_config
*config
, struct __sk_buff
*skb
,
260 uint8_t *dscp
, bool ingress
)
262 struct flow_bucket flow_data
;
263 struct flow_bucket
*flow
;
268 if (!(*dscp
& QOSIFY_DSCP_DEFAULT_FLAG
))
274 if (!config
->bulk_trigger_pps
&&
275 !config
->prio_max_avg_pkt_len
)
279 hash
= bpf_get_hash_recalc(skb
);
280 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
282 memset(&flow_data
, 0, sizeof(flow_data
));
283 bpf_map_update_elem(&flow_map
, &hash
, &flow_data
, BPF_ANY
);
284 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
289 if (!flow
->last_update
)
292 delta
= time
- flow
->last_update
;
293 if ((u32
)delta
> FLOW_TIMEOUT
)
296 if (delta
>= FLOW_CHECK_INTERVAL
) {
297 if (flow
->bulk_timeout
) {
298 flow
->bulk_timeout
--;
299 if (!flow
->bulk_timeout
)
306 if (flow
->pkt_count
< 0xffff)
309 if (config
->bulk_trigger_pps
&&
310 flow
->pkt_count
> config
->bulk_trigger_pps
) {
311 flow
->dscp
= dscp_val(&config
->dscp_bulk
, ingress
);
312 flow
->bulk_timeout
= config
->bulk_trigger_timeout
;
316 if (config
->prio_max_avg_pkt_len
&&
317 flow
->dscp
!= dscp_val(&config
->dscp_bulk
, ingress
)) {
318 if (ewma(&flow
->pkt_len_avg
, skb
->len
) <
319 config
->prio_max_avg_pkt_len
)
320 flow
->dscp
= dscp_val(&config
->dscp_prio
, ingress
);
325 if (flow
->dscp
!= 0xff)
332 flow
->pkt_len_avg
= 0;
335 flow
->last_update
= time
;
340 static __always_inline
void
341 parse_ipv4(struct __sk_buff
*skb
, __u32
*offset
, bool ingress
)
343 struct qosify_config
*config
;
344 struct qosify_ip_map_val
*ip_val
;
345 struct qosify_dscp_val
*value
;
346 const __u32 zero_port
= 0;
354 config
= get_config();
356 iph
= skb_ptr(skb
, *offset
);
357 if (skb_check(skb
, iph
+ 1))
360 hdr_len
= iph
->ihl
* 4;
361 if (bpf_skb_pull_data(skb
, *offset
+ hdr_len
+ sizeof(struct udphdr
)))
364 iph
= skb_ptr(skb
, *offset
);
367 if (skb_check(skb
, (void *)(iph
+ 1)))
370 ipproto
= iph
->protocol
;
371 parse_l4proto(config
, skb
, *offset
, ipproto
, &dscp
, ingress
);
378 ip_val
= bpf_map_lookup_elem(&ipv4_map
, key
);
382 dscp
= dscp_val(&ip_val
->dscp
, ingress
);
383 } else if (dscp
== 0xff) {
384 /* use udp port 0 entry as fallback for non-tcp/udp */
385 value
= bpf_map_lookup_elem(&udp_ports
, &zero_port
);
387 dscp
= dscp_val(value
, ingress
);
390 check_flow(config
, skb
, &dscp
, ingress
);
392 force
= !(dscp
& QOSIFY_DSCP_FALLBACK_FLAG
);
393 dscp
&= GENMASK(5, 0);
395 ipv4_change_dsfield(iph
, INET_ECN_MASK
, dscp
<< 2, force
);
398 static __always_inline
void
399 parse_ipv6(struct __sk_buff
*skb
, __u32
*offset
, bool ingress
)
401 struct qosify_config
*config
;
402 struct qosify_ip_map_val
*ip_val
;
403 struct qosify_dscp_val
*value
;
404 const __u32 zero_port
= 0;
411 config
= get_config();
413 if (bpf_skb_pull_data(skb
, *offset
+ sizeof(*iph
) + sizeof(struct udphdr
)))
416 iph
= skb_ptr(skb
, *offset
);
417 *offset
+= sizeof(*iph
);
419 if (skb_check(skb
, (void *)(iph
+ 1)))
422 ipproto
= iph
->nexthdr
;
428 parse_l4proto(config
, skb
, *offset
, ipproto
, &dscp
, ingress
);
430 ip_val
= bpf_map_lookup_elem(&ipv6_map
, key
);
434 dscp
= dscp_val(&ip_val
->dscp
, ingress
);
435 } else if (dscp
== 0xff) {
436 /* use udp port 0 entry as fallback for non-tcp/udp */
437 value
= bpf_map_lookup_elem(&udp_ports
, &zero_port
);
439 dscp
= dscp_val(value
, ingress
);
442 check_flow(config
, skb
, &dscp
, ingress
);
444 force
= !(dscp
& QOSIFY_DSCP_FALLBACK_FLAG
);
445 dscp
&= GENMASK(5, 0);
447 ipv6_change_dsfield(iph
, INET_ECN_MASK
, dscp
<< 2, force
);
451 int classify(struct __sk_buff
*skb
)
453 bool ingress
= module_flags
& QOSIFY_INGRESS
;
457 if (module_flags
& QOSIFY_IP_ONLY
)
458 type
= skb
->protocol
;
460 type
= parse_ethernet(skb
, &offset
);
462 if (type
== bpf_htons(ETH_P_IP
))
463 parse_ipv4(skb
, &offset
, ingress
);
464 else if (type
== bpf_htons(ETH_P_IPV6
))
465 parse_ipv6(skb
, &offset
, ingress
);
470 char _license
[] SEC("license") = "GPL";