// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2021 Felix Fietkau <nbd@nbd.name> */
#define KBUILD_MODNAME "foo"
#include <uapi/linux/bpf.h>
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>
#include <uapi/linux/filter.h>
#include <uapi/linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "qosify-bpf.h"
/* Low two bits of the IPv4 ToS / IPv6 traffic-class byte carry ECN;
 * they are preserved when the DSCP field is rewritten. */
#define INET_ECN_MASK 3

/* Flow timing constants, expressed in cur_time() units (ns >> 24,
 * i.e. roughly 16.8 ms per tick). */
#define FLOW_CHECK_INTERVAL	((u32)((1000000000ULL) >> 24))
#define FLOW_TIMEOUT		((u32)((30ULL * 1000000000ULL) >> 24))
#define FLOW_BULK_TIMEOUT	5
/* Feature flags (e.g. QOSIFY_INGRESS, QOSIFY_IP_ONLY) — presumably patched
 * in by the userspace loader before attach; TODO confirm against loader.
 * Storage-class specifier moved first: C11 (6.11.5) deprecates "static"
 * appearing after type qualifiers. */
static const volatile uint32_t module_flags = 0;
41 __uint(type
, BPF_MAP_TYPE_ARRAY
);
44 __type(value
, struct qosify_config
);
45 __uint(max_entries
, 1);
46 } config
SEC(".maps");
49 __uint(type
, BPF_MAP_TYPE_ARRAY
);
53 __uint(max_entries
, 1 << 16);
57 __uint(type
, BPF_MAP_TYPE_LRU_HASH
);
60 __uint(value_size
, sizeof(struct flow_bucket
));
61 __uint(max_entries
, QOSIFY_FLOW_BUCKETS
);
62 } flow_map
SEC(".maps");
64 port_array_t tcp_ports
SEC(".maps");
65 port_array_t udp_ports
SEC(".maps");
68 __uint(type
, BPF_MAP_TYPE_HASH
);
70 __uint(key_size
, sizeof(struct in_addr
));
72 __uint(max_entries
, 100000);
73 __uint(map_flags
, BPF_F_NO_PREALLOC
);
74 } ipv4_map
SEC(".maps");
77 __uint(type
, BPF_MAP_TYPE_HASH
);
79 __uint(key_size
, sizeof(struct in6_addr
));
81 __uint(max_entries
, 100000);
82 __uint(map_flags
, BPF_F_NO_PREALLOC
);
83 } ipv6_map
SEC(".maps");
85 static struct qosify_config
*get_config(void)
89 return bpf_map_lookup_elem(&config
, &key
);
92 static __always_inline
int proto_is_vlan(__u16 h_proto
)
94 return !!(h_proto
== bpf_htons(ETH_P_8021Q
) ||
95 h_proto
== bpf_htons(ETH_P_8021AD
));
98 static __always_inline
int proto_is_ip(__u16 h_proto
)
100 return !!(h_proto
== bpf_htons(ETH_P_IP
) ||
101 h_proto
== bpf_htons(ETH_P_IPV6
));
104 static __always_inline
void *skb_ptr(struct __sk_buff
*skb
, __u32 offset
)
106 void *start
= (void *)(unsigned long long)skb
->data
;
108 return start
+ offset
;
111 static __always_inline
void *skb_end_ptr(struct __sk_buff
*skb
)
113 return (void *)(unsigned long long)skb
->data_end
;
116 static __always_inline
int skb_check(struct __sk_buff
*skb
, void *ptr
)
118 if (ptr
> skb_end_ptr(skb
))
124 static __always_inline __u32
cur_time(void)
126 __u32 val
= bpf_ktime_get_ns() >> 24;
134 static __always_inline __u32
ewma(__u32
*avg
, __u32 val
)
137 *avg
= (*avg
* 3) / 4 + (val
<< EWMA_SHIFT
) / 4;
139 *avg
= val
<< EWMA_SHIFT
;
141 return *avg
>> EWMA_SHIFT
;
144 static __always_inline
void
145 ipv4_change_dsfield(struct iphdr
*iph
, __u8 mask
, __u8 value
, bool force
)
147 __u32 check
= bpf_ntohs(iph
->check
);
150 if ((iph
->tos
& mask
) && !force
)
153 dsfield
= (iph
->tos
& mask
) | value
;
154 if (iph
->tos
== dsfield
)
158 if ((check
+ 1) >> 16)
159 check
= (check
+ 1) & 0xffff;
161 check
+= check
>> 16;
162 iph
->check
= bpf_htons(check
);
166 static __always_inline
void
167 ipv6_change_dsfield(struct ipv6hdr
*ipv6h
, __u8 mask
, __u8 value
, bool force
)
169 __u16
*p
= (__u16
*)ipv6h
;
172 if (((*p
>> 4) & mask
) && !force
)
175 val
= (*p
& bpf_htons((((__u16
)mask
<< 4) | 0xf00f))) | bpf_htons((__u16
)value
<< 4);
182 static __always_inline
int
183 parse_ethernet(struct __sk_buff
*skb
, __u32
*offset
)
189 eth
= skb_ptr(skb
, *offset
);
190 if (skb_check(skb
, eth
+ 1))
193 h_proto
= eth
->h_proto
;
194 *offset
+= sizeof(*eth
);
197 for (i
= 0; i
< 2; i
++) {
198 struct vlan_hdr
*vlh
= skb_ptr(skb
, *offset
);
200 if (!proto_is_vlan(h_proto
))
203 if (skb_check(skb
, vlh
+ 1))
206 h_proto
= vlh
->h_vlan_encapsulated_proto
;
207 *offset
+= sizeof(*vlh
);
214 parse_l4proto(struct qosify_config
*config
, struct __sk_buff
*skb
,
215 __u32 offset
, __u8 proto
, __u8
*dscp_out
)
218 __u32 src
, dest
, key
;
221 udp
= skb_ptr(skb
, offset
);
222 if (skb_check(skb
, &udp
->len
))
225 if (config
&& (proto
== IPPROTO_ICMP
|| proto
== IPPROTO_ICMPV6
)) {
226 *dscp_out
= config
->dscp_icmp
;
233 if (module_flags
& QOSIFY_INGRESS
)
238 if (proto
== IPPROTO_TCP
) {
239 value
= bpf_map_lookup_elem(&tcp_ports
, &key
);
241 if (proto
!= IPPROTO_UDP
)
244 value
= bpf_map_lookup_elem(&udp_ports
, &key
);
254 check_flow(struct qosify_config
*config
, struct __sk_buff
*skb
,
257 struct flow_bucket flow_data
;
258 struct flow_bucket
*flow
;
263 if (!(*dscp
& QOSIFY_DSCP_DEFAULT_FLAG
))
269 if (!config
->bulk_trigger_pps
&&
270 !config
->prio_max_avg_pkt_len
)
274 hash
= bpf_get_hash_recalc(skb
);
275 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
277 memset(&flow_data
, 0, sizeof(flow_data
));
278 bpf_map_update_elem(&flow_map
, &hash
, &flow_data
, BPF_ANY
);
279 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
284 if (!flow
->last_update
)
287 delta
= time
- flow
->last_update
;
288 if ((u32
)delta
> FLOW_TIMEOUT
)
291 if (delta
>= FLOW_CHECK_INTERVAL
) {
292 if (flow
->bulk_timeout
) {
293 flow
->bulk_timeout
--;
294 if (!flow
->bulk_timeout
)
301 if (flow
->pkt_count
< 0xffff)
304 if (config
->bulk_trigger_pps
&&
305 flow
->pkt_count
> config
->bulk_trigger_pps
) {
306 flow
->dscp
= config
->dscp_bulk
;
307 flow
->bulk_timeout
= config
->bulk_trigger_timeout
;
311 if (config
->prio_max_avg_pkt_len
&&
312 flow
->dscp
!= config
->dscp_bulk
) {
313 if (ewma(&flow
->pkt_len_avg
, skb
->len
) <
314 config
->prio_max_avg_pkt_len
)
315 flow
->dscp
= config
->dscp_prio
;
320 if (flow
->dscp
!= 0xff)
327 flow
->pkt_len_avg
= 0;
330 flow
->last_update
= time
;
335 static __always_inline
void
336 parse_ipv4(struct __sk_buff
*skb
, __u32
*offset
)
338 struct qosify_config
*config
;
339 const __u32 zero_port
= 0;
348 config
= get_config();
350 iph
= skb_ptr(skb
, *offset
);
351 if (skb_check(skb
, iph
+ 1))
354 hdr_len
= iph
->ihl
* 4;
355 if (bpf_skb_pull_data(skb
, *offset
+ hdr_len
+ sizeof(struct udphdr
)))
358 iph
= skb_ptr(skb
, *offset
);
361 if (skb_check(skb
, (void *)(iph
+ 1)))
364 ipproto
= iph
->protocol
;
365 parse_l4proto(config
, skb
, *offset
, ipproto
, &dscp
);
367 if (module_flags
& QOSIFY_INGRESS
)
372 value
= bpf_map_lookup_elem(&ipv4_map
, key
);
373 /* use udp port 0 entry as fallback for non-tcp/udp */
374 if (!value
&& dscp
== 0xff)
375 value
= bpf_map_lookup_elem(&udp_ports
, &zero_port
);
379 check_flow(config
, skb
, &dscp
);
381 force
= !(dscp
& QOSIFY_DSCP_FALLBACK_FLAG
);
382 dscp
&= GENMASK(5, 0);
384 ipv4_change_dsfield(iph
, INET_ECN_MASK
, dscp
<< 2, force
);
387 static __always_inline
void
388 parse_ipv6(struct __sk_buff
*skb
, __u32
*offset
)
390 struct qosify_config
*config
;
391 const __u32 zero_port
= 0;
399 config
= get_config();
401 if (bpf_skb_pull_data(skb
, *offset
+ sizeof(*iph
) + sizeof(struct udphdr
)))
404 iph
= skb_ptr(skb
, *offset
);
405 *offset
+= sizeof(*iph
);
407 if (skb_check(skb
, (void *)(iph
+ 1)))
410 ipproto
= iph
->nexthdr
;
411 if (module_flags
& QOSIFY_INGRESS
)
416 parse_l4proto(config
, skb
, *offset
, ipproto
, &dscp
);
418 value
= bpf_map_lookup_elem(&ipv6_map
, key
);
420 /* use udp port 0 entry as fallback for non-tcp/udp */
422 value
= bpf_map_lookup_elem(&udp_ports
, &zero_port
);
426 check_flow(config
, skb
, &dscp
);
428 force
= !(dscp
& QOSIFY_DSCP_FALLBACK_FLAG
);
429 dscp
&= GENMASK(5, 0);
431 ipv6_change_dsfield(iph
, INET_ECN_MASK
, dscp
<< 2, force
);
435 int classify(struct __sk_buff
*skb
)
440 if (module_flags
& QOSIFY_IP_ONLY
)
441 type
= skb
->protocol
;
443 type
= parse_ethernet(skb
, &offset
);
445 if (type
== bpf_htons(ETH_P_IP
))
446 parse_ipv4(skb
, &offset
);
447 else if (type
== bpf_htons(ETH_P_IPV6
))
448 parse_ipv6(skb
, &offset
);
453 char _license
[] SEC("license") = "GPL";