1 // SPDX-License-Identifier: GPL-2.0+
3 * Copyright (C) 2021 Felix Fietkau <nbd@nbd.name>
5 #define KBUILD_MODNAME "foo"
6 #include <uapi/linux/bpf.h>
7 #include <uapi/linux/if_ether.h>
8 #include <uapi/linux/if_packet.h>
9 #include <uapi/linux/ip.h>
10 #include <uapi/linux/ipv6.h>
11 #include <uapi/linux/in.h>
12 #include <uapi/linux/tcp.h>
13 #include <uapi/linux/udp.h>
14 #include <uapi/linux/filter.h>
15 #include <uapi/linux/pkt_cls.h>
18 #include <bpf/bpf_helpers.h>
19 #include <bpf/bpf_endian.h>
20 #include "bpf_skb_utils.h"
21 #include "qosify-bpf.h"
/*
 * INET_ECN_MASK is the `mask` that classify() hands to the
 * *_change_dsfield() helpers; header bits covered by the mask are carried
 * over unchanged when the DS field is rewritten.
 *
 * Timestamps in this file are bpf_ktime_get_ns() >> 24 (see cur_time()),
 * i.e. units of 2^24 ns (~16.8 ms). The intervals below are nanosecond
 * counts scaled the same way: FLOW_CHECK_INTERVAL is 1 second and
 * FLOW_TIMEOUT is 30 seconds.
 *
 * NOTE(review): FLOW_BULK_TIMEOUT has no visible user in this file;
 * confirm it is still needed.
 */
23 #define INET_ECN_MASK 3
25 #define FLOW_CHECK_INTERVAL ((u32)((1000000000ULL) >> 24))
26 #define FLOW_TIMEOUT ((u32)((30ULL * 1000000000ULL) >> 24))
27 #define FLOW_BULK_TIMEOUT 5
/*
 * Flag word tested by classify() against QOSIFY_INGRESS and QOSIFY_IP_ONLY.
 * It is const volatile with a zero initializer, so the program itself never
 * writes it; presumably the loader patches the value before attaching
 * (TODO confirm against the userspace loader).
 */
31 const volatile static uint32_t module_flags
= 0;
/*
 * Single-entry array map holding the struct qosify_config that
 * get_config() looks up.
 */
41 __uint(type
, BPF_MAP_TYPE_ARRAY
);
44 __type(value
, struct qosify_config
);
45 __uint(max_entries
, 1);
46 } config
SEC(".maps");
/*
 * Array map with 1 << 16 entries, consulted by parse_l4proto() when the
 * packet's protocol is IPPROTO_TCP.
 * NOTE(review): presumably one entry per port number, indexed by port;
 * confirm key and value types against the full map definition.
 */
49 __uint(type
, BPF_MAP_TYPE_ARRAY
);
53 __uint(max_entries
, 1 << 16);
54 } tcp_ports
SEC(".maps");
/*
 * Array map with 1 << 16 entries, consulted by parse_l4proto() on its
 * non-TCP path (after the IPPROTO_UDP check).
 * NOTE(review): presumably one entry per port number, indexed by port;
 * confirm key and value types against the full map definition.
 */
57 __uint(type
, BPF_MAP_TYPE_ARRAY
);
61 __uint(max_entries
, 1 << 16);
62 } udp_ports
SEC(".maps");
/*
 * LRU hash map of per-flow state (struct flow_bucket), sized by
 * QOSIFY_FLOW_BUCKETS. check_flow() keys it with the value returned by
 * bpf_get_hash_recalc() for the packet and creates zeroed entries on demand.
 */
65 __uint(type
, BPF_MAP_TYPE_LRU_HASH
);
68 __type(value
, struct flow_bucket
);
69 __uint(max_entries
, QOSIFY_FLOW_BUCKETS
);
70 } flow_map
SEC(".maps");
/*
 * Hash map from an IPv4 address (key size is sizeof(struct in_addr)) to a
 * struct qosify_ip_map_val. Holds up to 100000 entries and is created with
 * BPF_F_NO_PREALLOC. parse_ipv4() performs the lookup.
 */
73 __uint(type
, BPF_MAP_TYPE_HASH
);
75 __uint(key_size
, sizeof(struct in_addr
));
76 __type(value
, struct qosify_ip_map_val
);
77 __uint(max_entries
, 100000);
78 __uint(map_flags
, BPF_F_NO_PREALLOC
);
79 } ipv4_map
SEC(".maps");
/*
 * Hash map from an IPv6 address (key size is sizeof(struct in6_addr)) to a
 * struct qosify_ip_map_val. Holds up to 100000 entries and is created with
 * BPF_F_NO_PREALLOC. parse_ipv6() performs the lookup.
 */
82 __uint(type
, BPF_MAP_TYPE_HASH
);
84 __uint(key_size
, sizeof(struct in6_addr
));
85 __type(value
, struct qosify_ip_map_val
);
86 __uint(max_entries
, 100000);
87 __uint(map_flags
, BPF_F_NO_PREALLOC
);
88 } ipv6_map
SEC(".maps");
/*
 * Array map of struct qosify_class with
 * QOSIFY_MAX_CLASS_ENTRIES + QOSIFY_DEFAULT_CLASS_ENTRIES slots.
 * dscp_lookup_class() indexes it with (*dscp & QOSIFY_DSCP_VALUE_MASK).
 */
91 __uint(type
, BPF_MAP_TYPE_ARRAY
);
94 __type(value
, struct qosify_class
);
95 __uint(max_entries
, QOSIFY_MAX_CLASS_ENTRIES
+
96 QOSIFY_DEFAULT_CLASS_ENTRIES
);
97 } class_map
SEC(".maps");
/*
 * Fetch the global configuration from the `config` map.
 *
 * Returns the pointer produced by bpf_map_lookup_elem() for `key`.
 * NOTE(review): `key` is presumably a local set to 0, since the map has a
 * single entry; confirm.
 */
99 static struct qosify_config
*get_config(void)
103 return bpf_map_lookup_elem(&config
, &key
);
/*
 * Coarse 32-bit timestamp: bpf_ktime_get_ns() shifted right by 24, i.e.
 * counted in units of 2^24 ns (~16.8 ms). FLOW_CHECK_INTERVAL and
 * FLOW_TIMEOUT are expressed in the same unit.
 * NOTE(review): callers treat a zero last_update as "never seen", so this
 * presumably never returns 0; confirm.
 */
106 static __always_inline __u32
cur_time(void)
108 __u32 val
= bpf_ktime_get_ns() >> 24;
/*
 * Update a moving average stored in *avg and return its current value.
 *
 * *avg is kept in fixed point, scaled up by EWMA_SHIFT bits. The blending
 * assignment weights the previous average 3/4 and the new sample 1/4; the
 * other assignment seeds *avg directly from the sample.
 * NOTE(review): the seed path is presumably taken when *avg is still 0;
 * confirm.
 *
 * @avg: in/out fixed-point accumulator
 * @val: new sample, unscaled
 * Returns the updated average, scaled back down by EWMA_SHIFT.
 */
116 static __always_inline __u32
ewma(__u32
*avg
, __u32 val
)
119 *avg
= (*avg
* 3) / 4 + (val
<< EWMA_SHIFT
) / 4;
121 *avg
= val
<< EWMA_SHIFT
;
123 return *avg
>> EWMA_SHIFT
;
126 static __always_inline __u8
dscp_val(struct qosify_dscp_val
*val
, bool ingress
)
128 __u8 ival
= val
->ingress
;
129 __u8 eval
= val
->egress
;
131 return ingress
? ival
: eval
;
/*
 * Rewrite the TOS byte of the IPv4 header located at `offset` in the skb
 * and patch the header checksum to match.
 *
 * The new byte is (tos & mask) | value, so bits covered by `mask` are kept
 * from the packet and the remaining bits come from `value`. The checksum is
 * handled in host byte order (bpf_ntohs on load, bpf_htons on store) with an
 * explicit end-around carry fold.
 *
 * @skb:    packet being modified
 * @offset: byte offset of the IPv4 header, passed to skb_ptr()
 * @mask:   bits of the existing TOS byte to preserve
 * @value:  bits to OR into the result
 * @force:  when false, the (tos & mask) test below gates the rewrite
 *
 * NOTE(review): classify() passes INET_ECN_MASK as `mask`, so the
 * `(iph->tos & mask) && !force` test inspects the preserved ECN bits rather
 * than the DSCP bits being replaced; confirm this is intended.
 */
134 static __always_inline
void
135 ipv4_change_dsfield(struct __sk_buff
*skb
, __u32 offset
,
136 __u8 mask
, __u8 value
, bool force
)
142 iph
= skb_ptr(skb
, offset
, sizeof(*iph
));
146 check
= bpf_ntohs(iph
->check
);
147 if ((iph
->tos
& mask
) && !force
)
150 dsfield
= (iph
->tos
& mask
) | value
;
151 if (iph
->tos
== dsfield
)
155 if ((check
+ 1) >> 16)
156 check
= (check
+ 1) & 0xffff;
158 check
+= check
>> 16;
159 iph
->check
= bpf_htons(check
);
/*
 * IPv6 counterpart of ipv4_change_dsfield(): rewrite the traffic-class
 * bits of the IPv6 header located at `offset` in the skb.
 *
 * The new 16-bit word keeps the bits of *p selected by
 * ((mask << 4) | 0xf00f) and ORs in (value << 4), both converted with
 * bpf_htons().
 * NOTE(review): presumably `p` aliases the first 16-bit word of the header,
 * so 0xf00f preserves the version nibble and the top flow-label nibble;
 * confirm.
 *
 * @skb:    packet being modified
 * @offset: byte offset of the IPv6 header, passed to skb_ptr()
 * @mask:   traffic-class bits to preserve
 * @value:  traffic-class bits to OR into the result
 * @force:  when false, the ((*p >> 4) & mask) test below gates the rewrite
 *
 * NOTE(review): as in the IPv4 variant, with mask == INET_ECN_MASK the
 * !force test looks at the preserved bits, not the ones being replaced;
 * confirm this is intended.
 */
163 static __always_inline
void
164 ipv6_change_dsfield(struct __sk_buff
*skb
, __u32 offset
,
165 __u8 mask
, __u8 value
, bool force
)
167 struct ipv6hdr
*ipv6h
;
171 ipv6h
= skb_ptr(skb
, offset
, sizeof(*ipv6h
));
176 if (((*p
>> 4) & mask
) && !force
)
179 val
= (*p
& bpf_htons((((__u16
)mask
<< 4) | 0xf00f))) | bpf_htons((__u16
)value
<< 4);
/*
 * Derive a classification value from the transport header.
 *
 * - ICMP and ICMPv6 packets take config->dscp_icmp when a config is present.
 * - Otherwise the source and destination ports are read through a udphdr
 *   view of the header and a value is looked up in tcp_ports for
 *   IPPROTO_TCP, or in udp_ports after the IPPROTO_UDP check.
 *
 * NOTE(review): the ports are read via `udp` for TCP too, presumably because
 * both headers start with the source and destination ports; confirm.
 * NOTE(review): `key` is presumably chosen from src/dest depending on
 * `ingress`; confirm.
 *
 * @config:  global configuration (checked for NULL on the ICMP path)
 * @info:    parser state providing the protocol and header pointer
 * @ingress: traffic direction
 * @out_val: receives the selected value
 */
187 parse_l4proto(struct qosify_config
*config
, struct skb_parser_info
*info
,
188 bool ingress
, __u8
*out_val
)
191 __u32 src
, dest
, key
;
193 __u8 proto
= info
->proto
;
195 udp
= skb_info_ptr(info
, sizeof(*udp
));
199 if (config
&& (proto
== IPPROTO_ICMP
|| proto
== IPPROTO_ICMPV6
)) {
200 *out_val
= config
->dscp_icmp
;
204 src
= READ_ONCE(udp
->source
);
205 dest
= READ_ONCE(udp
->dest
);
211 if (proto
== IPPROTO_TCP
) {
212 value
= bpf_map_lookup_elem(&tcp_ports
, &key
);
214 if (proto
!= IPPROTO_UDP
)
217 value
= bpf_map_lookup_elem(&udp_ports
, &key
);
/*
 * Bulk-flow detection for one flow bucket.
 *
 * Visible behaviour:
 * - the check is gated on config->bulk_trigger_pps being non-zero;
 * - flow->last_update == 0 and an age above FLOW_TIMEOUT are tested
 *   separately (NOTE(review): presumably both reset the bucket; confirm);
 * - skb->gso_segs is added to flow->pkt_count; once the count exceeds
 *   bulk_trigger_pps, bulk_timeout is armed to bulk_trigger_timeout + 1;
 * - once at least FLOW_CHECK_INTERVAL has elapsed, bulk_timeout is
 *   decremented unless the trigger fired in this call, pkt_len_avg is
 *   cleared and last_update is refreshed;
 * - while bulk_timeout is non-zero, *out_val is set to config->dscp_bulk.
 *
 * NOTE(review): `time` presumably comes from cur_time(); confirm.
 *
 * @config:  per-class flow configuration
 * @skb:     packet being classified
 * @flow:    flow state, updated in place
 * @out_val: receives dscp_bulk while the flow is considered bulk
 */
224 static __always_inline
bool
225 check_flow_bulk(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
226 struct flow_bucket
*flow
, __u8
*out_val
)
228 bool trigger
= false;
234 if (!config
->bulk_trigger_pps
)
238 if (!flow
->last_update
)
241 delta
= time
- flow
->last_update
;
242 if ((u32
)delta
> FLOW_TIMEOUT
)
246 segs
= skb
->gso_segs
;
247 flow
->pkt_count
+= segs
;
248 if (flow
->pkt_count
> config
->bulk_trigger_pps
) {
249 flow
->bulk_timeout
= config
->bulk_trigger_timeout
+ 1;
253 if (delta
>= FLOW_CHECK_INTERVAL
) {
254 if (flow
->bulk_timeout
&& !trigger
)
255 flow
->bulk_timeout
--;
263 flow
->pkt_len_avg
= 0;
266 flow
->last_update
= time
;
268 if (flow
->bulk_timeout
) {
269 *out_val
= config
->dscp_bulk
;
/*
 * Small-packet priority detection for one flow bucket.
 *
 * The check is gated on flow->bulk_timeout (an active bulk state) and on
 * config->prio_max_avg_pkt_len being configured. The packet length is
 * skb->len, divided by skb->gso_segs when the skb carries more than one
 * segment, and is fed into ewma() on flow->pkt_len_avg. If the resulting
 * average is at most prio_max_avg_pkt_len, *out_val is set to
 * config->dscp_prio.
 *
 * @config:  per-class flow configuration
 * @skb:     packet being classified
 * @flow:    flow state; pkt_len_avg is updated in place
 * @out_val: receives dscp_prio when the flow qualifies
 */
276 static __always_inline
bool
277 check_flow_prio(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
278 struct flow_bucket
*flow
, __u8
*out_val
)
280 int cur_len
= skb
->len
;
282 if (flow
->bulk_timeout
)
285 if (!config
->prio_max_avg_pkt_len
)
288 if (skb
->gso_segs
> 1)
289 cur_len
/= skb
->gso_segs
;
291 if (ewma(&flow
->pkt_len_avg
, cur_len
) <= config
->prio_max_avg_pkt_len
) {
292 *out_val
= config
->dscp_prio
;
/*
 * Run the per-flow heuristics for a packet.
 *
 * The function tests whether both prio_max_avg_pkt_len and bulk_trigger_pps
 * are zero (NOTE(review): presumably an early exit when neither heuristic is
 * configured; confirm). The flow bucket is then looked up in flow_map under
 * the hash returned by bpf_get_hash_recalc(). A zeroed bucket is inserted
 * with BPF_ANY and looked up again (NOTE(review): presumably only when the
 * first lookup misses; confirm). The result ORs together the return values
 * of check_flow_bulk() and check_flow_prio(); both are always called.
 *
 * @config: per-class flow configuration
 * @skb:    packet being classified
 */
299 static __always_inline
bool
300 check_flow(struct qosify_flow_config
*config
, struct __sk_buff
*skb
,
303 struct flow_bucket flow_data
;
304 struct flow_bucket
*flow
;
311 if (!config
->prio_max_avg_pkt_len
&& !config
->bulk_trigger_pps
)
314 hash
= bpf_get_hash_recalc(skb
);
315 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
317 memset(&flow_data
, 0, sizeof(flow_data
));
318 bpf_map_update_elem(&flow_map
, &hash
, &flow_data
, BPF_ANY
);
319 flow
= bpf_map_lookup_elem(&flow_map
, &hash
);
324 ret
|= check_flow_bulk(config
, skb
, flow
, out_val
);
325 ret
|= check_flow_prio(config
, skb
, flow
, out_val
);
/*
 * Parse an IPv4 packet: skb_parse_ipv4() is asked for the header plus room
 * for a udphdr, parse_l4proto() fills *out_val from the transport header,
 * and the address-based entry is looked up in ipv4_map.
 * NOTE(review): `key` presumably points at the source or destination address
 * depending on `ingress`; confirm.
 *
 * @config:  global configuration, forwarded to parse_l4proto()
 * @info:    parser state
 * @ingress: traffic direction
 * @out_val: receives the port/protocol-derived value
 * Returns the result of the ipv4_map lookup.
 */
330 static __always_inline
struct qosify_ip_map_val
*
331 parse_ipv4(struct qosify_config
*config
, struct skb_parser_info
*info
,
332 bool ingress
, __u8
*out_val
)
339 iph
= skb_parse_ipv4(info
, sizeof(struct udphdr
));
343 parse_l4proto(config
, info
, ingress
, out_val
);
350 return bpf_map_lookup_elem(&ipv4_map
, key
);
/*
 * Parse an IPv6 packet: skb_parse_ipv6() is asked for the header plus room
 * for a udphdr, parse_l4proto() fills *out_val from the transport header,
 * and the address-based entry is looked up in ipv6_map.
 * NOTE(review): `key` presumably points at the source or destination address
 * depending on `ingress`; confirm.
 *
 * @config:  global configuration, forwarded to parse_l4proto()
 * @info:    parser state
 * @ingress: traffic direction
 * @out_val: receives the port/protocol-derived value
 * Returns the result of the ipv6_map lookup.
 */
353 static __always_inline
struct qosify_ip_map_val
*
354 parse_ipv6(struct qosify_config
*config
, struct skb_parser_info
*info
,
355 bool ingress
, __u8
*out_val
)
361 iph
= skb_parse_ipv6(info
, sizeof(struct udphdr
));
370 parse_l4proto(config
, info
, ingress
, out_val
);
372 return bpf_map_lookup_elem(&ipv6_map
, key
);
/*
 * Resolve a class reference stored in *dscp into a concrete value.
 *
 * The function first tests whether QOSIFY_DSCP_CLASS_FLAG is set in *dscp.
 * For class references it saves the QOSIFY_DSCP_FALLBACK_FLAG bit, uses
 * (*dscp & QOSIFY_DSCP_VALUE_MASK) as the index into class_map, and tests
 * the entry's QOSIFY_CLASS_FLAG_PRESENT bit. It then replaces *dscp with the
 * class's direction-specific value from dscp_val() and ORs the saved
 * fallback flag back in.
 * NOTE(review): presumably returns early when the class flag is clear and
 * fails when the entry is missing or not present; confirm.
 *
 * @dscp:      in/out value, possibly a class reference on entry
 * @ingress:   traffic direction, forwarded to dscp_val()
 * @out_class: receives the resolved class
 */
375 static __always_inline
int
376 dscp_lookup_class(uint8_t *dscp
, bool ingress
, struct qosify_class
**out_class
,
379 struct qosify_class
*class;
383 if (!(*dscp
& QOSIFY_DSCP_CLASS_FLAG
))
386 fallback_flag
= *dscp
& QOSIFY_DSCP_FALLBACK_FLAG
;
387 key
= *dscp
& QOSIFY_DSCP_VALUE_MASK
;
388 class = bpf_map_lookup_elem(&class_map
, &key
);
392 if (!(class->flags
& QOSIFY_CLASS_FLAG_PRESENT
))
397 *dscp
= dscp_val(&class->val
, ingress
);
398 *dscp
|= fallback_flag
;
/*
 * Program entry point: choose a DSCP value for the packet and write it into
 * the IP header.
 *
 * Outline of the visible steps:
 * 1. Load the global config via get_config().
 * 2. Initialise the parser. With QOSIFY_IP_ONLY set, skb->protocol is used
 *    as the L3 type; otherwise the Ethernet header and up to two VLAN tags
 *    are parsed.
 * 3. Dispatch to parse_ipv4() or parse_ipv6() by ethertype; the IP header
 *    offset is recorded first for the later rewrite.
 * 4. Resolve class references with dscp_lookup_class(), then let
 *    check_flow() adjust the value; if it does, resolve again.
 * 5. Mask the value, derive `force`, and call the matching
 *    *_change_dsfield() helper with INET_ECN_MASK as the mask.
 *
 * Every visible return yields TC_ACT_UNSPEC, including the final one after
 * a successful rewrite.
 *
 * NOTE(review): `dscp` is masked with GENMASK(5, 0) before
 * QOSIFY_DSCP_FALLBACK_FLAG is tested. dscp_lookup_class() treats that flag
 * as separate from the value bits, so if it lies outside bits 0-5 the test
 * can no longer observe it and `force` would not reflect the fallback
 * setting. Confirm against the flag's definition and consider computing
 * `force` before masking.
 */
405 int classify(struct __sk_buff
*skb
)
407 struct skb_parser_info info
;
408 bool ingress
= module_flags
& QOSIFY_INGRESS
;
409 struct qosify_config
*config
;
410 struct qosify_class
*class = NULL
;
411 struct qosify_ip_map_val
*ip_val
;
418 config
= get_config();
420 return TC_ACT_UNSPEC
;
422 skb_parse_init(&info
, skb
);
423 if (module_flags
& QOSIFY_IP_ONLY
) {
424 type
= info
.proto
= skb
->protocol
;
425 } else if (skb_parse_ethernet(&info
)) {
426 skb_parse_vlan(&info
);
427 skb_parse_vlan(&info
);
430 return TC_ACT_UNSPEC
;
433 iph_offset
= info
.offset
;
434 if (type
== bpf_htons(ETH_P_IP
))
435 ip_val
= parse_ipv4(config
, &info
, ingress
, &dscp
);
436 else if (type
== bpf_htons(ETH_P_IPV6
))
437 ip_val
= parse_ipv6(config
, &info
, ingress
, &dscp
);
439 return TC_ACT_UNSPEC
;
447 if (dscp_lookup_class(&dscp
, ingress
, &class, true))
448 return TC_ACT_UNSPEC
;
451 if (check_flow(&class->config
, skb
, &dscp
) &&
452 dscp_lookup_class(&dscp
, ingress
, &class, false))
453 return TC_ACT_UNSPEC
;
456 dscp
&= GENMASK(5, 0);
458 force
= !(dscp
& QOSIFY_DSCP_FALLBACK_FLAG
);
460 if (type
== bpf_htons(ETH_P_IP
))
461 ipv4_change_dsfield(skb
, iph_offset
, INET_ECN_MASK
, dscp
, force
);
462 else if (type
== bpf_htons(ETH_P_IPV6
))
463 ipv6_change_dsfield(skb
, iph_offset
, INET_ECN_MASK
, dscp
, force
);
465 return TC_ACT_UNSPEC
;
/*
 * License string emitted into the object's "license" section.
 * NOTE(review): presumably read at load time to decide whether GPL-only
 * helpers may be used; confirm against the loader documentation.
 */
468 char _license
[] SEC("license") = "GPL";