1 From dff8eadcab33209e040e77a5d56d5def04808144 Mon Sep 17 00:00:00 2001
2 From: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
3 Date: Fri, 15 Mar 2019 09:35:37 +0000
4 Subject: [PATCH] tc: add support for action act_ctinfo
6 Content-Type: text/plain; charset=UTF-8
7 Content-Transfer-Encoding: 8bit
9 ctinfo is a tc action restoring data stored in conntrack marks to
10 various fields. At present it has two independent modes of operation,
11 restoration of DSCP into IPv4/v6 diffserv and restoration of conntrack
12 marks into packet skb marks.
14 It understands a number of parameters specific to this action in
15 addition to the usual action syntax. Each operating mode is
16 independent of the other so all options are optional, however not
17 specifying at least one mode is a bit pointless.
19 Usage: ... ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE]
20 [CONTROL] [index <INDEX>]
24 dscp enables copying of a DSCP stored in the conntrack mark into the
25 ipv4/v6 diffserv field. The mask is a 32bit field and specifies where
26 in the conntrack mark the DSCP value is located. It must be 6
27 contiguous bits long. eg. 0xfc000000 would restore the DSCP from the
28 upper 6 bits of the conntrack mark.
30 The DSCP copying may be optionally controlled by a statemask. The
31 statemask is a 32bit field, usually with a single bit set and must not
32 overlap the dscp mask. The DSCP restore operation will only take place
33 if the corresponding bit/s in conntrack mark ANDed with the statemask
34 yield a non zero result.
36 eg. dscp 0xfc000000 0x01000000 would retrieve the DSCP from the top 6
37 bits, whilst using bit 25 as a flag to do so. Bit 26 is unused in this
42 cpmark enables copying of the conntrack mark to the packet skb mark. In
43 this mode it is completely equivalent to the existing act_connmark
44 action. Additional functionality is provided by the optional mask
45 parameter, whereby the stored conntrack mark is logically ANDed with the
46 cpmark mask before being stored into skb mark. This allows shared usage
47 of the conntrack mark between applications.
49 eg. cpmark 0x00ffffff would restore only the lower 24 bits of the
50 conntrack mark, thus may be useful in the event that the upper 8 bits
51 are used by the DSCP function.
53 Usage: ... ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE]
54 [CONTROL] [index <INDEX>]
56 dscp MASK is the bitmask to restore DSCP
57 STATEMASK is the bitmask to determine conditional restoring
58 cpmark MASK mask applied to restored packet mark
59 ZONE is the conntrack zone
60 CONTROL := reclassify | pipe | drop | continue | ok |
61 goto chain <CHAIN_INDEX>
63 Signed-off-by: Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
64 Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
66 include/uapi/linux/pkt_cls.h | 3 +-
67 include/uapi/linux/tc_act/tc_ctinfo.h | 29 +++
68 man/man8/tc-ctinfo.8 | 170 ++++++++++++++++
70 tc/m_ctinfo.c | 268 ++++++++++++++++++++++++++
71 5 files changed, 470 insertions(+), 1 deletion(-)
72 create mode 100644 include/uapi/linux/tc_act/tc_ctinfo.h
73 create mode 100644 man/man8/tc-ctinfo.8
74 create mode 100644 tc/m_ctinfo.c
76 --- a/include/uapi/linux/pkt_cls.h
77 +++ b/include/uapi/linux/pkt_cls.h
78 @@ -105,6 +105,7 @@ enum tca_id {
79 TCA_ID_IFE = TCA_ACT_IFE,
80 TCA_ID_SAMPLE = TCA_ACT_SAMPLE,
81 /* other actions go here */
87 +++ b/include/uapi/linux/tc_act/tc_ctinfo.h
89 +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
90 +#ifndef __UAPI_TC_CTINFO_H
91 +#define __UAPI_TC_CTINFO_H
93 +#include <linux/types.h>
94 +#include <linux/pkt_cls.h>
106 + TCA_CTINFO_PARMS_DSCP_MASK,
107 + TCA_CTINFO_PARMS_DSCP_STATEMASK,
108 + TCA_CTINFO_PARMS_CPMARK_MASK,
109 + TCA_CTINFO_STATS_DSCP_SET,
110 + TCA_CTINFO_STATS_DSCP_ERROR,
111 + TCA_CTINFO_STATS_CPMARK_SET,
115 +#define TCA_CTINFO_MAX (__TCA_CTINFO_MAX - 1)
119 +++ b/man/man8/tc-ctinfo.8
121 +.TH "ctinfo action in tc" 8 "4 Jun 2019" "iproute2" "Linux"
123 +ctinfo \- tc connmark processing action
125 +.B tc ... action ctinfo
128 +MASK [STATEMASK] ] [
140 +CTINFO (Conntrack Information) is a tc action for retrieving data from
141 +conntrack marks into various fields. At present it has two independent
142 +processing modes which may be viewed as sub-functions.
144 +DSCP mode copies a DSCP stored in conntrack's connmark into the IPv4/v6 diffserv
145 +field. The copying may conditionally occur based on a flag also stored in the
146 +connmark. DSCP mode was designed to assist in restoring packet classifications on
147 +ingress, classifications which may then be used by qdiscs such as CAKE. It may be
148 +used in any circumstance where ingress classification needs to be maintained across
149 +links that otherwise bleach or remap according to their own policies.
151 +CPMARK (copymark) mode copies the conntrack connmark into the packet's mark field. Without
152 +additional parameters it is functionally completely equivalent to the existing
153 +connmark action. An optional mask may be specified to mask which bits of the
154 +connmark are restored. This may be useful when DSCP and CPMARK modes are combined.
156 +Simple statistics (tc -s) on DSCP restores and CPMARK copies are maintained where values for
157 +set indicate a count of packets altered for that mode. DSCP includes an error count
158 +where the destination packet's diffserv field was unwriteable.
160 +.SS DSCP mode parameters:
162 +A mask of 6 contiguous bits indicating where the DSCP value is located in the 32 bit
163 +conntrack mark field. A mask must be provided for this mode. mask is a 32 bit
166 +A mask of at least 1 bit indicating where a conditional restore flag is located in the
167 +32 bit conntrack mark field. The statemask bit/s must NOT overlap the mask bits. The
168 +DSCP will be restored if the conntrack mark logically ANDed with the statemask yields
169 +a non-zero result. statemask is an optional unsigned 32 bit value.
170 +.SS CPMARK mode parameters:
172 +Store the logically ANDed result of conntrack mark and mask into the packet's mark
173 +field. Default is 0xffffffff i.e. the whole mark field. mask is an optional unsigned 32 bit
175 +.SS Overall action parameters:
177 +Specify the conntrack zone when doing conntrack lookups for packets.
178 +zone is a 16bit unsigned decimal value.
181 +The following keywords control how the tree of qdisc, classes,
182 +filters and actions is further traversed after this action.
186 +Restart with the first filter in the current list.
189 +Continue with the next action attached to the same filter.
199 +Continue classification with the next filter in line.
202 +Finish classification process and return to calling qdisc for further packet
203 +processing. This is the default.
206 +Specify an index for this action in order to be able to identify it in later
207 +commands. index is a 32bit unsigned decimal value.
209 +Example showing conditional restoration of DSCP on ingress via an IFB
213 +#Set up the IFB interface
215 +tc qdisc add dev ifb4eth0 handle ffff: ingress
217 +#Put CAKE qdisc on it
219 +tc qdisc add dev ifb4eth0 root cake bandwidth 40mbit
223 +ip link set dev ifb4eth0 up
225 +#Add 2 actions, ctinfo to restore dscp & mirred to redirect the packets to IFB
227 +tc filter add dev eth0 parent ffff: protocol all prio 10 u32 \\
228 + match u32 0 0 flowid 1:1 action \\
229 + ctinfo dscp 0xfc000000 0x01000000 \\
230 + mirred egress redirect dev ifb4eth0
232 +tc -s filter show dev eth0 ingress
234 + filter parent ffff: protocol all pref 10 u32 chain 0
235 + filter parent ffff: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1
236 + filter parent ffff: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw
237 + match 00000000/00000000 at 0
238 + action order 1: ctinfo zone 0 pipe
239 + index 2 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 72 sec used 0 sec DSCP set 1333 error 0 CPMARK set 0
241 + Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0)
242 + backlog 0b 0p requeues 0
244 + action order 2: mirred (Egress Redirect to device ifb4eth0) stolen
245 + index 1 ref 1 bind 1 installed 72 sec used 0 sec
247 + Sent 658484 bytes 1833 pkt (dropped 0, overlimits 0 requeues 0)
248 + backlog 0b 0p requeues 0
252 +Example showing conditional restoration of DSCP on egress
254 +This may appear nonsensical since iptables marking of egress packets is easy
255 +to achieve; however, the iptables flow classification rules may be extensive,
256 +so some sort of set-once-and-forget approach may be useful, especially on cpu
257 +constrained devices.
261 +# Send unmarked connections to a marking chain which needs to store a DSCP
262 +# and set statemask bit in the connmark
264 +iptables -t mangle -A POSTROUTING -o eth0 -m connmark \\
265 + --mark 0x00000000/0x01000000 -g CLASS_MARKING_CHAIN
267 +# Apply marked DSCP to the packets
269 +tc filter add dev eth0 protocol all prio 10 u32 \\
270 + match u32 0 0 flowid 1:1 action \\
271 + ctinfo dscp 0xfc000000 0x01000000
273 +tc -s filter show dev eth0
274 + filter parent 800e: protocol all pref 10 u32 chain 0
275 + filter parent 800e: protocol all pref 10 u32 chain 0 fh 800: ht divisor 1
276 + filter parent 800e: protocol all pref 10 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1 not_in_hw
277 + match 00000000/00000000 at 0
278 + action order 1: ctinfo zone 0 pipe
279 + index 1 ref 1 bind 1 dscp 0xfc000000 0x01000000 installed 7414 sec used 0 sec DSCP set 53404 error 0 CPMARK set 0
281 + Sent 32890260 bytes 120441 pkt (dropped 0, overlimits 0 requeues 0)
282 + backlog 0b 0p requeues 0
290 +ctinfo was written by Kevin Darbyshire-Bryant.
293 @@ -48,6 +48,7 @@ TCMODULES += m_csum.o
294 TCMODULES += m_simple.o
295 TCMODULES += m_vlan.o
296 TCMODULES += m_connmark.o
297 +TCMODULES += m_ctinfo.o
299 TCMODULES += m_tunnel_key.o
300 TCMODULES += m_sample.o
304 +/* SPDX-License-Identifier: GPL-2.0 */
306 + * m_ctinfo.c netfilter ctinfo mark action
308 + * Copyright (c) 2019 Kevin Darbyshire-Bryant <ldir@darbyshire-bryant.me.uk>
316 +#include "tc_util.h"
317 +#include <linux/tc_act/tc_ctinfo.h>
323 + "Usage: ... ctinfo [dscp mask [statemask]] [cpmark [mask]] [zone ZONE] [CONTROL] [index <INDEX>]\n"
325 + "\tdscp MASK bitmask location of stored DSCP\n"
326 + "\t STATEMASK bitmask to determine conditional restoring\n"
327 + "\tcpmark MASK mask applied to mark on restoration\n"
328 + "\tZONE is the conntrack zone\n"
329 + "\tCONTROL := reclassify | pipe | drop | continue | ok |\n"
330 + "\t goto chain <CHAIN_INDEX>\n");
341 +parse_ctinfo(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
342 + struct nlmsghdr *n)
344 + unsigned int cpmarkmask = 0, dscpmask = 0, dscpstatemask = 0;
345 + struct tc_ctinfo sel = {};
346 + unsigned short zone = 0;
347 + char **argv = *argv_p;
348 + struct rtattr *tail;
349 + int argc = *argc_p;
354 + if (matches(*argv, "ctinfo") == 0) {
357 + } else if (matches(*argv, "help") == 0) {
371 + if (matches(*argv, "dscp") == 0) {
373 + if (get_u32(&dscpmask, *argv, 0)) {
375 + "ctinfo: Illegal dscp \"mask\"\n");
378 + if (NEXT_ARG_OK()) {
380 + if (!get_u32(&dscpstatemask, *argv, 0))
381 + NEXT_ARG_FWD(); /* was a statemask */
388 + /* cpmark has optional mask parameter, so the next arg might not */
389 + /* exist, or it might be the next option, or it may actually be a */
392 + if (matches(*argv, "cpmark") == 0) {
394 + if (NEXT_ARG_OK()) {
396 + if (!get_u32(&cpmarkmask, *argv, 0))
397 + NEXT_ARG_FWD(); /* was a mask */
405 + if (matches(*argv, "zone") == 0) {
407 + if (get_u16(&zone, *argv, 10)) {
408 + fprintf(stderr, "ctinfo: Illegal \"zone\"\n");
415 + parse_action_control_dflt(&argc, &argv, &sel.action,
416 + false, TC_ACT_PIPE);
419 + if (matches(*argv, "index") == 0) {
421 + if (get_u32(&sel.index, *argv, 10)) {
422 + fprintf(stderr, "ctinfo: Illegal \"index\"\n");
429 + if (dscpmask & dscpstatemask) {
431 + "ctinfo: dscp mask & statemask must NOT overlap\n");
436 + if (i && ((~0 & (dscpmask >> (i - 1))) != 0x3f)) {
438 + "ctinfo: dscp mask must be 6 contiguous bits long\n");
442 + tail = addattr_nest(n, MAX_MSG, tca_id);
443 + addattr_l(n, MAX_MSG, TCA_CTINFO_ACT, &sel, sizeof(sel));
444 + addattr16(n, MAX_MSG, TCA_CTINFO_ZONE, zone);
447 + addattr32(n, MAX_MSG,
448 + TCA_CTINFO_PARMS_DSCP_MASK, dscpmask);
451 + addattr32(n, MAX_MSG,
452 + TCA_CTINFO_PARMS_DSCP_STATEMASK, dscpstatemask);
455 + addattr32(n, MAX_MSG,
456 + TCA_CTINFO_PARMS_CPMARK_MASK, cpmarkmask);
458 + addattr_nest_end(n, tail);
465 +static void print_ctinfo_stats(FILE *f, struct rtattr *tb[TCA_CTINFO_MAX + 1])
469 + if (tb[TCA_CTINFO_TM]) {
470 + tm = RTA_DATA(tb[TCA_CTINFO_TM]);
475 + if (tb[TCA_CTINFO_STATS_DSCP_SET])
476 + print_lluint(PRINT_ANY, "dscpset", " DSCP set %llu",
477 + rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_SET]));
478 + if (tb[TCA_CTINFO_STATS_DSCP_ERROR])
479 + print_lluint(PRINT_ANY, "dscperror", " error %llu",
480 + rta_getattr_u64(tb[TCA_CTINFO_STATS_DSCP_ERROR]));
482 + if (tb[TCA_CTINFO_STATS_CPMARK_SET])
483 + print_lluint(PRINT_ANY, "cpmarkset", " CPMARK set %llu",
484 + rta_getattr_u64(tb[TCA_CTINFO_STATS_CPMARK_SET]));
487 +static int print_ctinfo(struct action_util *au, FILE *f, struct rtattr *arg)
489 + unsigned int cpmarkmask = ~0, dscpmask = 0, dscpstatemask = 0;
490 + struct rtattr *tb[TCA_CTINFO_MAX + 1];
491 + unsigned short zone = 0;
492 + struct tc_ctinfo *ci;
497 + parse_rtattr_nested(tb, TCA_CTINFO_MAX, arg);
498 + if (!tb[TCA_CTINFO_ACT]) {
499 + print_string(PRINT_FP, NULL, "%s",
500 + "[NULL ctinfo action parameters]");
504 + ci = RTA_DATA(tb[TCA_CTINFO_ACT]);
506 + if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
507 + if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_MASK]) >=
509 + dscpmask = rta_getattr_u32(
510 + tb[TCA_CTINFO_PARMS_DSCP_MASK]);
512 + print_string(PRINT_FP, NULL, "%s",
513 + "[invalid dscp mask parameter]");
516 + if (tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) {
517 + if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]) >=
519 + dscpstatemask = rta_getattr_u32(
520 + tb[TCA_CTINFO_PARMS_DSCP_STATEMASK]);
522 + print_string(PRINT_FP, NULL, "%s",
523 + "[invalid dscp statemask parameter]");
526 + if (tb[TCA_CTINFO_PARMS_CPMARK_MASK]) {
527 + if (RTA_PAYLOAD(tb[TCA_CTINFO_PARMS_CPMARK_MASK]) >=
529 + cpmarkmask = rta_getattr_u32(
530 + tb[TCA_CTINFO_PARMS_CPMARK_MASK]);
532 + print_string(PRINT_FP, NULL, "%s",
533 + "[invalid cpmark mask parameter]");
536 + if (tb[TCA_CTINFO_ZONE] && RTA_PAYLOAD(tb[TCA_CTINFO_ZONE]) >=
538 + zone = rta_getattr_u16(tb[TCA_CTINFO_ZONE]);
540 + print_string(PRINT_ANY, "kind", "%s ", "ctinfo");
541 + print_hu(PRINT_ANY, "zone", "zone %u", zone);
542 + print_action_control(f, " ", ci->action, "");
544 + print_string(PRINT_FP, NULL, "%s", _SL_);
545 + print_uint(PRINT_ANY, "index", "\t index %u", ci->index);
546 + print_int(PRINT_ANY, "ref", " ref %d", ci->refcnt);
547 + print_int(PRINT_ANY, "bind", " bind %d", ci->bindcnt);
549 + if (tb[TCA_CTINFO_PARMS_DSCP_MASK]) {
550 + print_0xhex(PRINT_ANY, "dscpmask", " dscp %#010llx", dscpmask);
551 + print_0xhex(PRINT_ANY, "dscpstatemask", " %#010llx",
555 + if (tb[TCA_CTINFO_PARMS_CPMARK_MASK])
556 + print_0xhex(PRINT_ANY, "cpmark", " cpmark %#010llx",
560 + print_ctinfo_stats(f, tb);
562 + print_string(PRINT_FP, NULL, "%s", _SL_);
567 +struct action_util ctinfo_action_util = {
569 + .parse_aopt = parse_ctinfo,
570 + .print_aopt = print_ctinfo,