1 --- a/include/linux/pkt_sched.h
2 +++ b/include/linux/pkt_sched.h
3 @@ -850,4 +850,59 @@ struct tc_pie_xstats {
4 __u32 maxq; /* maximum queue size */
5 __u32 ecn_mark; /* packets marked with ecn*/
12 + TCA_CAKE_DIFFSERV_MODE,
25 +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
27 +struct tc_cake_traffic_stats {
33 +#define TC_CAKE_MAX_TINS (8)
34 +struct tc_cake_xstats {
35 + __u16 version; /* == 4, increments when struct extended */
36 + __u8 max_tins; /* == TC_CAKE_MAX_TINS */
37 + __u8 tin_cnt; /* <= TC_CAKE_MAX_TINS */
39 + __u32 threshold_rate [TC_CAKE_MAX_TINS];
40 + __u32 target_us [TC_CAKE_MAX_TINS];
41 + struct tc_cake_traffic_stats sent [TC_CAKE_MAX_TINS];
42 + struct tc_cake_traffic_stats dropped [TC_CAKE_MAX_TINS];
43 + struct tc_cake_traffic_stats ecn_marked[TC_CAKE_MAX_TINS];
44 + struct tc_cake_traffic_stats backlog [TC_CAKE_MAX_TINS];
45 + __u32 interval_us [TC_CAKE_MAX_TINS];
46 + __u32 way_indirect_hits[TC_CAKE_MAX_TINS];
47 + __u32 way_misses [TC_CAKE_MAX_TINS];
48 + __u32 way_collisions [TC_CAKE_MAX_TINS];
49 + __u32 peak_delay_us [TC_CAKE_MAX_TINS]; /* ~= delay to bulk flows */
50 + __u32 avge_delay_us [TC_CAKE_MAX_TINS];
51 + __u32 base_delay_us [TC_CAKE_MAX_TINS]; /* ~= delay to sparse flows */
52 + __u16 sparse_flows [TC_CAKE_MAX_TINS];
53 + __u16 bulk_flows [TC_CAKE_MAX_TINS];
54 + __u16 unresponse_flows [TC_CAKE_MAX_TINS]; /* v4 - was u32 last_len */
55 + __u16 spare [TC_CAKE_MAX_TINS]; /* v4 - split last_len */
56 + __u32 max_skblen [TC_CAKE_MAX_TINS];
57 + __u32 capacity_estimate; /* version 2 */
58 + __u32 memory_limit; /* version 3 */
59 + __u32 memory_used; /* version 3 */
65 @@ -63,6 +63,7 @@ TCMODULES += q_codel.o
66 TCMODULES += q_fq_codel.o
69 +TCMODULES += q_cake.o
77 + * Common Applications Kept Enhanced -- CAKE
79 + * Copyright (C) 2014-2015 Jonathan Morton <chromatix99@gmail.com>
81 + * Redistribution and use in source and binary forms, with or without
82 + * modification, are permitted provided that the following conditions
84 + * 1. Redistributions of source code must retain the above copyright
85 + * notice, this list of conditions, and the following disclaimer,
86 + * without modification.
87 + * 2. Redistributions in binary form must reproduce the above copyright
88 + * notice, this list of conditions and the following disclaimer in the
89 + * documentation and/or other materials provided with the distribution.
90 + * 3. The names of the authors may not be used to endorse or promote products
91 + * derived from this software without specific prior written permission.
93 + * Alternatively, provided that this notice is retained in full, this
94 + * software may be distributed under the terms of the GNU General
95 + * Public License ("GPL") version 2, in which case the provisions of the
96 + * GPL apply INSTEAD OF those given above.
98 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
99 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
100 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
101 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
102 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
103 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
104 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
105 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
106 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
107 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
108 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
119 +#include <sys/socket.h>
120 +#include <netinet/in.h>
121 +#include <arpa/inet.h>
125 +#include "tc_util.h"
127 +static void explain(void)
129 + fprintf(stderr, "Usage: ... cake [ bandwidth RATE | unlimited* | autorate_ingress ]\n"
130 + " [ rtt TIME | datacentre | lan | metro | regional | internet* | oceanic | satellite | interplanetary ]\n"
131 + " [ besteffort | precedence | diffserv8 | diffserv4 | diffserv-llt | diffserv3* ]\n"
132 + " [ flowblind | srchost | dsthost | hosts | flows | dual-srchost | dual-dsthost | triple-isolate* ] [ nat | nonat* ]\n"
133 + " [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n"
134 + " [ wash | nowash* ]\n"
135 + " [ memlimit LIMIT ]\n"
136 + " (* marks defaults)\n");
139 +static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv,
140 + struct nlmsghdr *n)
143 + unsigned bandwidth = 0;
144 + unsigned interval = 0;
145 + unsigned target = 0;
146 + unsigned diffserv = 0;
147 + unsigned memlimit = 0;
149 + bool overhead_set = false;
150 + bool overhead_override = false;
156 + struct rtattr *tail;
159 + if (strcmp(*argv, "bandwidth") == 0) {
161 + if (get_rate(&bandwidth, *argv)) {
162 + fprintf(stderr, "Illegal \"bandwidth\"\n");
167 + } else if (strcmp(*argv, "unlimited") == 0) {
171 + } else if (strcmp(*argv, "autorate_ingress") == 0) {
174 + } else if (strcmp(*argv, "rtt") == 0) {
176 + if (get_time(&interval, *argv)) {
177 + fprintf(stderr, "Illegal \"rtt\"\n");
180 + target = interval / 20;
183 + } else if (strcmp(*argv, "datacentre") == 0) {
186 + } else if (strcmp(*argv, "lan") == 0) {
189 + } else if (strcmp(*argv, "metro") == 0) {
192 + } else if (strcmp(*argv, "regional") == 0) {
195 + } else if (strcmp(*argv, "internet") == 0) {
198 + } else if (strcmp(*argv, "oceanic") == 0) {
201 + } else if (strcmp(*argv, "satellite") == 0) {
202 + interval = 1000000;
204 + } else if (strcmp(*argv, "interplanetary") == 0) {
205 + interval = 3600000000U;
208 + } else if (strcmp(*argv, "besteffort") == 0) {
210 + } else if (strcmp(*argv, "precedence") == 0) {
212 + } else if (strcmp(*argv, "diffserv8") == 0) {
214 + } else if (strcmp(*argv, "diffserv4") == 0) {
216 + } else if (strcmp(*argv, "diffserv") == 0) {
218 + } else if (strcmp(*argv, "diffserv-llt") == 0) {
220 + } else if (strcmp(*argv, "diffserv3") == 0) {
223 + } else if (strcmp(*argv, "nowash") == 0) {
225 + } else if (strcmp(*argv, "wash") == 0) {
228 + } else if (strcmp(*argv, "flowblind") == 0) {
230 + } else if (strcmp(*argv, "srchost") == 0) {
232 + } else if (strcmp(*argv, "dsthost") == 0) {
234 + } else if (strcmp(*argv, "hosts") == 0) {
236 + } else if (strcmp(*argv, "flows") == 0) {
238 + } else if (strcmp(*argv, "dual-srchost") == 0) {
240 + } else if (strcmp(*argv, "dual-dsthost") == 0) {
242 + } else if (strcmp(*argv, "triple-isolate") == 0) {
245 + } else if (strcmp(*argv, "nat") == 0) {
247 + } else if (strcmp(*argv, "nonat") == 0) {
250 + } else if (strcmp(*argv, "ptm") == 0) {
252 + } else if (strcmp(*argv, "atm") == 0) {
254 + } else if (strcmp(*argv, "noatm") == 0) {
257 + } else if (strcmp(*argv, "raw") == 0) {
260 + overhead_set = true;
261 + overhead_override = true;
262 + } else if (strcmp(*argv, "conservative") == 0) {
264 + * Deliberately over-estimate overhead:
265 + * one whole ATM cell plus ATM framing.
266 + * A safe choice if the actual overhead is unknown.
270 + overhead_set = true;
272 + /* Various ADSL framing schemes, all over ATM cells */
273 + } else if (strcmp(*argv, "ipoa-vcmux") == 0) {
276 + overhead_set = true;
277 + } else if (strcmp(*argv, "ipoa-llcsnap") == 0) {
280 + overhead_set = true;
281 + } else if (strcmp(*argv, "bridged-vcmux") == 0) {
284 + overhead_set = true;
285 + } else if (strcmp(*argv, "bridged-llcsnap") == 0) {
288 + overhead_set = true;
289 + } else if (strcmp(*argv, "pppoa-vcmux") == 0) {
292 + overhead_set = true;
293 + } else if (strcmp(*argv, "pppoa-llc") == 0) {
296 + overhead_set = true;
297 + } else if (strcmp(*argv, "pppoe-vcmux") == 0) {
300 + overhead_set = true;
301 + } else if (strcmp(*argv, "pppoe-llcsnap") == 0) {
304 + overhead_set = true;
306 + /* Typical VDSL2 framing schemes, both over PTM */
307 + /* PTM has 64b/65b coding which absorbs some bandwidth */
308 + } else if (strcmp(*argv, "pppoe-ptm") == 0) {
311 + overhead_set = true;
312 + } else if (strcmp(*argv, "bridged-ptm") == 0) {
315 + overhead_set = true;
317 + } else if (strcmp(*argv, "via-ethernet") == 0) {
319 + * We used to use this flag to manually compensate for
320 + * Linux including the Ethernet header on Ethernet-type
321 + * interfaces, but not on IP-type interfaces.
323 + * It is no longer needed, because Cake now adjusts for
324 + * that automatically, so this keyword is now ignored.
326 + * It would be deleted entirely, but it appears in the
327 + * stats output when the automatic compensation is active.
330 + } else if (strcmp(*argv, "ethernet") == 0) {
331 + /* Ethernet preamble, interframe gap and FCS;
332 + * you may also need to add a VLAN tag */
334 + overhead_set = true;
336 + /* Additional Ethernet-related overhead used by some ISPs */
337 + } else if (strcmp(*argv, "ether-vlan") == 0) {
338 + /* 802.1q VLAN tag - may be repeated */
340 + overhead_set = true;
343 + * DOCSIS cable shapers account for Ethernet frame with FCS,
344 + * but not interframe gap nor preamble.
346 + } else if (strcmp(*argv, "docsis") == 0) {
349 + overhead_set = true;
351 + } else if (strcmp(*argv, "overhead") == 0) {
354 + overhead = strtol(*argv, &p, 10);
355 + if(!p || *p || !*argv || overhead < -64 || overhead > 256) {
356 + fprintf(stderr, "Illegal \"overhead\", valid range is -64 to 256\n");
359 + overhead_set = true;
361 + } else if (strcmp(*argv, "memlimit") == 0) {
363 + if(get_size(&memlimit, *argv)) {
364 + fprintf(stderr, "Illegal value for \"memlimit\": \"%s\"\n", *argv);
368 + } else if (strcmp(*argv, "help") == 0) {
372 + fprintf(stderr, "What is \"%s\"?\n", *argv);
379 + tail = NLMSG_TAIL(n);
380 + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
381 + if (bandwidth || unlimited)
382 + addattr_l(n, 1024, TCA_CAKE_BASE_RATE, &bandwidth, sizeof(bandwidth));
384 + addattr_l(n, 1024, TCA_CAKE_DIFFSERV_MODE, &diffserv, sizeof(diffserv));
386 + addattr_l(n, 1024, TCA_CAKE_ATM, &atm, sizeof(atm));
387 + if (flowmode != -1)
388 + addattr_l(n, 1024, TCA_CAKE_FLOW_MODE, &flowmode, sizeof(flowmode));
390 + addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead));
391 + if (overhead_override) {
393 + addattr_l(n, 1024, TCA_CAKE_ETHERNET, &zero, sizeof(zero));
396 + addattr_l(n, 1024, TCA_CAKE_RTT, &interval, sizeof(interval));
398 + addattr_l(n, 1024, TCA_CAKE_TARGET, &target, sizeof(target));
399 + if (autorate != -1)
400 + addattr_l(n, 1024, TCA_CAKE_AUTORATE, &autorate, sizeof(autorate));
402 + addattr_l(n, 1024, TCA_CAKE_MEMORY, &memlimit, sizeof(memlimit));
404 + addattr_l(n, 1024, TCA_CAKE_NAT, &nat, sizeof(nat));
406 + addattr_l(n, 1024, TCA_CAKE_WASH, &wash, sizeof(wash));
408 + tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
413 +static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
415 + struct rtattr *tb[TCA_CAKE_MAX + 1];
416 + unsigned bandwidth = 0;
417 + unsigned diffserv = 0;
418 + unsigned flowmode = 0;
419 + unsigned interval = 0;
420 + unsigned memlimit = 0;
433 + parse_rtattr_nested(tb, TCA_CAKE_MAX, opt);
435 + if (tb[TCA_CAKE_BASE_RATE] &&
436 + RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE]) >= sizeof(__u32)) {
437 + bandwidth = rta_getattr_u32(tb[TCA_CAKE_BASE_RATE]);
439 + fprintf(f, "bandwidth %s ", sprint_rate(bandwidth, b1));
441 + fprintf(f, "unlimited ");
443 + if (tb[TCA_CAKE_AUTORATE] &&
444 + RTA_PAYLOAD(tb[TCA_CAKE_AUTORATE]) >= sizeof(__u32)) {
445 + autorate = rta_getattr_u32(tb[TCA_CAKE_AUTORATE]);
447 + fprintf(f, "autorate_ingress ");
449 + fprintf(f, "(?autorate?) ");
451 + if (tb[TCA_CAKE_DIFFSERV_MODE] &&
452 + RTA_PAYLOAD(tb[TCA_CAKE_DIFFSERV_MODE]) >= sizeof(__u32)) {
453 + diffserv = rta_getattr_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
456 + fprintf(f, "besteffort ");
459 + fprintf(f, "precedence ");
462 + fprintf(f, "diffserv8 ");
465 + fprintf(f, "diffserv4 ");
468 + fprintf(f, "diffserv-llt ");
471 + fprintf(f, "diffserv3 ");
474 + fprintf(f, "(?diffserv?) ");
478 + if (tb[TCA_CAKE_FLOW_MODE] &&
479 + RTA_PAYLOAD(tb[TCA_CAKE_FLOW_MODE]) >= sizeof(__u32)) {
480 + flowmode = rta_getattr_u32(tb[TCA_CAKE_FLOW_MODE]);
481 + nat = !!(flowmode & 64);
485 + fprintf(f, "flowblind ");
488 + fprintf(f, "srchost ");
491 + fprintf(f, "dsthost ");
494 + fprintf(f, "hosts ");
497 + fprintf(f, "flows ");
500 + fprintf(f, "dual-srchost ");
503 + fprintf(f, "dual-dsthost ");
506 + fprintf(f, "triple-isolate ");
509 + fprintf(f, "(?flowmode?) ");
514 + fprintf(f, "nat ");
516 + if (tb[TCA_CAKE_WASH] &&
517 + RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) {
518 + wash = rta_getattr_u32(tb[TCA_CAKE_WASH]);
520 + if (tb[TCA_CAKE_ATM] &&
521 + RTA_PAYLOAD(tb[TCA_CAKE_ATM]) >= sizeof(__u32)) {
522 + atm = rta_getattr_u32(tb[TCA_CAKE_ATM]);
524 + if (tb[TCA_CAKE_OVERHEAD] &&
525 + RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) {
526 + overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]);
528 + if (tb[TCA_CAKE_ETHERNET] &&
529 + RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) {
530 + ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]);
532 + if (tb[TCA_CAKE_RTT] &&
533 + RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) {
534 + interval = rta_getattr_u32(tb[TCA_CAKE_RTT]);
538 + fprintf(f,"wash ");
541 + fprintf(f, "rtt %s ", sprint_time(interval, b2));
543 + if (!atm && overhead == ethernet) {
544 + fprintf(f, "raw ");
547 + fprintf(f, "atm ");
549 + fprintf(f, "ptm ");
551 + fprintf(f, "noatm ");
553 + fprintf(f, "overhead %d ", overhead);
555 + // This is actually the *amount* of automatic compensation, but we only report
556 + // its presence as a boolean for now.
558 + fprintf(f, "via-ethernet ");
562 + fprintf(f, "memlimit %s", sprint_size(memlimit, b1));
567 +static int cake_print_xstats(struct qdisc_util *qu, FILE *f,
568 + struct rtattr *xstats)
570 + /* fq_codel stats format borrowed */
571 + struct tc_fq_codel_xstats *st;
572 + struct tc_cake_xstats *stnc;
576 + if (xstats == NULL)
579 + if (RTA_PAYLOAD(xstats) < sizeof(st->type))
582 + st = RTA_DATA(xstats);
583 + stnc = RTA_DATA(xstats);
585 + if (st->type == TCA_FQ_CODEL_XSTATS_QDISC && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
586 + fprintf(f, " maxpacket %u drop_overlimit %u new_flow_count %u ecn_mark %u",
587 + st->qdisc_stats.maxpacket,
588 + st->qdisc_stats.drop_overlimit,
589 + st->qdisc_stats.new_flow_count,
590 + st->qdisc_stats.ecn_mark);
591 + fprintf(f, "\n new_flows_len %u old_flows_len %u",
592 + st->qdisc_stats.new_flows_len,
593 + st->qdisc_stats.old_flows_len);
594 + } else if (st->type == TCA_FQ_CODEL_XSTATS_CLASS && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
595 + fprintf(f, " deficit %d count %u lastcount %u ldelay %s",
596 + st->class_stats.deficit,
597 + st->class_stats.count,
598 + st->class_stats.lastcount,
599 + sprint_time(st->class_stats.ldelay, b1));
600 + if (st->class_stats.dropping) {
601 + fprintf(f, " dropping");
602 + if (st->class_stats.drop_next < 0)
603 + fprintf(f, " drop_next -%s",
604 + sprint_time(-st->class_stats.drop_next, b1));
606 + fprintf(f, " drop_next %s",
607 + sprint_time(st->class_stats.drop_next, b1));
609 + } else if (stnc->version >= 1 && stnc->version < 0xFF
610 + && stnc->max_tins == TC_CAKE_MAX_TINS && stnc->tin_cnt <= TC_CAKE_MAX_TINS /* tin_cnt bounds every per-tin array loop */
611 + && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, capacity_estimate))
615 + if(stnc->version >= 3 && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, memory_used) + sizeof(stnc->memory_used)) /* v3 fields must be inside the payload */
616 + fprintf(f, " memory used: %s of %s\n", sprint_size(stnc->memory_used, b1), sprint_size(stnc->memory_limit, b2));
618 + if(stnc->version >= 2 && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, capacity_estimate) + sizeof(stnc->capacity_estimate)) /* v2 field must be inside the payload */
619 + fprintf(f, " capacity estimate: %s\n", sprint_rate(stnc->capacity_estimate, b1));
621 + switch(stnc->tin_cnt) {
623 + fprintf(f, " Bulk Best Effort Voice\n");
627 + fprintf(f, " Bulk Best Effort Video Voice\n");
631 + fprintf(f, " Low Loss Best Effort Low Delay Bulk Net Control\n");
636 + for(i=0; i < stnc->tin_cnt; i++)
637 + fprintf(f, " Tin %u", i);
641 + fprintf(f, " thresh ");
642 + for(i=0; i < stnc->tin_cnt; i++)
643 + fprintf(f, "%12s", sprint_rate(stnc->threshold_rate[i], b1));
646 + fprintf(f, " target ");
647 + for(i=0; i < stnc->tin_cnt; i++)
648 + fprintf(f, "%12s", sprint_time(stnc->target_us[i], b1));
651 + fprintf(f, " interval");
652 + for(i=0; i < stnc->tin_cnt; i++)
653 + fprintf(f, "%12s", sprint_time(stnc->interval_us[i], b1));
656 + fprintf(f, " pk_delay");
657 + for(i=0; i < stnc->tin_cnt; i++)
658 + fprintf(f, "%12s", sprint_time(stnc->peak_delay_us[i], b1));
661 + fprintf(f, " av_delay");
662 + for(i=0; i < stnc->tin_cnt; i++)
663 + fprintf(f, "%12s", sprint_time(stnc->avge_delay_us[i], b1));
666 + fprintf(f, " sp_delay");
667 + for(i=0; i < stnc->tin_cnt; i++)
668 + fprintf(f, "%12s", sprint_time(stnc->base_delay_us[i], b1));
671 + fprintf(f, " pkts ");
672 + for(i=0; i < stnc->tin_cnt; i++)
673 + fprintf(f, "%12u", stnc->sent[i].packets);
676 + fprintf(f, " bytes ");
677 + for(i=0; i < stnc->tin_cnt; i++)
678 + fprintf(f, "%12llu", stnc->sent[i].bytes);
681 + fprintf(f, " way_inds");
682 + for(i=0; i < stnc->tin_cnt; i++)
683 + fprintf(f, "%12u", stnc->way_indirect_hits[i]);
686 + fprintf(f, " way_miss");
687 + for(i=0; i < stnc->tin_cnt; i++)
688 + fprintf(f, "%12u", stnc->way_misses[i]);
691 + fprintf(f, " way_cols");
692 + for(i=0; i < stnc->tin_cnt; i++)
693 + fprintf(f, "%12u", stnc->way_collisions[i]);
696 + fprintf(f, " drops ");
697 + for(i=0; i < stnc->tin_cnt; i++)
698 + fprintf(f, "%12u", stnc->dropped[i].packets);
701 + fprintf(f, " marks ");
702 + for(i=0; i < stnc->tin_cnt; i++)
703 + fprintf(f, "%12u", stnc->ecn_marked[i].packets);
706 + fprintf(f, " sp_flows");
707 + for(i=0; i < stnc->tin_cnt; i++)
708 + fprintf(f, "%12u", stnc->sparse_flows[i]);
711 + fprintf(f, " bk_flows");
712 + for(i=0; i < stnc->tin_cnt; i++)
713 + fprintf(f, "%12u", stnc->bulk_flows[i]);
716 + if(stnc->version >= 4) {
717 + fprintf(f, " un_flows");
718 + for(i=0; i < stnc->tin_cnt; i++)
719 + fprintf(f, "%12u", stnc->unresponse_flows[i]);
723 + fprintf(f, " max_len ");
724 + for(i=0; i < stnc->tin_cnt; i++)
725 + fprintf(f, "%12u", stnc->max_skblen[i]);
733 +struct qdisc_util cake_qdisc_util = {
735 + .parse_qopt = cake_parse_opt,
736 + .print_qopt = cake_print_opt,
737 + .print_xstats = cake_print_xstats,