1 --- a/include/linux/pkt_sched.h
2 +++ b/include/linux/pkt_sched.h
3 @@ -850,4 +850,57 @@ struct tc_pie_xstats {
4 __u32 maxq; /* maximum queue size */
5 __u32 ecn_mark; /* packets marked with ecn*/
12 + TCA_CAKE_DIFFSERV_MODE,
23 +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1)
25 +struct tc_cake_traffic_stats {
31 +#define TC_CAKE_MAX_TINS (8)
32 +struct tc_cake_xstats {
33 + __u16 version; /* == 4, increments when struct extended */
34 + __u8 max_tins; /* == TC_CAKE_MAX_TINS */
35 + __u8 tin_cnt; /* <= TC_CAKE_MAX_TINS */
37 + __u32 threshold_rate [TC_CAKE_MAX_TINS];
38 + __u32 target_us [TC_CAKE_MAX_TINS];
39 + struct tc_cake_traffic_stats sent [TC_CAKE_MAX_TINS];
40 + struct tc_cake_traffic_stats dropped [TC_CAKE_MAX_TINS];
41 + struct tc_cake_traffic_stats ecn_marked[TC_CAKE_MAX_TINS];
42 + struct tc_cake_traffic_stats backlog [TC_CAKE_MAX_TINS];
43 + __u32 interval_us [TC_CAKE_MAX_TINS];
44 + __u32 way_indirect_hits[TC_CAKE_MAX_TINS];
45 + __u32 way_misses [TC_CAKE_MAX_TINS];
46 + __u32 way_collisions [TC_CAKE_MAX_TINS];
47 + __u32 peak_delay_us [TC_CAKE_MAX_TINS]; /* ~= delay to bulk flows */
48 + __u32 avge_delay_us [TC_CAKE_MAX_TINS];
49 + __u32 base_delay_us [TC_CAKE_MAX_TINS]; /* ~= delay to sparse flows */
50 + __u16 sparse_flows [TC_CAKE_MAX_TINS];
51 + __u16 bulk_flows [TC_CAKE_MAX_TINS];
52 + __u16 unresponse_flows [TC_CAKE_MAX_TINS]; /* v4 - was u32 last_len */
53 + __u16 spare [TC_CAKE_MAX_TINS]; /* v4 - split last_len */
54 + __u32 max_skblen [TC_CAKE_MAX_TINS];
55 + __u32 capacity_estimate; /* version 2 */
56 + __u32 memory_limit; /* version 3 */
57 + __u32 memory_used; /* version 3 */
63 @@ -63,6 +63,7 @@ TCMODULES += q_codel.o
64 TCMODULES += q_fq_codel.o
67 +TCMODULES += q_cake.o
75 + * Common Applications Kept Enhanced -- CAKE
77 + * Copyright (C) 2014-2015 Jonathan Morton <chromatix99@gmail.com>
79 + * Redistribution and use in source and binary forms, with or without
80 + * modification, are permitted provided that the following conditions
82 + * 1. Redistributions of source code must retain the above copyright
83 + * notice, this list of conditions, and the following disclaimer,
84 + * without modification.
85 + * 2. Redistributions in binary form must reproduce the above copyright
86 + * notice, this list of conditions and the following disclaimer in the
87 + * documentation and/or other materials provided with the distribution.
88 + * 3. The names of the authors may not be used to endorse or promote products
89 + * derived from this software without specific prior written permission.
91 + * Alternatively, provided that this notice is retained in full, this
92 + * software may be distributed under the terms of the GNU General
93 + * Public License ("GPL") version 2, in which case the provisions of the
94 + * GPL apply INSTEAD OF those given above.
96 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
97 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
98 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
99 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
100 + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
101 + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
102 + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
103 + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
104 + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
105 + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
106 + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
117 +#include <sys/socket.h>
118 +#include <netinet/in.h>
119 +#include <arpa/inet.h>
123 +#include "tc_util.h"
125 +static void explain(void)
127 + fprintf(stderr, "Usage: ... cake [ bandwidth RATE | unlimited* | autorate_ingress ]\n"
128 + " [ rtt TIME | datacentre | lan | metro | regional | internet* | oceanic | satellite | interplanetary ]\n"
129 + " [ besteffort | precedence | diffserv8 | diffserv4* ]\n"
130 + " [ flowblind | srchost | dsthost | hosts | flows* | dual-srchost | dual-dsthost | triple-isolate ] [ nat | nonat* ]\n"
131 + " [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n"
132 + " [ memlimit LIMIT ]\n"
133 + " (* marks defaults)\n");
136 +static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv,
137 + struct nlmsghdr *n)
140 + unsigned bandwidth = 0;
141 + unsigned interval = 0;
142 + unsigned target = 0;
143 + unsigned diffserv = 0;
144 + unsigned memlimit = 0;
146 + bool overhead_set = false;
151 + struct rtattr *tail;
154 + if (strcmp(*argv, "bandwidth") == 0) {
156 + if (get_rate(&bandwidth, *argv)) {
157 + fprintf(stderr, "Illegal \"bandwidth\"\n");
162 + } else if (strcmp(*argv, "unlimited") == 0) {
166 + } else if (strcmp(*argv, "autorate_ingress") == 0) {
169 + } else if (strcmp(*argv, "rtt") == 0) {
171 + if (get_time(&interval, *argv)) {
172 + fprintf(stderr, "Illegal \"rtt\"\n");
175 + target = interval / 20;
178 + } else if (strcmp(*argv, "datacentre") == 0) {
181 + } else if (strcmp(*argv, "lan") == 0) {
184 + } else if (strcmp(*argv, "metro") == 0) {
187 + } else if (strcmp(*argv, "regional") == 0) {
190 + } else if (strcmp(*argv, "internet") == 0) {
193 + } else if (strcmp(*argv, "oceanic") == 0) {
196 + } else if (strcmp(*argv, "satellite") == 0) {
197 + interval = 1000000;
199 + } else if (strcmp(*argv, "interplanetary") == 0) {
200 + interval = 3600000000U;
203 + } else if (strcmp(*argv, "besteffort") == 0) {
205 + } else if (strcmp(*argv, "precedence") == 0) {
207 + } else if (strcmp(*argv, "diffserv8") == 0) {
209 + } else if (strcmp(*argv, "diffserv4") == 0) {
211 + } else if (strcmp(*argv, "diffserv") == 0) {
213 + } else if (strcmp(*argv, "diffserv-llt") == 0) {
216 + } else if (strcmp(*argv, "flowblind") == 0) {
218 + } else if (strcmp(*argv, "srchost") == 0) {
220 + } else if (strcmp(*argv, "dsthost") == 0) {
222 + } else if (strcmp(*argv, "hosts") == 0) {
224 + } else if (strcmp(*argv, "flows") == 0) {
226 + } else if (strcmp(*argv, "dual-srchost") == 0) {
228 + } else if (strcmp(*argv, "dual-dsthost") == 0) {
230 + } else if (strcmp(*argv, "triple-isolate") == 0) {
233 + } else if (strcmp(*argv, "nat") == 0) {
235 + } else if (strcmp(*argv, "nonat") == 0) {
238 + } else if (strcmp(*argv, "ptm") == 0) {
240 + } else if (strcmp(*argv, "atm") == 0) {
242 + } else if (strcmp(*argv, "noatm") == 0) {
245 + } else if (strcmp(*argv, "raw") == 0) {
248 + overhead_set = true;
249 + } else if (strcmp(*argv, "conservative") == 0) {
251 + * Deliberately over-estimate overhead:
252 + * one whole ATM cell plus ATM framing.
253 + * A safe choice if the actual overhead is unknown.
257 + overhead_set = true;
260 + * DOCSIS overhead figures courtesy of Greg White @ CableLabs.
261 + * The "-ip" versions include the Ethernet frame header, in case
262 + * you are shaping an IP interface instead of an Ethernet one.
264 + } else if (strcmp(*argv, "docsis-downstream-ip") == 0) {
267 + overhead_set = true;
268 + } else if (strcmp(*argv, "docsis-downstream") == 0) {
270 + overhead += 35 - 14;
271 + overhead_set = true;
272 + } else if (strcmp(*argv, "docsis-upstream-ip") == 0) {
275 + overhead_set = true;
276 + } else if (strcmp(*argv, "docsis-upstream") == 0) {
278 + overhead += 28 - 14;
279 + overhead_set = true;
281 + /* Various ADSL framing schemes, all over ATM cells */
282 + } else if (strcmp(*argv, "ipoa-vcmux") == 0) {
285 + overhead_set = true;
286 + } else if (strcmp(*argv, "ipoa-llcsnap") == 0) {
289 + overhead_set = true;
290 + } else if (strcmp(*argv, "bridged-vcmux") == 0) {
293 + overhead_set = true;
294 + } else if (strcmp(*argv, "bridged-llcsnap") == 0) {
297 + overhead_set = true;
298 + } else if (strcmp(*argv, "pppoa-vcmux") == 0) {
301 + overhead_set = true;
302 + } else if (strcmp(*argv, "pppoa-llc") == 0) {
305 + overhead_set = true;
306 + } else if (strcmp(*argv, "pppoe-vcmux") == 0) {
309 + overhead_set = true;
310 + } else if (strcmp(*argv, "pppoe-llcsnap") == 0) {
313 + overhead_set = true;
315 + /* Typical VDSL2 framing schemes, both over PTM */
316 + /* PTM has 64b/65b coding which absorbs some bandwidth */
317 + } else if (strcmp(*argv, "pppoe-ptm") == 0) {
320 + overhead_set = true;
321 + } else if (strcmp(*argv, "bridged-ptm") == 0) {
324 + overhead_set = true;
326 + } else if (strcmp(*argv, "via-ethernet") == 0) {
328 + * The above overheads are relative to an IP packet,
329 + * but Linux includes Ethernet framing overhead already
330 + * if we are shaping an Ethernet interface rather than
334 + overhead_set = true;
336 + /* Additional Ethernet-related overheads used by some ISPs */
337 + } else if (strcmp(*argv, "ether-phy") == 0) {
338 + /* Ethernet preamble & interframe gap: 20 bytes.
339 + * Linux will have already accounted for MACs & frame type (14 bytes);
340 + * you probably want to add an FCS as well. */
342 + overhead_set = true;
343 + } else if (strcmp(*argv, "ether-all") == 0) {
344 + /* Ethernet preamble & interframe gap & FCS.
345 + * Linux will have already accounted for MACs & frame type (14 bytes);
346 + * you may need to add a VLAN tag. */
348 + overhead_set = true;
350 + } else if (strcmp(*argv, "ether-fcs") == 0) {
351 + /* Frame Check Sequence */
352 + /* we ignore the minimum frame size, because IP packets usually meet it */
354 + overhead_set = true;
355 + } else if (strcmp(*argv, "ether-vlan") == 0) {
356 + /* 802.1q VLAN tag - may be repeated */
358 + overhead_set = true;
360 + } else if (strcmp(*argv, "overhead") == 0) {
363 + overhead = strtol(*argv, &p, 10);
364 + if(!p || *p || !*argv || overhead < -64 || overhead > 256) {
365 + fprintf(stderr, "Illegal \"overhead\", valid range is -64 to 256\n");
368 + overhead_set = true;
370 + } else if (strcmp(*argv, "memlimit") == 0) {
372 + if(get_size(&memlimit, *argv)) {
373 + fprintf(stderr, "Illegal value for \"memlimit\": \"%s\"\n", *argv);
377 + } else if (strcmp(*argv, "help") == 0) {
381 + fprintf(stderr, "What is \"%s\"?\n", *argv);
388 + tail = NLMSG_TAIL(n);
389 + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
390 + if (bandwidth || unlimited)
391 + addattr_l(n, 1024, TCA_CAKE_BASE_RATE, &bandwidth, sizeof(bandwidth));
393 + addattr_l(n, 1024, TCA_CAKE_DIFFSERV_MODE, &diffserv, sizeof(diffserv));
395 + addattr_l(n, 1024, TCA_CAKE_ATM, &atm, sizeof(atm));
396 + if (flowmode != -1)
397 + addattr_l(n, 1024, TCA_CAKE_FLOW_MODE, &flowmode, sizeof(flowmode));
399 + addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead));
401 + addattr_l(n, 1024, TCA_CAKE_RTT, &interval, sizeof(interval));
403 + addattr_l(n, 1024, TCA_CAKE_TARGET, &target, sizeof(target));
404 + if (autorate != -1)
405 + addattr_l(n, 1024, TCA_CAKE_AUTORATE, &autorate, sizeof(autorate));
407 + addattr_l(n, 1024, TCA_CAKE_MEMORY, &memlimit, sizeof(memlimit));
409 + addattr_l(n, 1024, TCA_CAKE_NAT, &nat, sizeof(nat));
411 + tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
416 +static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
418 + struct rtattr *tb[TCA_CAKE_MAX + 1];
419 + unsigned bandwidth = 0;
420 + unsigned diffserv = 0;
421 + unsigned flowmode = 0;
422 + unsigned interval = 0;
423 + unsigned memlimit = 0;
434 + parse_rtattr_nested(tb, TCA_CAKE_MAX, opt);
436 + if (tb[TCA_CAKE_BASE_RATE] &&
437 + RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE]) >= sizeof(__u32)) {
438 + bandwidth = rta_getattr_u32(tb[TCA_CAKE_BASE_RATE]);
440 + fprintf(f, "bandwidth %s ", sprint_rate(bandwidth, b1));
442 + fprintf(f, "unlimited ");
444 + if (tb[TCA_CAKE_AUTORATE] &&
445 + RTA_PAYLOAD(tb[TCA_CAKE_AUTORATE]) >= sizeof(__u32)) {
446 + autorate = rta_getattr_u32(tb[TCA_CAKE_AUTORATE]);
448 + fprintf(f, "autorate_ingress ");
450 + fprintf(f, "(?autorate?) ");
452 + if (tb[TCA_CAKE_DIFFSERV_MODE] &&
453 + RTA_PAYLOAD(tb[TCA_CAKE_DIFFSERV_MODE]) >= sizeof(__u32)) {
454 + diffserv = rta_getattr_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
457 + fprintf(f, "besteffort ");
460 + fprintf(f, "precedence ");
463 + fprintf(f, "diffserv8 ");
466 + fprintf(f, "diffserv4 ");
469 + fprintf(f, "diffserv-llt ");
472 + fprintf(f, "(?diffserv?) ");
476 + if (tb[TCA_CAKE_FLOW_MODE] &&
477 + RTA_PAYLOAD(tb[TCA_CAKE_FLOW_MODE]) >= sizeof(__u32)) {
478 + flowmode = rta_getattr_u32(tb[TCA_CAKE_FLOW_MODE]);
479 + nat = !!(flowmode & 64);
483 + fprintf(f, "flowblind ");
486 + fprintf(f, "srchost ");
489 + fprintf(f, "dsthost ");
492 + fprintf(f, "hosts ");
495 + fprintf(f, "flows ");
498 + fprintf(f, "dual-srchost ");
501 + fprintf(f, "dual-dsthost ");
504 + fprintf(f, "triple-isolate ");
507 + fprintf(f, "(?flowmode?) ");
512 + fprintf(f, "nat ");
514 + if (tb[TCA_CAKE_ATM] &&
515 + RTA_PAYLOAD(tb[TCA_CAKE_ATM]) >= sizeof(__u32)) {
516 + atm = rta_getattr_u32(tb[TCA_CAKE_ATM]);
518 + if (tb[TCA_CAKE_OVERHEAD] &&
519 + RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) {
520 + overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]);
522 + if (tb[TCA_CAKE_RTT] &&
523 + RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) {
524 + interval = rta_getattr_u32(tb[TCA_CAKE_RTT]);
528 + fprintf(f, "rtt %s ", sprint_time(interval, b2));
531 + fprintf(f, "atm ");
533 + fprintf(f, "ptm ");
535 + fprintf(f, "noatm ");
537 + if (overhead || atm)
538 + fprintf(f, "overhead %d ", overhead);
540 + if (!atm && !overhead)
541 + fprintf(f, "raw ");
544 + fprintf(f, "memlimit %s", sprint_size(memlimit, b1));
549 +static int cake_print_xstats(struct qdisc_util *qu, FILE *f,
550 + struct rtattr *xstats)
552 + /* fq_codel stats format borrowed */
553 + struct tc_fq_codel_xstats *st;
554 + struct tc_cake_xstats *stnc;
558 + if (xstats == NULL)
561 + if (RTA_PAYLOAD(xstats) < sizeof(st->type))
564 + st = RTA_DATA(xstats);
565 + stnc = RTA_DATA(xstats);
567 + if (st->type == TCA_FQ_CODEL_XSTATS_QDISC && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
568 + fprintf(f, " maxpacket %u drop_overlimit %u new_flow_count %u ecn_mark %u",
569 + st->qdisc_stats.maxpacket,
570 + st->qdisc_stats.drop_overlimit,
571 + st->qdisc_stats.new_flow_count,
572 + st->qdisc_stats.ecn_mark);
573 + fprintf(f, "\n new_flows_len %u old_flows_len %u",
574 + st->qdisc_stats.new_flows_len,
575 + st->qdisc_stats.old_flows_len);
576 + } else if (st->type == TCA_FQ_CODEL_XSTATS_CLASS && RTA_PAYLOAD(xstats) >= sizeof(*st)) {
577 + fprintf(f, " deficit %d count %u lastcount %u ldelay %s",
578 + st->class_stats.deficit,
579 + st->class_stats.count,
580 + st->class_stats.lastcount,
581 + sprint_time(st->class_stats.ldelay, b1));
582 + if (st->class_stats.dropping) {
583 + fprintf(f, " dropping");
584 + if (st->class_stats.drop_next < 0)
585 + fprintf(f, " drop_next -%s",
586 + sprint_time(-st->class_stats.drop_next, b1));
588 + fprintf(f, " drop_next %s",
589 + sprint_time(st->class_stats.drop_next, b1));
591 + } else if (stnc->version >= 1 && stnc->version < 0xFF
592 + && stnc->max_tins == TC_CAKE_MAX_TINS
593 + && RTA_PAYLOAD(xstats) >= offsetof(struct tc_cake_xstats, capacity_estimate))
597 + if(stnc->version >= 3)
598 + fprintf(f, " memory used: %s of %s\n", sprint_size(stnc->memory_used, b1), sprint_size(stnc->memory_limit, b2));
600 + if(stnc->version >= 2)
601 + fprintf(f, " capacity estimate: %s\n", sprint_rate(stnc->capacity_estimate, b1));
603 + switch(stnc->tin_cnt) {
605 + fprintf(f, " Bulk Best Effort Video Voice\n");
609 + fprintf(f, " Low Loss Best Effort Low Delay Bulk Net Control\n");
614 + for(i=0; i < stnc->tin_cnt; i++)
615 + fprintf(f, " Tin %u", i);
619 + fprintf(f, " thresh ");
620 + for(i=0; i < stnc->tin_cnt; i++)
621 + fprintf(f, "%12s", sprint_rate(stnc->threshold_rate[i], b1));
624 + fprintf(f, " target ");
625 + for(i=0; i < stnc->tin_cnt; i++)
626 + fprintf(f, "%12s", sprint_time(stnc->target_us[i], b1));
629 + fprintf(f, " interval");
630 + for(i=0; i < stnc->tin_cnt; i++)
631 + fprintf(f, "%12s", sprint_time(stnc->interval_us[i], b1));
634 + fprintf(f, " pk_delay");
635 + for(i=0; i < stnc->tin_cnt; i++)
636 + fprintf(f, "%12s", sprint_time(stnc->peak_delay_us[i], b1));
639 + fprintf(f, " av_delay");
640 + for(i=0; i < stnc->tin_cnt; i++)
641 + fprintf(f, "%12s", sprint_time(stnc->avge_delay_us[i], b1));
644 + fprintf(f, " sp_delay");
645 + for(i=0; i < stnc->tin_cnt; i++)
646 + fprintf(f, "%12s", sprint_time(stnc->base_delay_us[i], b1));
649 + fprintf(f, " pkts ");
650 + for(i=0; i < stnc->tin_cnt; i++)
651 + fprintf(f, "%12u", stnc->sent[i].packets);
654 + fprintf(f, " bytes ");
655 + for(i=0; i < stnc->tin_cnt; i++)
656 + fprintf(f, "%12llu", stnc->sent[i].bytes);
659 + fprintf(f, " way_inds");
660 + for(i=0; i < stnc->tin_cnt; i++)
661 + fprintf(f, "%12u", stnc->way_indirect_hits[i]);
664 + fprintf(f, " way_miss");
665 + for(i=0; i < stnc->tin_cnt; i++)
666 + fprintf(f, "%12u", stnc->way_misses[i]);
669 + fprintf(f, " way_cols");
670 + for(i=0; i < stnc->tin_cnt; i++)
671 + fprintf(f, "%12u", stnc->way_collisions[i]);
674 + fprintf(f, " drops ");
675 + for(i=0; i < stnc->tin_cnt; i++)
676 + fprintf(f, "%12u", stnc->dropped[i].packets);
679 + fprintf(f, " marks ");
680 + for(i=0; i < stnc->tin_cnt; i++)
681 + fprintf(f, "%12u", stnc->ecn_marked[i].packets);
684 + fprintf(f, " sp_flows");
685 + for(i=0; i < stnc->tin_cnt; i++)
686 + fprintf(f, "%12u", stnc->sparse_flows[i]);
689 + fprintf(f, " bk_flows");
690 + for(i=0; i < stnc->tin_cnt; i++)
691 + fprintf(f, "%12u", stnc->bulk_flows[i]);
694 + if(stnc->version >= 4) {
695 + fprintf(f, " un_flows");
696 + for(i=0; i < stnc->tin_cnt; i++)
697 + fprintf(f, "%12u", stnc->unresponse_flows[i]);
701 + fprintf(f, " max_len ");
702 + for(i=0; i < stnc->tin_cnt; i++)
703 + fprintf(f, "%12u", stnc->max_skblen[i]);
711 +struct qdisc_util cake_qdisc_util = {
713 + .parse_qopt = cake_parse_opt,
714 + .print_qopt = cake_print_opt,
715 + .print_xstats = cake_print_xstats,