f61168c696f67f72092be8c820299e5a7ec64289
[openwrt/openwrt.git] / package / network / utils / iproute2 / patches / 190-add-cake-to-tc.patch
1 --- a/include/uapi/linux/pkt_sched.h
2 +++ b/include/uapi/linux/pkt_sched.h
3 @@ -934,4 +934,118 @@ enum {
4
5 #define TCA_CBS_MAX (__TCA_CBS_MAX - 1)
6
7 +/* CAKE */
8 +enum { /* TCA_CAKE_* identifiers, numbered consecutively from TCA_CAKE_UNSPEC = 0 */
9 + TCA_CAKE_UNSPEC,
10 + TCA_CAKE_PAD,
11 + TCA_CAKE_BASE_RATE64,
12 + TCA_CAKE_DIFFSERV_MODE,
13 + TCA_CAKE_ATM,
14 + TCA_CAKE_FLOW_MODE,
15 + TCA_CAKE_OVERHEAD,
16 + TCA_CAKE_RTT,
17 + TCA_CAKE_TARGET,
18 + TCA_CAKE_AUTORATE,
19 + TCA_CAKE_MEMORY,
20 + TCA_CAKE_NAT,
21 + TCA_CAKE_RAW, // was _ETHERNET
22 + TCA_CAKE_WASH,
23 + TCA_CAKE_MPU,
24 + TCA_CAKE_INGRESS,
25 + TCA_CAKE_ACK_FILTER,
26 + TCA_CAKE_SPLIT_GSO,
27 + __TCA_CAKE_MAX /* sentinel: one past the last real identifier */
28 +};
29 +#define TCA_CAKE_MAX (__TCA_CAKE_MAX - 1) /* equals TCA_CAKE_SPLIT_GSO, the last real identifier */
30 +
31 +enum { /* TCA_CAKE_STATS_* identifiers; value 0 is taken by the __TCA_CAKE_STATS_INVALID placeholder */
32 + __TCA_CAKE_STATS_INVALID,
33 + TCA_CAKE_STATS_PAD,
34 + TCA_CAKE_STATS_CAPACITY_ESTIMATE64,
35 + TCA_CAKE_STATS_MEMORY_LIMIT,
36 + TCA_CAKE_STATS_MEMORY_USED,
37 + TCA_CAKE_STATS_AVG_NETOFF,
38 + TCA_CAKE_STATS_MIN_NETLEN,
39 + TCA_CAKE_STATS_MAX_NETLEN,
40 + TCA_CAKE_STATS_MIN_ADJLEN,
41 + TCA_CAKE_STATS_MAX_ADJLEN,
42 + TCA_CAKE_STATS_TIN_STATS,
43 + TCA_CAKE_STATS_DEFICIT,
44 + TCA_CAKE_STATS_COBALT_COUNT,
45 + TCA_CAKE_STATS_DROPPING,
46 + TCA_CAKE_STATS_DROP_NEXT_US,
47 + TCA_CAKE_STATS_P_DROP,
48 + TCA_CAKE_STATS_BLUE_TIMER_US,
49 + __TCA_CAKE_STATS_MAX /* sentinel: one past the last real identifier */
50 +};
51 +#define TCA_CAKE_STATS_MAX (__TCA_CAKE_STATS_MAX - 1) /* equals TCA_CAKE_STATS_BLUE_TIMER_US */
52 +
53 +enum { /* TCA_CAKE_TIN_STATS_* identifiers; value 0 is taken by the __TCA_CAKE_TIN_STATS_INVALID placeholder */
54 + __TCA_CAKE_TIN_STATS_INVALID,
55 + TCA_CAKE_TIN_STATS_PAD,
56 + TCA_CAKE_TIN_STATS_SENT_PACKETS,
57 + TCA_CAKE_TIN_STATS_SENT_BYTES64,
58 + TCA_CAKE_TIN_STATS_DROPPED_PACKETS,
59 + TCA_CAKE_TIN_STATS_DROPPED_BYTES64,
60 + TCA_CAKE_TIN_STATS_ACKS_DROPPED_PACKETS,
61 + TCA_CAKE_TIN_STATS_ACKS_DROPPED_BYTES64,
62 + TCA_CAKE_TIN_STATS_ECN_MARKED_PACKETS,
63 + TCA_CAKE_TIN_STATS_ECN_MARKED_BYTES64,
64 + TCA_CAKE_TIN_STATS_BACKLOG_PACKETS,
65 + TCA_CAKE_TIN_STATS_BACKLOG_BYTES,
66 + TCA_CAKE_TIN_STATS_THRESHOLD_RATE64,
67 + TCA_CAKE_TIN_STATS_TARGET_US,
68 + TCA_CAKE_TIN_STATS_INTERVAL_US,
69 + TCA_CAKE_TIN_STATS_WAY_INDIRECT_HITS,
70 + TCA_CAKE_TIN_STATS_WAY_MISSES,
71 + TCA_CAKE_TIN_STATS_WAY_COLLISIONS,
72 + TCA_CAKE_TIN_STATS_PEAK_DELAY_US,
73 + TCA_CAKE_TIN_STATS_AVG_DELAY_US,
74 + TCA_CAKE_TIN_STATS_BASE_DELAY_US,
75 + TCA_CAKE_TIN_STATS_SPARSE_FLOWS,
76 + TCA_CAKE_TIN_STATS_BULK_FLOWS,
77 + TCA_CAKE_TIN_STATS_UNRESPONSIVE_FLOWS,
78 + TCA_CAKE_TIN_STATS_MAX_SKBLEN,
79 + TCA_CAKE_TIN_STATS_FLOW_QUANTUM,
80 + __TCA_CAKE_TIN_STATS_MAX /* sentinel: one past the last real identifier */
81 +};
82 +#define TCA_CAKE_TIN_STATS_MAX (__TCA_CAKE_TIN_STATS_MAX - 1) /* equals TCA_CAKE_TIN_STATS_FLOW_QUANTUM */
83 +#define TC_CAKE_MAX_TINS (8) /* presumably the upper bound on tins per qdisc -- TODO confirm against the kernel side */
84 +
85 +enum { /* values 0..7 double as a bitmask: SRC_IP = 1, DST_IP = 2, FLOWS = 4 */
86 + CAKE_FLOW_NONE = 0,
87 + CAKE_FLOW_SRC_IP,
88 + CAKE_FLOW_DST_IP,
89 + CAKE_FLOW_HOSTS, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_DST_IP */
90 + CAKE_FLOW_FLOWS,
91 + CAKE_FLOW_DUAL_SRC, /* = CAKE_FLOW_SRC_IP | CAKE_FLOW_FLOWS */
92 + CAKE_FLOW_DUAL_DST, /* = CAKE_FLOW_DST_IP | CAKE_FLOW_FLOWS */
93 + CAKE_FLOW_TRIPLE, /* = CAKE_FLOW_HOSTS | CAKE_FLOW_FLOWS */
94 + CAKE_FLOW_MAX, /* = 8: count of the values above, not itself a selectable value */
95 +};
96 +
97 +enum { /* values index the diffserv_names[] keyword table in tc/q_cake.c */
98 + CAKE_DIFFSERV_DIFFSERV3 = 0,
99 + CAKE_DIFFSERV_DIFFSERV4,
100 + CAKE_DIFFSERV_DIFFSERV8,
101 + CAKE_DIFFSERV_BESTEFFORT,
102 + CAKE_DIFFSERV_PRECEDENCE,
103 + CAKE_DIFFSERV_MAX /* = 5: count of the values above; used as the size of diffserv_names[] */
104 +};
105 +
106 +enum { /* presumably matches tc's no-ack-filter / ack-filter / ack-filter-aggressive keywords -- TODO confirm */
107 + CAKE_ACK_NONE = 0,
108 + CAKE_ACK_FILTER,
109 + CAKE_ACK_AGGRESSIVE,
110 + CAKE_ACK_MAX /* = 3: count of the values above */
111 +};
112 +
113 +enum { /* tc's "noatm", "atm" and "ptm" keywords store NONE, ATM and PTM respectively */
114 + CAKE_ATM_NONE = 0,
115 + CAKE_ATM_ATM,
116 + CAKE_ATM_PTM,
117 + CAKE_ATM_MAX /* = 3: count of the values above */
118 +};
119 +
120 +
121 #endif
122 --- /dev/null
123 +++ b/man/man8/tc-cake.8
124 @@ -0,0 +1,710 @@
125 +.TH CAKE 8 "19 July 2018" "iproute2" "Linux"
126 +.SH NAME
127 +CAKE \- Common Applications Kept Enhanced (CAKE)
128 +.SH SYNOPSIS
129 +.B tc qdisc ... cake
130 +.br
131 +[
132 +.BR bandwidth
133 +RATE |
134 +.BR unlimited*
135 +|
136 +.BR autorate-ingress
137 +]
138 +.br
139 +[
140 +.BR rtt
141 +TIME |
142 +.BR datacentre
143 +|
144 +.BR lan
145 +|
146 +.BR metro
147 +|
148 +.BR regional
149 +|
150 +.BR internet*
151 +|
152 +.BR oceanic
153 +|
154 +.BR satellite
155 +|
156 +.BR interplanetary
157 +]
158 +.br
159 +[
160 +.BR besteffort
161 +|
162 +.BR diffserv8
163 +|
164 +.BR diffserv4
165 +|
166 +.BR diffserv3*
167 +]
168 +.br
169 +[
170 +.BR flowblind
171 +|
172 +.BR srchost
173 +|
174 +.BR dsthost
175 +|
176 +.BR hosts
177 +|
178 +.BR flows
179 +|
180 +.BR dual-srchost
181 +|
182 +.BR dual-dsthost
183 +|
184 +.BR triple-isolate*
185 +]
186 +.br
187 +[
188 +.BR nat
189 +|
190 +.BR nonat*
191 +]
192 +.br
193 +[
194 +.BR wash
195 +|
196 +.BR nowash*
197 +]
198 +.br
199 +[
200 +.BR split-gso*
201 +|
202 +.BR no-split-gso
203 +]
204 +.br
205 +[
206 +.BR ack-filter
207 +|
208 +.BR ack-filter-aggressive
209 +|
210 +.BR no-ack-filter*
211 +]
212 +.br
213 +[
214 +.BR memlimit
215 +LIMIT ]
216 +.br
217 +[
218 +.BR ptm
219 +|
220 +.BR atm
221 +|
222 +.BR noatm*
223 +]
224 +.br
225 +[
226 +.BR overhead
227 +N |
228 +.BR conservative
229 +|
230 +.BR raw*
231 +]
232 +.br
233 +[
234 +.BR mpu
235 +N ]
236 +.br
237 +[
238 +.BR ingress
239 +|
240 +.BR egress*
241 +]
242 +.br
243 +(* marks defaults)
244 +
245 +
246 +.SH DESCRIPTION
247 +CAKE (Common Applications Kept Enhanced) is a shaping-capable queue discipline
248 +which uses both AQM and FQ. It combines COBALT, which is an AQM algorithm
249 +combining Codel and BLUE, a shaper which operates in deficit mode, and a variant
250 +of DRR++ for flow isolation. 8-way set-associative hashing is used to virtually
251 +eliminate hash collisions. Priority queuing is available through a simplified
252 +diffserv implementation. Overhead compensation for various encapsulation
253 +schemes is tightly integrated.
254 +
255 +All settings are optional; the default settings are chosen to be sensible in
256 +most common deployments. Most people will only need to set the
257 +.B bandwidth
258 +parameter to get useful results, but reading the
259 +.B Overhead Compensation
260 +and
261 +.B Round Trip Time
262 +sections is strongly encouraged.
263 +
264 +.SH SHAPER PARAMETERS
265 +CAKE uses a deficit-mode shaper, which does not exhibit the initial burst
266 +typical of token-bucket shapers. It will automatically burst precisely as much
267 +as required to maintain the configured throughput. As such, it is very
268 +straightforward to configure.
269 +.PP
270 +.B unlimited
271 +(default)
272 +.br
273 + No limit on the bandwidth.
274 +.PP
275 +.B bandwidth
276 +RATE
277 +.br
278 + Set the shaper bandwidth. See
279 +.BR tc(8)
280 +or examples below for details of the RATE value.
281 +.PP
282 +.B autorate-ingress
283 +.br
284 + Automatic capacity estimation based on traffic arriving at this qdisc.
285 +This is most likely to be useful with cellular links, which tend to change
286 +quality randomly. A
287 +.B bandwidth
288 +parameter can be used in conjunction to specify an initial estimate. The shaper
289 +will periodically be set to a bandwidth slightly below the estimated rate. This
290 +estimator cannot estimate the bandwidth of links downstream of itself.
291 +
292 +.SH OVERHEAD COMPENSATION PARAMETERS
293 +The size of each packet on the wire may differ from that seen by Linux. The
294 +following parameters allow CAKE to compensate for this difference by internally
295 +considering each packet to be bigger than Linux informs it. To assist users who
296 +are not expert network engineers, keywords have been provided to represent a
297 +number of common link technologies.
298 +
299 +.SS Manual Overhead Specification
300 +.B overhead
301 +BYTES
302 +.br
303 + Adds BYTES to the size of each packet. BYTES may be negative; values
304 +between -64 and 256 (inclusive) are accepted.
305 +.PP
306 +.B mpu
307 +BYTES
308 +.br
309 + Rounds each packet (including overhead) up to a minimum length
310 +BYTES. BYTES may not be negative; values between 0 and 256 (inclusive)
311 +are accepted.
312 +.PP
313 +.B atm
314 +.br
315 + Compensates for ATM cell framing, which is normally found on ADSL links.
316 +This is performed after the
317 +.B overhead
318 +parameter above. ATM uses fixed 53-byte cells, each of which can carry 48 bytes
319 +payload.
320 +.PP
321 +.B ptm
322 +.br
323 + Compensates for PTM encoding, which is normally found on VDSL2 links and
324 +uses a 64b/65b encoding scheme. It is even more efficient to simply
325 +derate the specified shaper bandwidth by a factor of 64/65 or 0.984. See
326 +ITU G.992.3 Annex N and IEEE 802.3 Section 61.3 for details.
327 +.PP
328 +.B noatm
329 +.br
330 + Disables ATM and PTM compensation.
331 +
332 +.SS Failsafe Overhead Keywords
333 +These two keywords are provided for quick-and-dirty setup. Use them if you
334 +can't be bothered to read the rest of this section.
335 +.PP
336 +.B raw
337 +(default)
338 +.br
339 + Turns off all overhead compensation in CAKE. The packet size reported
340 +by Linux will be used directly.
341 +.PP
342 + Other overhead keywords may be added after "raw". The effect of this is
343 +to make the overhead compensation operate relative to the reported packet size,
344 +not the underlying IP packet size.
345 +.PP
346 +.B conservative
347 +.br
348 + Compensates for more overhead than is likely to occur on any
349 +widely-deployed link technology.
350 +.br
351 + Equivalent to
352 +.B overhead 48 atm.
353 +
354 +.SS ADSL Overhead Keywords
355 +Most ADSL modems have a way to check which framing scheme is in use. Often this
356 +is also specified in the settings document provided by the ISP. The keywords in
357 +this section are intended to correspond with these sources of information. All
358 +of them implicitly set the
359 +.B atm
360 +flag.
361 +.PP
362 +.B pppoa-vcmux
363 +.br
364 + Equivalent to
365 +.B overhead 10 atm
366 +.PP
367 +.B pppoa-llc
368 +.br
369 + Equivalent to
370 +.B overhead 14 atm
371 +.PP
372 +.B pppoe-vcmux
373 +.br
374 + Equivalent to
375 +.B overhead 32 atm
376 +.PP
377 +.B pppoe-llcsnap
378 +.br
379 + Equivalent to
380 +.B overhead 40 atm
381 +.PP
382 +.B bridged-vcmux
383 +.br
384 + Equivalent to
385 +.B overhead 24 atm
386 +.PP
387 +.B bridged-llcsnap
388 +.br
389 + Equivalent to
390 +.B overhead 32 atm
391 +.PP
392 +.B ipoa-vcmux
393 +.br
394 + Equivalent to
395 +.B overhead 8 atm
396 +.PP
397 +.B ipoa-llcsnap
398 +.br
399 + Equivalent to
400 +.B overhead 16 atm
401 +.PP
402 +See also the Ethernet Overhead Keywords section below.
403 +
404 +.SS VDSL2 Overhead Keywords
405 +ATM was dropped from VDSL2 in favour of PTM, which is a much more
406 +straightforward framing scheme. Some ISPs retained PPPoE for compatibility with
407 +their existing back-end systems.
408 +.PP
409 +.B pppoe-ptm
410 +.br
411 + Equivalent to
412 +.B overhead 30 ptm
413 +
414 +.br
415 + PPPoE: 2B PPP + 6B PPPoE +
416 +.br
417 + ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence +
418 +.br
419 + PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS)
420 +.br
421 +.PP
422 +.B bridged-ptm
423 +.br
424 + Equivalent to
425 +.B overhead 22 ptm
426 +.br
427 + ETHERNET: 6B dest MAC + 6B src MAC + 2B ethertype + 4B Frame Check Sequence +
428 +.br
429 + PTM: 1B Start of Frame (S) + 1B End of Frame (Ck) + 2B TC-CRC (PTM-FCS)
430 +.br
431 +.PP
432 +See also the Ethernet Overhead Keywords section below.
433 +
434 +.SS DOCSIS Cable Overhead Keyword
435 +DOCSIS is the universal standard for providing Internet service over cable-TV
436 +infrastructure.
437 +
438 +In this case, the actual on-wire overhead is less important than the packet size
439 +the head-end equipment uses for shaping and metering. This is specified to be
440 +an Ethernet frame including the CRC (aka FCS).
441 +.PP
442 +.B docsis
443 +.br
444 + Equivalent to
445 +.B overhead 18 mpu 64 noatm
446 +
447 +.SS Ethernet Overhead Keywords
448 +.PP
449 +.B ethernet
450 +.br
451 + Accounts for Ethernet's preamble, inter-frame gap, and Frame Check
452 +Sequence. Use this keyword when the bottleneck being shaped for is an
453 +actual Ethernet cable.
454 +.br
455 + Equivalent to
456 +.B overhead 38 mpu 84 noatm
457 +.PP
458 +.B ether-vlan
459 +.br
460 + Adds 4 bytes to the overhead compensation, accounting for an IEEE 802.1Q
461 +VLAN header appended to the Ethernet frame header. NB: Some ISPs use one or
462 +even two of these within PPPoE; this keyword may be repeated as necessary to
463 +express this.
464 +
465 +.SH ROUND TRIP TIME PARAMETERS
466 +Active Queue Management (AQM) consists of embedding congestion signals in the
467 +packet flow, which receivers use to instruct senders to slow down when the queue
468 +is persistently occupied. CAKE uses ECN signalling when available, and packet
469 +drops otherwise, according to a combination of the Codel and BLUE AQM algorithms
470 +called COBALT.
471 +
472 +Very short latencies require a very rapid AQM response to adequately control
473 +latency. However, such a rapid response tends to impair throughput when the
474 +actual RTT is relatively long. CAKE allows specifying the RTT it assumes for
475 +tuning various parameters. Actual RTTs within an order of magnitude of this
476 +will generally work well for both throughput and latency management.
477 +
478 +At the 'lan' setting and below, the time constants are similar in magnitude to
479 +the jitter in the Linux kernel itself, so congestion might be signalled
480 +prematurely. The flows will then become sparse and total throughput reduced,
481 +leaving little or no back-pressure for the fairness logic to work against. Use
482 +the "metro" setting for local LANs unless you have a custom kernel.
483 +.PP
484 +.B rtt
485 +TIME
486 +.br
487 + Manually specify an RTT.
488 +.PP
489 +.B datacentre
490 +.br
491 + For extremely high-performance 10GigE+ networks only. Equivalent to
492 +.B rtt 100us.
493 +.PP
494 +.B lan
495 +.br
496 + For pure Ethernet (not Wi-Fi) networks, at home or in the office. Don't
497 +use this when shaping for an Internet access link. Equivalent to
498 +.B rtt 1ms.
499 +.PP
500 +.B metro
501 +.br
502 + For traffic mostly within a single city. Equivalent to
503 +.B rtt 10ms.
504 +.PP
505 +.B regional
506 +.br
507 + For traffic mostly within a European-sized country. Equivalent to
508 +.B rtt 30ms.
509 +.PP
510 +.B internet
511 +(default)
512 +.br
513 + This is suitable for most Internet traffic. Equivalent to
514 +.B rtt 100ms.
515 +.PP
516 +.B oceanic
517 +.br
518 + For Internet traffic with generally above-average latency, such as that
519 +suffered by Australasian residents. Equivalent to
520 +.B rtt 300ms.
521 +.PP
522 +.B satellite
523 +.br
524 + For traffic via geostationary satellites. Equivalent to
525 +.B rtt 1000ms.
526 +.PP
527 +.B interplanetary
528 +.br
529 + So named because Jupiter is about 1 light-hour from Earth. Use this to
530 +(almost) completely disable AQM actions. Equivalent to
531 +.B rtt 1000s.
532 +
533 +.SH FLOW ISOLATION PARAMETERS
534 +With flow isolation enabled, CAKE places packets from different flows into
535 +different queues, each of which carries its own AQM state. Packets from each
536 +queue are then delivered fairly, according to a DRR++ algorithm which minimises
537 +latency for "sparse" flows. CAKE uses a set-associative hashing algorithm to
538 +minimise flow collisions.
539 +
540 +These keywords specify whether fairness based on source address, destination
541 +address, individual flows, or any combination of those is desired.
542 +.PP
543 +.B flowblind
544 +.br
545 + Disables flow isolation; all traffic passes through a single queue for
546 +each tin.
547 +.PP
548 +.B srchost
549 +.br
550 + Flows are defined only by source address. Could be useful on the egress
551 +path of an ISP backhaul.
552 +.PP
553 +.B dsthost
554 +.br
555 + Flows are defined only by destination address. Could be useful on the
556 +ingress path of an ISP backhaul.
557 +.PP
558 +.B hosts
559 +.br
560 + Flows are defined by source-destination host pairs. This is host
561 +isolation, rather than flow isolation.
562 +.PP
563 +.B flows
564 +.br
565 + Flows are defined by the entire 5-tuple of source address, destination
566 +address, transport protocol, source port and destination port. This is the type
567 +of flow isolation performed by SFQ and fq_codel.
568 +.PP
569 +.B dual-srchost
570 +.br
571 + Flows are defined by the 5-tuple, and fairness is applied first over
572 +source addresses, then over individual flows. Good for use on egress traffic
573 +from a LAN to the internet, where it'll prevent any one LAN host from
574 +monopolising the uplink, regardless of the number of flows they use.
575 +.PP
576 +.B dual-dsthost
577 +.br
578 + Flows are defined by the 5-tuple, and fairness is applied first over
579 +destination addresses, then over individual flows. Good for use on ingress
580 +traffic to a LAN from the internet, where it'll prevent any one LAN host from
581 +monopolising the downlink, regardless of the number of flows they use.
582 +.PP
583 +.B triple-isolate
584 +(default)
585 +.br
586 + Flows are defined by the 5-tuple, and fairness is applied over source
587 +*and* destination addresses intelligently (i.e. not merely by host-pairs), and
588 +also over individual flows. Use this if you're not certain whether to use
589 +dual-srchost or dual-dsthost; it'll do both jobs at once, preventing any one
590 +host on *either* side of the link from monopolising it with a large number of
591 +flows.
592 +.PP
593 +.B nat
594 +.br
595 + Instructs Cake to perform a NAT lookup before applying flow-isolation
596 +rules, to determine the true addresses and port numbers of the packet, to
597 +improve fairness between hosts "inside" the NAT. This has no practical effect
598 +in "flowblind" or "flows" modes, or if NAT is performed on a different host.
599 +.PP
600 +.B nonat
601 +(default)
602 +.br
603 + Cake will not perform a NAT lookup. Flow isolation will be performed
604 +using the addresses and port numbers directly visible to the interface Cake is
605 +attached to.
606 +
607 +.SH PRIORITY QUEUE PARAMETERS
608 +CAKE can divide traffic into "tins" based on the Diffserv field. Each tin has
609 +its own independent set of flow-isolation queues, and is serviced based on a WRR
610 +algorithm. To avoid perverse Diffserv marking incentives, tin weights have a
611 +"priority sharing" value when bandwidth used by that tin is below a threshold,
612 +and a lower "bandwidth sharing" value when above. Bandwidth is compared against
613 +the threshold using the same algorithm as the deficit-mode shaper.
614 +
615 +Detailed customisation of tin parameters is not provided. The following presets
616 +perform all necessary tuning, relative to the current shaper bandwidth and RTT
617 +settings.
618 +.PP
619 +.B besteffort
620 +.br
621 + Disables priority queuing by placing all traffic in one tin.
622 +.PP
623 +.B precedence
624 +.br
625 + Enables legacy interpretation of TOS "Precedence" field. Use of this
626 +preset on the modern Internet is firmly discouraged.
627 +.PP
628 +.B diffserv4
629 +.br
630 + Provides a general-purpose Diffserv implementation with four tins:
631 +.br
632 + Bulk (CS1), 6.25% threshold, generally low priority.
633 +.br
634 + Best Effort (general), 100% threshold.
635 +.br
636 + Video (AF4x, AF3x, CS3, AF2x, CS2, TOS4, TOS1), 50% threshold.
637 +.br
638 + Voice (CS7, CS6, EF, VA, CS5, CS4), 25% threshold.
639 +.PP
640 +.B diffserv3
641 +(default)
642 +.br
643 + Provides a simple, general-purpose Diffserv implementation with three tins:
644 +.br
645 + Bulk (CS1), 6.25% threshold, generally low priority.
646 +.br
647 + Best Effort (general), 100% threshold.
648 +.br
649 + Voice (CS7, CS6, EF, VA, TOS4), 25% threshold, reduced Codel interval.
650 +
651 +.SH OTHER PARAMETERS
652 +.B memlimit
653 +LIMIT
654 +.br
655 + Limit the memory consumed by Cake to LIMIT bytes. Note that this does
656 +not translate directly to queue size (so do not size this based on bandwidth
657 +delay product considerations, but rather on worst case acceptable memory
658 +consumption), as there is some overhead in the data structures containing the
659 +packets, especially for small packets.
660 +
661 + By default, the limit is calculated based on the bandwidth and RTT
662 +settings.
663 +
664 +.PP
665 +.B wash
666 +
667 +.br
668 + Traffic entering your diffserv domain is frequently mis-marked in
669 +transit from the perspective of your network, and traffic exiting yours may be
670 +mis-marked from the perspective of the transiting provider.
671 +
672 +Apply the wash option to clear all extra diffserv bits (but not the ECN bits), after
673 +priority queuing has taken place.
674 +
675 +If you are shaping inbound, and cannot trust the diffserv markings (as is the
676 +case for Comcast Cable, among others), it is best to use a single queue
677 +"besteffort" mode with wash.
678 +
679 +.PP
680 +.B split-gso
681 +
682 +.br
683 + This option controls whether CAKE will split Generic Segmentation
684 +Offload (GSO) super-packets into their on-the-wire components and
685 +dequeue them individually.
686 +
687 +.br
688 +Super-packets are created by the networking stack to improve efficiency.
689 +However, because they are larger they take longer to dequeue, which
690 +translates to higher latency for competing flows, especially at lower
691 +bandwidths. CAKE defaults to splitting GSO packets to achieve the lowest
692 +possible latency. At link speeds higher than 10 Gbps, setting the
693 +no-split-gso parameter can increase the maximum achievable throughput by
694 +retaining the full GSO packets.
695 +
696 +.SH OVERRIDING CLASSIFICATION WITH TC FILTERS
697 +
698 +CAKE supports overriding of its internal classification of packets through the
699 +tc filter mechanism. Packets can be assigned to different priority tins by
700 +setting the
701 +.B priority
702 +field on the skb, and the flow hashing can be overridden by setting the
703 +.B classid
704 +parameter.
705 +
706 +.PP
707 +.B Tin override
708 +
709 +.br
710 + To assign a priority tin, the major number of the priority field needs
711 +to match the qdisc handle of the cake instance; if it does, the minor number
712 +will be interpreted as the tin index. For example, to classify all ICMP packets
713 +as 'bulk', the following filter can be used:
714 +
715 +.br
716 + # tc qdisc replace dev eth0 handle 1: root cake diffserv3
717 + # tc filter add dev eth0 parent 1: protocol ip prio 1 \\
718 + u32 match icmp type 0 0 action skbedit priority 1:1
719 +
720 +.PP
721 +.B Flow hash override
722 +
723 +.br
724 + To override flow hashing, the classid can be set. CAKE will interpret
725 +the major number of the classid as the host hash used in host isolation mode,
726 +and the minor number as the flow hash used for flow-based queueing. One or both
727 +of those can be set, and will be used if the relevant flow isolation parameter
728 +is set (i.e., the major number will be ignored if CAKE is not configured in
729 +hosts mode, and the minor number will be ignored if CAKE is not configured in
730 +flows mode).
731 +
732 +.br
733 +This example will assign all ICMP packets to the first queue:
734 +
735 +.br
736 + # tc qdisc replace dev eth0 handle 1: root cake
737 + # tc filter add dev eth0 parent 1: protocol ip prio 1 \\
738 + u32 match icmp type 0 0 classid 0:1
739 +
740 +.br
741 +If only one of the host and flow overrides is set, CAKE will compute the other
742 +hash from the packet as normal. Note, however, that the host isolation mode
743 +works by assigning a host ID to the flow queue; so if overriding both host and
744 +flow, the same flow cannot have more than one host assigned. In addition, it is
745 +not possible to assign different source and destination host IDs through the
746 +override mechanism; if a host ID is assigned, it will be used as both source and
747 +destination host.
748 +
749 +
750 +
751 +.SH EXAMPLES
752 +# tc qdisc delete root dev eth0
753 +.br
754 +# tc qdisc add root dev eth0 cake bandwidth 100Mbit ethernet
755 +.br
756 +# tc -s qdisc show dev eth0
757 +.br
758 +qdisc cake 1: root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84
759 + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
760 + backlog 0b 0p requeues 0
761 + memory used: 0b of 5000000b
762 + capacity estimate: 100Mbit
763 + min/max network layer size: 65535 / 0
764 + min/max overhead-adjusted size: 65535 / 0
765 + average network hdr offset: 0
766 +
767 + Bulk Best Effort Voice
768 + thresh 6250Kbit 100Mbit 25Mbit
769 + target 5.0ms 5.0ms 5.0ms
770 + interval 100.0ms 100.0ms 100.0ms
771 + pk_delay 0us 0us 0us
772 + av_delay 0us 0us 0us
773 + sp_delay 0us 0us 0us
774 + pkts 0 0 0
775 + bytes 0 0 0
776 + way_inds 0 0 0
777 + way_miss 0 0 0
778 + way_cols 0 0 0
779 + drops 0 0 0
780 + marks 0 0 0
781 + ack_drop 0 0 0
782 + sp_flows 0 0 0
783 + bk_flows 0 0 0
784 + un_flows 0 0 0
785 + max_len 0 0 0
786 + quantum 300 1514 762
787 +
788 +After some use:
789 +.br
790 +# tc -s qdisc show dev eth0
791 +
792 +qdisc cake 1: root refcnt 2 bandwidth 100Mbit diffserv3 triple-isolate rtt 100.0ms noatm overhead 38 mpu 84
793 + Sent 44709231 bytes 31931 pkt (dropped 45, overlimits 93782 requeues 0)
794 + backlog 33308b 22p requeues 0
795 + memory used: 292352b of 5000000b
796 + capacity estimate: 100Mbit
797 + min/max network layer size: 28 / 1500
798 + min/max overhead-adjusted size: 84 / 1538
799 + average network hdr offset: 14
800 +
801 + Bulk Best Effort Voice
802 + thresh 6250Kbit 100Mbit 25Mbit
803 + target 5.0ms 5.0ms 5.0ms
804 + interval 100.0ms 100.0ms 100.0ms
805 + pk_delay 8.7ms 6.9ms 5.0ms
806 + av_delay 4.9ms 5.3ms 3.8ms
807 + sp_delay 727us 1.4ms 511us
808 + pkts 2590 21271 8137
809 + bytes 3081804 30302659 11426206
810 + way_inds 0 46 0
811 + way_miss 3 17 4
812 + way_cols 0 0 0
813 + drops 20 15 10
814 + marks 0 0 0
815 + ack_drop 0 0 0
816 + sp_flows 2 4 1
817 + bk_flows 1 2 1
818 + un_flows 0 0 0
819 + max_len 1514 1514 1514
820 + quantum 300 1514 762
821 +
822 +.SH SEE ALSO
823 +.BR tc (8),
824 +.BR tc-codel (8),
825 +.BR tc-fq_codel (8),
826 +.BR tc-htb (8)
827 +
828 +.SH AUTHORS
829 +Cake's principal author is Jonathan Morton, with contributions from
830 +Tony Ambardar, Kevin Darbyshire-Bryant, Toke Høiland-Jørgensen,
831 +Sebastian Moeller, Ryan Mounce, Dean Scarff, Nils Andreas Svee, and Dave Täht.
832 +
833 +This manual page was written by Loganaden Velvindron. Please report corrections
834 +to the Linux Networking mailing list <netdev@vger.kernel.org>.
835 --- a/man/man8/tc.8
836 +++ b/man/man8/tc.8
837 @@ -795,6 +795,7 @@ was written by Alexey N. Kuznetsov and a
838 .BR tc-basic (8),
839 .BR tc-bfifo (8),
840 .BR tc-bpf (8),
841 +.BR tc-cake (8),
842 .BR tc-cbq (8),
843 .BR tc-cgroup (8),
844 .BR tc-choke (8),
845 --- a/tc/Makefile
846 +++ b/tc/Makefile
847 @@ -66,6 +66,7 @@ TCMODULES += q_codel.o
848 TCMODULES += q_fq_codel.o
849 TCMODULES += q_fq.o
850 TCMODULES += q_pie.o
851 +TCMODULES += q_cake.o
852 TCMODULES += q_hhf.o
853 TCMODULES += q_clsact.o
854 TCMODULES += e_bpf.o
855 --- /dev/null
856 +++ b/tc/q_cake.c
857 @@ -0,0 +1,801 @@
858 +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
859 +
860 +/*
861 + * Common Applications Kept Enhanced -- CAKE
862 + *
863 + * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
864 + * Copyright (C) 2017-2018 Toke Høiland-Jørgensen <toke@toke.dk>
865 + */
866 +
867 +#include <stddef.h>
868 +#include <stdio.h>
869 +#include <stdlib.h>
870 +#include <unistd.h>
871 +#include <syslog.h>
872 +#include <fcntl.h>
873 +#include <sys/socket.h>
874 +#include <netinet/in.h>
875 +#include <arpa/inet.h>
876 +#include <string.h>
877 +#include <inttypes.h>
878 +
879 +#include "utils.h"
880 +#include "tc_util.h"
881 +
882 +struct cake_preset { /* one named RTT preset selectable on the tc command line */
883 + char *name; /* keyword compared against the argument in find_preset() */
884 + unsigned int target; /* copied into the parser's "target" when this preset is chosen */
885 + unsigned int interval; /* copied into the parser's "interval" when this preset is chosen */
886 +};
887 +
888 +static struct cake_preset presets[] = { /* { keyword, target, interval }; every target is interval / 20 */
889 + {"datacentre", 5, 100}, /* values look like microseconds: the man page gives this as rtt 100us -- TODO confirm unit */
890 + {"lan", 50, 1000},
891 + {"metro", 500, 10000},
892 + {"regional", 1500, 30000},
893 + {"internet", 5000, 100000}, /* marked as the default in the usage text */
894 + {"oceanic", 15000, 300000},
895 + {"satellite", 50000, 1000000},
896 + {"interplanetary", 50000000, 1000000000},
897 +};
898 +
899 +static const char * diffserv_names[CAKE_DIFFSERV_MAX] = { /* keyword string for each CAKE_DIFFSERV_* value, indexed by that value */
900 + [CAKE_DIFFSERV_DIFFSERV3] = "diffserv3",
901 + [CAKE_DIFFSERV_DIFFSERV4] = "diffserv4", /* the parser also accepts plain "diffserv" for this value */
902 + [CAKE_DIFFSERV_DIFFSERV8] = "diffserv8",
903 + [CAKE_DIFFSERV_BESTEFFORT] = "besteffort",
904 + [CAKE_DIFFSERV_PRECEDENCE] = "precedence",
905 +};
906 +
907 +static const char * flowmode_names[CAKE_FLOW_MAX] = { /* keyword string for each CAKE_FLOW_* value, indexed by that value */
908 + [CAKE_FLOW_NONE] = "flowblind",
909 + [CAKE_FLOW_SRC_IP] = "srchost",
910 + [CAKE_FLOW_DST_IP] = "dsthost",
911 + [CAKE_FLOW_HOSTS] = "hosts",
912 + [CAKE_FLOW_FLOWS] = "flows",
913 + [CAKE_FLOW_DUAL_SRC] = "dual-srchost",
914 + [CAKE_FLOW_DUAL_DST] = "dual-dsthost",
915 + [CAKE_FLOW_TRIPLE] = "triple-isolate", /* marked as the default in the usage text */
916 +};
917 +
918 +static struct cake_preset *find_preset(char *argv) /* look up an RTT preset by exact keyword match */
919 +{
920 + int i;
921 +
922 + for (i = 0; i < ARRAY_SIZE(presets); i++) /* linear scan of the presets[] table */
923 + if (!strcmp(argv, presets[i].name))
924 + return &presets[i]; /* pointer into the static table; caller must not free it */
925 + return NULL; /* argv is not a preset keyword */
926 +}
927 +
928 +static void explain(void) /* print the cake option usage summary to stderr */
929 +{
930 + fprintf(stderr,
931 +"Usage: ... cake [ bandwidth RATE | unlimited* | autorate-ingress ]\n"
932 +" [ rtt TIME | datacentre | lan | metro | regional |\n"
933 +" internet* | oceanic | satellite | interplanetary ]\n"
934 +" [ besteffort | diffserv8 | diffserv4 | diffserv3* ]\n" /* NOTE(review): the parser also accepts "precedence" and "diffserv"; neither is listed here */
935 +" [ flowblind | srchost | dsthost | hosts | flows |\n"
936 +" dual-srchost | dual-dsthost | triple-isolate* ]\n"
937 +" [ nat | nonat* ]\n"
938 +" [ wash | nowash* ]\n"
939 +" [ split-gso* | no-split-gso ]\n"
940 +" [ ack-filter | ack-filter-aggressive | no-ack-filter* ]\n"
941 +" [ memlimit LIMIT ]\n"
942 +" [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n"
943 +" [ mpu N ] [ ingress | egress* ]\n"
944 +" (* marks defaults)\n");
945 +}
946 +
947 +static int cake_parse_opt(struct qdisc_util *qu, int argc, char **argv,
948 + struct nlmsghdr *n, const char *dev)
949 +{
950 + struct cake_preset *preset, *preset_set = NULL;
951 + bool overhead_override = false;
952 + bool overhead_set = false;
953 + unsigned int interval = 0;
954 + unsigned int diffserv = 0;
955 + unsigned int memlimit = 0;
956 + unsigned int target = 0;
957 + __u64 bandwidth = 0;
958 + int ack_filter = -1;
959 + struct rtattr *tail;
960 + int split_gso = -1;
961 + int unlimited = 0;
962 + int flowmode = -1;
963 + int autorate = -1;
964 + int ingress = -1;
965 + int overhead = 0;
966 + int wash = -1;
967 + int nat = -1;
968 + int atm = -1;
969 + int mpu = 0;
970 +
971 + while (argc > 0) {
972 + if (strcmp(*argv, "bandwidth") == 0) {
973 + NEXT_ARG();
974 + if (get_rate64(&bandwidth, *argv)) {
975 + fprintf(stderr, "Illegal \"bandwidth\"\n");
976 + return -1;
977 + }
978 + unlimited = 0;
979 + autorate = 0;
980 + } else if (strcmp(*argv, "unlimited") == 0) {
981 + bandwidth = 0;
982 + unlimited = 1;
983 + autorate = 0;
984 + } else if (strcmp(*argv, "autorate-ingress") == 0) {
985 + autorate = 1;
986 + } else if (strcmp(*argv, "rtt") == 0) {
987 + NEXT_ARG();
988 + if (get_time(&interval, *argv)) {
989 + fprintf(stderr, "Illegal \"rtt\"\n");
990 + return -1;
991 + }
992 + target = interval / 20;
993 + if (!target)
994 + target = 1;
995 + } else if ((preset = find_preset(*argv))) {
996 + if (preset_set)
997 + duparg(*argv, preset_set->name);
998 + preset_set = preset;
999 + target = preset->target;
1000 + interval = preset->interval;
1001 + } else if (strcmp(*argv, "besteffort") == 0) {
1002 + diffserv = CAKE_DIFFSERV_BESTEFFORT;
1003 + } else if (strcmp(*argv, "precedence") == 0) {
1004 + diffserv = CAKE_DIFFSERV_PRECEDENCE;
1005 + } else if (strcmp(*argv, "diffserv8") == 0) {
1006 + diffserv = CAKE_DIFFSERV_DIFFSERV8;
1007 + } else if (strcmp(*argv, "diffserv4") == 0) {
1008 + diffserv = CAKE_DIFFSERV_DIFFSERV4;
1009 + } else if (strcmp(*argv, "diffserv") == 0) {
1010 + diffserv = CAKE_DIFFSERV_DIFFSERV4;
1011 + } else if (strcmp(*argv, "diffserv3") == 0) {
1012 + diffserv = CAKE_DIFFSERV_DIFFSERV3;
1013 + } else if (strcmp(*argv, "nowash") == 0) {
1014 + wash = 0;
1015 + } else if (strcmp(*argv, "wash") == 0) {
1016 + wash = 1;
1017 + } else if (strcmp(*argv, "split-gso") == 0) {
1018 + split_gso = 1;
1019 + } else if (strcmp(*argv, "no-split-gso") == 0) {
1020 + split_gso = 0;
1021 + } else if (strcmp(*argv, "flowblind") == 0) {
1022 + flowmode = CAKE_FLOW_NONE;
1023 + } else if (strcmp(*argv, "srchost") == 0) {
1024 + flowmode = CAKE_FLOW_SRC_IP;
1025 + } else if (strcmp(*argv, "dsthost") == 0) {
1026 + flowmode = CAKE_FLOW_DST_IP;
1027 + } else if (strcmp(*argv, "hosts") == 0) {
1028 + flowmode = CAKE_FLOW_HOSTS;
1029 + } else if (strcmp(*argv, "flows") == 0) {
1030 + flowmode = CAKE_FLOW_FLOWS;
1031 + } else if (strcmp(*argv, "dual-srchost") == 0) {
1032 + flowmode = CAKE_FLOW_DUAL_SRC;
1033 + } else if (strcmp(*argv, "dual-dsthost") == 0) {
1034 + flowmode = CAKE_FLOW_DUAL_DST;
1035 + } else if (strcmp(*argv, "triple-isolate") == 0) {
1036 + flowmode = CAKE_FLOW_TRIPLE;
1037 + } else if (strcmp(*argv, "nat") == 0) {
1038 + nat = 1;
1039 + } else if (strcmp(*argv, "nonat") == 0) {
1040 + nat = 0;
1041 + } else if (strcmp(*argv, "ptm") == 0) {
1042 + atm = CAKE_ATM_PTM;
1043 + } else if (strcmp(*argv, "atm") == 0) {
1044 + atm = CAKE_ATM_ATM;
1045 + } else if (strcmp(*argv, "noatm") == 0) {
1046 + atm = CAKE_ATM_NONE;
1047 + } else if (strcmp(*argv, "raw") == 0) {
1048 + atm = CAKE_ATM_NONE;
1049 + overhead = 0;
1050 + overhead_set = true;
1051 + overhead_override = true;
1052 + } else if (strcmp(*argv, "conservative") == 0) {
1053 + /*
1054 + * Deliberately over-estimate overhead:
1055 + * one whole ATM cell plus ATM framing.
1056 + * A safe choice if the actual overhead is unknown.
1057 + */
1058 + atm = CAKE_ATM_ATM;
1059 + overhead = 48;
1060 + overhead_set = true;
1061 +
1062 + /* Various ADSL framing schemes, all over ATM cells */
1063 + } else if (strcmp(*argv, "ipoa-vcmux") == 0) {
1064 + atm = CAKE_ATM_ATM;
1065 + overhead += 8;
1066 + overhead_set = true;
1067 + } else if (strcmp(*argv, "ipoa-llcsnap") == 0) {
1068 + atm = CAKE_ATM_ATM;
1069 + overhead += 16;
1070 + overhead_set = true;
1071 + } else if (strcmp(*argv, "bridged-vcmux") == 0) {
1072 + atm = CAKE_ATM_ATM;
1073 + overhead += 24;
1074 + overhead_set = true;
1075 + } else if (strcmp(*argv, "bridged-llcsnap") == 0) {
1076 + atm = CAKE_ATM_ATM;
1077 + overhead += 32;
1078 + overhead_set = true;
1079 + } else if (strcmp(*argv, "pppoa-vcmux") == 0) {
1080 + atm = CAKE_ATM_ATM;
1081 + overhead += 10;
1082 + overhead_set = true;
1083 + } else if (strcmp(*argv, "pppoa-llc") == 0) {
1084 + atm = CAKE_ATM_ATM;
1085 + overhead += 14;
1086 + overhead_set = true;
1087 + } else if (strcmp(*argv, "pppoe-vcmux") == 0) {
1088 + atm = CAKE_ATM_ATM;
1089 + overhead += 32;
1090 + overhead_set = true;
1091 + } else if (strcmp(*argv, "pppoe-llcsnap") == 0) {
1092 + atm = CAKE_ATM_ATM;
1093 + overhead += 40;
1094 + overhead_set = true;
1095 +
1096 + /* Typical VDSL2 framing schemes, both over PTM */
1097 + /* PTM has 64b/65b coding which absorbs some bandwidth */
1098 + } else if (strcmp(*argv, "pppoe-ptm") == 0) {
1099 + /* 2B PPP + 6B PPPoE + 6B dest MAC + 6B src MAC
1100 + * + 2B ethertype + 4B Frame Check Sequence
1101 + * + 1B Start of Frame (S) + 1B End of Frame (Ck)
1102 + * + 2B TC-CRC (PTM-FCS) = 30B
1103 + */
1104 + atm = CAKE_ATM_PTM;
1105 + overhead += 30;
1106 + overhead_set = true;
1107 + } else if (strcmp(*argv, "bridged-ptm") == 0) {
1108 + /* 6B dest MAC + 6B src MAC + 2B ethertype
1109 + * + 4B Frame Check Sequence
1110 + * + 1B Start of Frame (S) + 1B End of Frame (Ck)
1111 + * + 2B TC-CRC (PTM-FCS) = 22B
1112 + */
1113 + atm = CAKE_ATM_PTM;
1114 + overhead += 22;
1115 + overhead_set = true;
1116 + } else if (strcmp(*argv, "via-ethernet") == 0) {
1117 + /*
1118 + * We used to use this flag to manually compensate for
1119 + * Linux including the Ethernet header on Ethernet-type
1120 + * interfaces, but not on IP-type interfaces.
1121 + *
1122 + * It is no longer needed, because Cake now adjusts for
1123 + * that automatically, and is thus ignored.
1124 + *
1125 + * It would be deleted entirely, but it appears in the
1126 + * stats output when the automatic compensation is
1127 + * active.
1128 + */
1129 + } else if (strcmp(*argv, "ethernet") == 0) {
1130 + /* ethernet pre-amble & interframe gap & FCS
1131 + * you may need to add vlan tag
1132 + */
1133 + overhead += 38;
1134 + overhead_set = true;
1135 + mpu = 84;
1136 +
1137 + /* Additional Ethernet-related overhead used by some ISPs */
1138 + } else if (strcmp(*argv, "ether-vlan") == 0) {
1139 + /* 802.1q VLAN tag - may be repeated */
1140 + overhead += 4;
1141 + overhead_set = true;
1142 +
1143 + /*
1144 + * DOCSIS cable shapers account for Ethernet frame with FCS,
1145 + * but not interframe gap or preamble.
1146 + */
1147 + } else if (strcmp(*argv, "docsis") == 0) {
1148 + atm = CAKE_ATM_NONE;
1149 + overhead += 18;
1150 + overhead_set = true;
1151 + mpu = 64;
1152 + } else if (strcmp(*argv, "overhead") == 0) {
1153 + char *p = NULL;
1154 +
1155 + NEXT_ARG();
1156 + overhead = strtol(*argv, &p, 10);
1157 + if (!p || *p || !*argv ||
1158 + overhead < -64 || overhead > 256) {
1159 + fprintf(stderr,
1160 + "Illegal \"overhead\", valid range is -64 to 256\\n");
1161 + return -1;
1162 + }
1163 + overhead_set = true;
1164 +
1165 + } else if (strcmp(*argv, "mpu") == 0) {
1166 + char *p = NULL;
1167 +
1168 + NEXT_ARG();
1169 + mpu = strtol(*argv, &p, 10);
1170 + if (!p || *p || !*argv || mpu < 0 || mpu > 256) {
1171 + fprintf(stderr,
1172 + "Illegal \"mpu\", valid range is 0 to 256\\n");
1173 + return -1;
1174 + }
1175 + } else if (strcmp(*argv, "ingress") == 0) {
1176 + ingress = 1;
1177 + } else if (strcmp(*argv, "egress") == 0) {
1178 + ingress = 0;
1179 + } else if (strcmp(*argv, "no-ack-filter") == 0) {
1180 + ack_filter = CAKE_ACK_NONE;
1181 + } else if (strcmp(*argv, "ack-filter") == 0) {
1182 + ack_filter = CAKE_ACK_FILTER;
1183 + } else if (strcmp(*argv, "ack-filter-aggressive") == 0) {
1184 + ack_filter = CAKE_ACK_AGGRESSIVE;
1185 + } else if (strcmp(*argv, "memlimit") == 0) {
1186 + NEXT_ARG();
1187 + if (get_size(&memlimit, *argv)) {
1188 + fprintf(stderr,
1189 + "Illegal value for \"memlimit\": \"%s\"\n", *argv);
1190 + return -1;
1191 + }
1192 + } else if (strcmp(*argv, "help") == 0) {
1193 + explain();
1194 + return -1;
1195 + } else {
1196 + fprintf(stderr, "What is \"%s\"?\n", *argv);
1197 + explain();
1198 + return -1;
1199 + }
1200 + argc--; argv++;
1201 + }
1202 +
1203 + tail = NLMSG_TAIL(n);
1204 + addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
1205 + if (bandwidth || unlimited)
1206 + addattr_l(n, 1024, TCA_CAKE_BASE_RATE64, &bandwidth,
1207 + sizeof(bandwidth));
1208 + if (diffserv)
1209 + addattr_l(n, 1024, TCA_CAKE_DIFFSERV_MODE, &diffserv,
1210 + sizeof(diffserv));
1211 + if (atm != -1)
1212 + addattr_l(n, 1024, TCA_CAKE_ATM, &atm, sizeof(atm));
1213 + if (flowmode != -1)
1214 + addattr_l(n, 1024, TCA_CAKE_FLOW_MODE, &flowmode,
1215 + sizeof(flowmode));
1216 + if (overhead_set)
1217 + addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead,
1218 + sizeof(overhead));
1219 + if (overhead_override) {
1220 + unsigned int zero = 0;
1221 +
1222 + addattr_l(n, 1024, TCA_CAKE_RAW, &zero, sizeof(zero));
1223 + }
1224 + if (mpu > 0)
1225 + addattr_l(n, 1024, TCA_CAKE_MPU, &mpu, sizeof(mpu));
1226 + if (interval)
1227 + addattr_l(n, 1024, TCA_CAKE_RTT, &interval, sizeof(interval));
1228 + if (target)
1229 + addattr_l(n, 1024, TCA_CAKE_TARGET, &target, sizeof(target));
1230 + if (autorate != -1)
1231 + addattr_l(n, 1024, TCA_CAKE_AUTORATE, &autorate,
1232 + sizeof(autorate));
1233 + if (memlimit)
1234 + addattr_l(n, 1024, TCA_CAKE_MEMORY, &memlimit,
1235 + sizeof(memlimit));
1236 + if (nat != -1)
1237 + addattr_l(n, 1024, TCA_CAKE_NAT, &nat, sizeof(nat));
1238 + if (wash != -1)
1239 + addattr_l(n, 1024, TCA_CAKE_WASH, &wash, sizeof(wash));
1240 + if (split_gso != -1)
1241 + addattr_l(n, 1024, TCA_CAKE_SPLIT_GSO, &split_gso,
1242 + sizeof(split_gso));
1243 + if (ingress != -1)
1244 + addattr_l(n, 1024, TCA_CAKE_INGRESS, &ingress, sizeof(ingress));
1245 + if (ack_filter != -1)
1246 + addattr_l(n, 1024, TCA_CAKE_ACK_FILTER, &ack_filter,
1247 + sizeof(ack_filter));
1248 +
1249 + tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
1250 + return 0;
1251 +}
1252 +
1253 +static void cake_print_mode(unsigned int value, unsigned int max,
1254 + const char *key, const char **table)
1255 +{
1256 + if (value < max && table[value]) {
1257 + print_string(PRINT_ANY, key, "%s ", table[value]);
1258 + } else {
1259 + print_string(PRINT_JSON, key, NULL, "unknown");
1260 + print_string(PRINT_FP, NULL, "(?%s?)", key);
1261 + }
1262 +}
1263 +
1264 +static int cake_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
1265 +{
1266 + struct rtattr *tb[TCA_CAKE_MAX + 1];
1267 + unsigned int interval = 0;
1268 + unsigned int memlimit = 0;
1269 + __u64 bandwidth = 0;
1270 + int ack_filter = 0;
1271 + int split_gso = 0;
1272 + int overhead = 0;
1273 + int autorate = 0;
1274 + int ingress = 0;
1275 + int wash = 0;
1276 + int raw = 0;
1277 + int mpu = 0;
1278 + int atm = 0;
1279 + int nat = 0;
1280 +
1281 + SPRINT_BUF(b1);
1282 + SPRINT_BUF(b2);
1283 +
1284 + if (opt == NULL)
1285 + return 0;
1286 +
1287 + parse_rtattr_nested(tb, TCA_CAKE_MAX, opt);
1288 +
1289 + if (tb[TCA_CAKE_BASE_RATE64] &&
1290 + RTA_PAYLOAD(tb[TCA_CAKE_BASE_RATE64]) >= sizeof(bandwidth)) {
1291 + bandwidth = rta_getattr_u64(tb[TCA_CAKE_BASE_RATE64]);
1292 + if (bandwidth) {
1293 + print_uint(PRINT_JSON, "bandwidth", NULL, bandwidth);
1294 + print_string(PRINT_FP, NULL, "bandwidth %s ",
1295 + sprint_rate(bandwidth, b1));
1296 + } else
1297 + print_string(PRINT_ANY, "bandwidth", "bandwidth %s ",
1298 + "unlimited");
1299 + }
1300 + if (tb[TCA_CAKE_AUTORATE] &&
1301 + RTA_PAYLOAD(tb[TCA_CAKE_AUTORATE]) >= sizeof(__u32)) {
1302 + autorate = rta_getattr_u32(tb[TCA_CAKE_AUTORATE]);
1303 + if (autorate == 1)
1304 + print_string(PRINT_ANY, "autorate", "%s ",
1305 + "autorate-ingress");
1306 + else if (autorate)
1307 + print_string(PRINT_ANY, "autorate", "(?autorate?) ",
1308 + "unknown");
1309 + }
1310 + if (tb[TCA_CAKE_DIFFSERV_MODE] &&
1311 + RTA_PAYLOAD(tb[TCA_CAKE_DIFFSERV_MODE]) >= sizeof(__u32)) {
1312 + cake_print_mode(rta_getattr_u32(tb[TCA_CAKE_DIFFSERV_MODE]),
1313 + CAKE_DIFFSERV_MAX, "diffserv", diffserv_names);
1314 + }
1315 + if (tb[TCA_CAKE_FLOW_MODE] &&
1316 + RTA_PAYLOAD(tb[TCA_CAKE_FLOW_MODE]) >= sizeof(__u32)) {
1317 + cake_print_mode(rta_getattr_u32(tb[TCA_CAKE_FLOW_MODE]),
1318 + CAKE_FLOW_MAX, "flowmode", flowmode_names);
1319 + }
1320 +
1321 + if (tb[TCA_CAKE_NAT] &&
1322 + RTA_PAYLOAD(tb[TCA_CAKE_NAT]) >= sizeof(__u32)) {
1323 + nat = rta_getattr_u32(tb[TCA_CAKE_NAT]);
1324 + }
1325 +
1326 + if (nat)
1327 + print_string(PRINT_FP, NULL, "nat ", NULL);
1328 + print_bool(PRINT_JSON, "nat", NULL, nat);
1329 +
1330 + if (tb[TCA_CAKE_WASH] &&
1331 + RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) {
1332 + wash = rta_getattr_u32(tb[TCA_CAKE_WASH]);
1333 + }
1334 + if (tb[TCA_CAKE_ATM] &&
1335 + RTA_PAYLOAD(tb[TCA_CAKE_ATM]) >= sizeof(__u32)) {
1336 + atm = rta_getattr_u32(tb[TCA_CAKE_ATM]);
1337 + }
1338 + if (tb[TCA_CAKE_OVERHEAD] &&
1339 + RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__s32)) {
1340 + overhead = *(__s32 *) RTA_DATA(tb[TCA_CAKE_OVERHEAD]);
1341 + }
1342 + if (tb[TCA_CAKE_MPU] &&
1343 + RTA_PAYLOAD(tb[TCA_CAKE_MPU]) >= sizeof(__u32)) {
1344 + mpu = rta_getattr_u32(tb[TCA_CAKE_MPU]);
1345 + }
1346 + if (tb[TCA_CAKE_INGRESS] &&
1347 + RTA_PAYLOAD(tb[TCA_CAKE_INGRESS]) >= sizeof(__u32)) {
1348 + ingress = rta_getattr_u32(tb[TCA_CAKE_INGRESS]);
1349 + }
1350 + if (tb[TCA_CAKE_ACK_FILTER] &&
1351 + RTA_PAYLOAD(tb[TCA_CAKE_ACK_FILTER]) >= sizeof(__u32)) {
1352 + ack_filter = rta_getattr_u32(tb[TCA_CAKE_ACK_FILTER]);
1353 + }
1354 + if (tb[TCA_CAKE_SPLIT_GSO] &&
1355 + RTA_PAYLOAD(tb[TCA_CAKE_SPLIT_GSO]) >= sizeof(__u32)) {
1356 + split_gso = rta_getattr_u32(tb[TCA_CAKE_SPLIT_GSO]);
1357 + }
1358 + if (tb[TCA_CAKE_RAW]) {
1359 + raw = 1;
1360 + }
1361 + if (tb[TCA_CAKE_RTT] &&
1362 + RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) {
1363 + interval = rta_getattr_u32(tb[TCA_CAKE_RTT]);
1364 + }
1365 +
1366 + if (wash)
1367 + print_string(PRINT_FP, NULL, "wash ", NULL);
1368 + print_bool(PRINT_JSON, "wash", NULL, wash);
1369 +
1370 + if (ingress)
1371 + print_string(PRINT_FP, NULL, "ingress ", NULL);
1372 + print_bool(PRINT_JSON, "ingress", NULL, ingress);
1373 +
1374 + if (ack_filter == CAKE_ACK_AGGRESSIVE)
1375 + print_string(PRINT_ANY, "ack-filter", "ack-filter-%s ",
1376 + "aggressive");
1377 + else if (ack_filter == CAKE_ACK_FILTER)
1378 + print_string(PRINT_ANY, "ack-filter", "ack-filter ", "enabled");
1379 + else
1380 + print_string(PRINT_JSON, "ack-filter", NULL, "disabled");
1381 +
1382 + if (split_gso)
1383 + print_string(PRINT_FP, NULL, "split-gso ", NULL);
1384 + else
1385 + print_string(PRINT_FP, NULL, "no-split-gso ", NULL);
1386 + print_bool(PRINT_JSON, "split_gso", NULL, split_gso);
1387 +
1388 + if (interval)
1389 + print_string(PRINT_FP, NULL, "rtt %s ",
1390 + sprint_time(interval, b2));
1391 + print_uint(PRINT_JSON, "rtt", NULL, interval);
1392 +
1393 + if (raw)
1394 + print_string(PRINT_FP, NULL, "raw ", NULL);
1395 + print_bool(PRINT_JSON, "raw", NULL, raw);
1396 +
1397 + if (atm == CAKE_ATM_ATM)
1398 + print_string(PRINT_ANY, "atm", "%s ", "atm");
1399 + else if (atm == CAKE_ATM_PTM)
1400 + print_string(PRINT_ANY, "atm", "%s ", "ptm");
1401 + else if (!raw)
1402 + print_string(PRINT_ANY, "atm", "%s ", "noatm");
1403 +
1404 + print_int(PRINT_ANY, "overhead", "overhead %d ", overhead);
1405 +
1406 + if (mpu)
1407 + print_uint(PRINT_ANY, "mpu", "mpu %u ", mpu);
1408 +
1409 + if (memlimit) {
1410 + print_uint(PRINT_JSON, "memlimit", NULL, memlimit);
1411 + print_string(PRINT_FP, NULL, "memlimit %s",
1412 + sprint_size(memlimit, b1));
1413 + }
1414 +
1415 + return 0;
1416 +}
1417 +
1418 +static void cake_print_json_tin(struct rtattr **tstat)
1419 +{
1420 +#define PRINT_TSTAT_JSON(type, name, attr) if (tstat[TCA_CAKE_TIN_STATS_ ## attr]) \
1421 + print_u64(PRINT_JSON, name, NULL, \
1422 + rta_getattr_ ## type((struct rtattr *) \
1423 + tstat[TCA_CAKE_TIN_STATS_ ## attr]))
1424 +
1425 + open_json_object(NULL);
1426 + PRINT_TSTAT_JSON(u64, "threshold_rate", THRESHOLD_RATE64);
1427 + PRINT_TSTAT_JSON(u64, "sent_bytes", SENT_BYTES64);
1428 + PRINT_TSTAT_JSON(u32, "backlog_bytes", BACKLOG_BYTES);
1429 + PRINT_TSTAT_JSON(u32, "target_us", TARGET_US);
1430 + PRINT_TSTAT_JSON(u32, "interval_us", INTERVAL_US);
1431 + PRINT_TSTAT_JSON(u32, "peak_delay_us", PEAK_DELAY_US);
1432 + PRINT_TSTAT_JSON(u32, "avg_delay_us", AVG_DELAY_US);
1433 + PRINT_TSTAT_JSON(u32, "base_delay_us", BASE_DELAY_US);
1434 + PRINT_TSTAT_JSON(u32, "sent_packets", SENT_PACKETS);
1435 + PRINT_TSTAT_JSON(u32, "way_indirect_hits", WAY_INDIRECT_HITS);
1436 + PRINT_TSTAT_JSON(u32, "way_misses", WAY_MISSES);
1437 + PRINT_TSTAT_JSON(u32, "way_collisions", WAY_COLLISIONS);
1438 + PRINT_TSTAT_JSON(u32, "drops", DROPPED_PACKETS);
1439 + PRINT_TSTAT_JSON(u32, "ecn_mark", ECN_MARKED_PACKETS);
1440 + PRINT_TSTAT_JSON(u32, "ack_drops", ACKS_DROPPED_PACKETS);
1441 + PRINT_TSTAT_JSON(u32, "sparse_flows", SPARSE_FLOWS);
1442 + PRINT_TSTAT_JSON(u32, "bulk_flows", BULK_FLOWS);
1443 + PRINT_TSTAT_JSON(u32, "unresponsive_flows", UNRESPONSIVE_FLOWS);
1444 + PRINT_TSTAT_JSON(u32, "max_pkt_len", MAX_SKBLEN);
1445 + PRINT_TSTAT_JSON(u32, "flow_quantum", FLOW_QUANTUM);
1446 + close_json_object();
1447 +
1448 +#undef PRINT_TSTAT_JSON
1449 +}
1450 +
1451 +static int cake_print_xstats(struct qdisc_util *qu, FILE *f,
1452 + struct rtattr *xstats)
1453 +{
1454 + struct rtattr *st[TCA_CAKE_STATS_MAX + 1];
1455 + SPRINT_BUF(b1);
1456 + int i;
1457 +
1458 + if (xstats == NULL)
1459 + return 0;
1460 +
1461 +#define GET_STAT_U32(attr) rta_getattr_u32(st[TCA_CAKE_STATS_ ## attr])
1462 +#define GET_STAT_S32(attr) (*(__s32 *)RTA_DATA(st[TCA_CAKE_STATS_ ## attr]))
1463 +#define GET_STAT_U64(attr) rta_getattr_u64(st[TCA_CAKE_STATS_ ## attr])
1464 +
1465 + parse_rtattr_nested(st, TCA_CAKE_STATS_MAX, xstats);
1466 +
1467 + if (st[TCA_CAKE_STATS_MEMORY_USED] &&
1468 + st[TCA_CAKE_STATS_MEMORY_LIMIT]) {
1469 + print_string(PRINT_FP, NULL, " memory used: %s",
1470 + sprint_size(GET_STAT_U32(MEMORY_USED), b1));
1471 +
1472 + print_string(PRINT_FP, NULL, " of %s\n",
1473 + sprint_size(GET_STAT_U32(MEMORY_LIMIT), b1));
1474 +
1475 + print_uint(PRINT_JSON, "memory_used", NULL,
1476 + GET_STAT_U32(MEMORY_USED));
1477 + print_uint(PRINT_JSON, "memory_limit", NULL,
1478 + GET_STAT_U32(MEMORY_LIMIT));
1479 + }
1480 +
1481 + if (st[TCA_CAKE_STATS_CAPACITY_ESTIMATE64]) {
1482 + print_string(PRINT_FP, NULL, " capacity estimate: %s\n",
1483 + sprint_rate(GET_STAT_U64(CAPACITY_ESTIMATE64), b1));
1484 + print_uint(PRINT_JSON, "capacity_estimate", NULL,
1485 + GET_STAT_U64(CAPACITY_ESTIMATE64));
1486 + }
1487 +
1488 + if (st[TCA_CAKE_STATS_MIN_NETLEN] &&
1489 + st[TCA_CAKE_STATS_MAX_NETLEN]) {
1490 + print_uint(PRINT_ANY, "min_network_size",
1491 + " min/max network layer size: %12u",
1492 + GET_STAT_U32(MIN_NETLEN));
1493 + print_uint(PRINT_ANY, "max_network_size",
1494 + " /%8u\n", GET_STAT_U32(MAX_NETLEN));
1495 + }
1496 +
1497 + if (st[TCA_CAKE_STATS_MIN_ADJLEN] &&
1498 + st[TCA_CAKE_STATS_MAX_ADJLEN]) {
1499 + print_uint(PRINT_ANY, "min_adj_size",
1500 + " min/max overhead-adjusted size: %8u",
1501 + GET_STAT_U32(MIN_ADJLEN));
1502 + print_uint(PRINT_ANY, "max_adj_size",
1503 + " /%8u\n", GET_STAT_U32(MAX_ADJLEN));
1504 + }
1505 +
1506 + if (st[TCA_CAKE_STATS_AVG_NETOFF])
1507 + print_uint(PRINT_ANY, "avg_hdr_offset",
1508 + " average network hdr offset: %12u\n\n",
1509 + GET_STAT_U32(AVG_NETOFF));
1510 +
1511 + /* class stats */
1512 + if (st[TCA_CAKE_STATS_DEFICIT])
1513 + print_int(PRINT_ANY, "deficit", " deficit %u",
1514 + GET_STAT_S32(DEFICIT));
1515 + if (st[TCA_CAKE_STATS_COBALT_COUNT])
1516 + print_uint(PRINT_ANY, "count", " count %u",
1517 + GET_STAT_U32(COBALT_COUNT));
1518 +
1519 + if (st[TCA_CAKE_STATS_DROPPING] && GET_STAT_U32(DROPPING)) {
1520 + print_bool(PRINT_ANY, "dropping", " dropping", true);
1521 + if (st[TCA_CAKE_STATS_DROP_NEXT_US]) {
1522 + int drop_next = GET_STAT_S32(DROP_NEXT_US);
1523 +
1524 + if (drop_next < 0) {
1525 + print_string(PRINT_FP, NULL, " drop_next -%s",
1526 + sprint_time(drop_next, b1));
1527 + } else {
1528 + print_uint(PRINT_JSON, "drop_next", NULL,
1529 + drop_next);
1530 + print_string(PRINT_FP, NULL, " drop_next %s",
1531 + sprint_time(drop_next, b1));
1532 + }
1533 + }
1534 + }
1535 +
1536 + if (st[TCA_CAKE_STATS_P_DROP]) {
1537 + print_uint(PRINT_ANY, "blue_prob", " blue_prob %u",
1538 + GET_STAT_U32(P_DROP));
1539 + if (st[TCA_CAKE_STATS_BLUE_TIMER_US]) {
1540 + int blue_timer = GET_STAT_S32(BLUE_TIMER_US);
1541 +
1542 + if (blue_timer < 0) {
1543 + print_string(PRINT_FP, NULL, " blue_timer -%s",
1544 + sprint_time(blue_timer, b1));
1545 + } else {
1546 + print_uint(PRINT_JSON, "blue_timer", NULL,
1547 + blue_timer);
1548 + print_string(PRINT_FP, NULL, " blue_timer %s",
1549 + sprint_time(blue_timer, b1));
1550 + }
1551 + }
1552 + }
1553 +
1554 +#undef GET_STAT_U32
1555 +#undef GET_STAT_S32
1556 +#undef GET_STAT_U64
1557 +
1558 + if (st[TCA_CAKE_STATS_TIN_STATS]) {
1559 + struct rtattr *tstat[TC_CAKE_MAX_TINS][TCA_CAKE_TIN_STATS_MAX + 1];
1560 + struct rtattr *tins[TC_CAKE_MAX_TINS + 1];
1561 + int num_tins = 0;
1562 +
1563 + parse_rtattr_nested(tins, TC_CAKE_MAX_TINS,
1564 + st[TCA_CAKE_STATS_TIN_STATS]);
1565 +
1566 + for (i = 1; i <= TC_CAKE_MAX_TINS && tins[i]; i++) {
1567 + parse_rtattr_nested(tstat[i-1], TCA_CAKE_TIN_STATS_MAX,
1568 + tins[i]);
1569 + num_tins++;
1570 + }
1571 +
1572 + if (!num_tins)
1573 + return 0;
1574 +
1575 + if (is_json_context()) {
1576 + open_json_array(PRINT_JSON, "tins");
1577 + for (i = 0; i < num_tins; i++)
1578 + cake_print_json_tin(tstat[i]);
1579 + close_json_array(PRINT_JSON, NULL);
1580 +
1581 + return 0;
1582 + }
1583 +
1584 +
1585 + switch (num_tins) {
1586 + case 3:
1587 + fprintf(f, " Bulk Best Effort Voice\n");
1588 + break;
1589 +
1590 + case 4:
1591 + fprintf(f, " Bulk Best Effort Video Voice\n");
1592 + break;
1593 +
1594 + default:
1595 + fprintf(f, " ");
1596 + for (i = 0; i < num_tins; i++)
1597 + fprintf(f, " Tin %u", i);
1598 + fprintf(f, "\n");
1599 + };
1600 +
1601 +#define GET_TSTAT(i, attr) (tstat[i][TCA_CAKE_TIN_STATS_ ## attr])
1602 +#define PRINT_TSTAT(name, attr, fmts, val) do { \
1603 + if (GET_TSTAT(0, attr)) { \
1604 + fprintf(f, name); \
1605 + for (i = 0; i < num_tins; i++) \
1606 + fprintf(f, " %12" fmts, val); \
1607 + fprintf(f, "\n"); \
1608 + } \
1609 + } while (0)
1610 +
1611 +#define SPRINT_TSTAT(pfunc, type, name, attr) PRINT_TSTAT( \
1612 + name, attr, "s", sprint_ ## pfunc( \
1613 + rta_getattr_ ## type(GET_TSTAT(i, attr)), b1))
1614 +
1615 +#define PRINT_TSTAT_U32(name, attr) PRINT_TSTAT( \
1616 + name, attr, "u", rta_getattr_u32(GET_TSTAT(i, attr)))
1617 +
1618 +#define PRINT_TSTAT_U64(name, attr) PRINT_TSTAT( \
1619 + name, attr, "llu", rta_getattr_u64(GET_TSTAT(i, attr)))
1620 +
1621 + SPRINT_TSTAT(rate, u64, " thresh ", THRESHOLD_RATE64);
1622 + SPRINT_TSTAT(time, u32, " target ", TARGET_US);
1623 + SPRINT_TSTAT(time, u32, " interval", INTERVAL_US);
1624 + SPRINT_TSTAT(time, u32, " pk_delay", PEAK_DELAY_US);
1625 + SPRINT_TSTAT(time, u32, " av_delay", AVG_DELAY_US);
1626 + SPRINT_TSTAT(time, u32, " sp_delay", BASE_DELAY_US);
1627 + SPRINT_TSTAT(size, u32, " backlog ", BACKLOG_BYTES);
1628 +
1629 + PRINT_TSTAT_U32(" pkts ", SENT_PACKETS);
1630 + PRINT_TSTAT_U64(" bytes ", SENT_BYTES64);
1631 +
1632 + PRINT_TSTAT_U32(" way_inds", WAY_INDIRECT_HITS);
1633 + PRINT_TSTAT_U32(" way_miss", WAY_MISSES);
1634 + PRINT_TSTAT_U32(" way_cols", WAY_COLLISIONS);
1635 + PRINT_TSTAT_U32(" drops ", DROPPED_PACKETS);
1636 + PRINT_TSTAT_U32(" marks ", ECN_MARKED_PACKETS);
1637 + PRINT_TSTAT_U32(" ack_drop", ACKS_DROPPED_PACKETS);
1638 + PRINT_TSTAT_U32(" sp_flows", SPARSE_FLOWS);
1639 + PRINT_TSTAT_U32(" bk_flows", BULK_FLOWS);
1640 + PRINT_TSTAT_U32(" un_flows", UNRESPONSIVE_FLOWS);
1641 + PRINT_TSTAT_U32(" max_len ", MAX_SKBLEN);
1642 + PRINT_TSTAT_U32(" quantum ", FLOW_QUANTUM);
1643 +
1644 +#undef GET_STAT
1645 +#undef PRINT_TSTAT
1646 +#undef SPRINT_TSTAT
1647 +#undef PRINT_TSTAT_U32
1648 +#undef PRINT_TSTAT_U64
1649 + }
1650 + return 0;
1651 +}
1652 +
1653 +struct qdisc_util cake_qdisc_util = {
1654 + .id = "cake",
1655 + .parse_qopt = cake_parse_opt,
1656 + .print_qopt = cake_print_opt,
1657 + .print_xstats = cake_print_xstats,
1658 +};