iproute2: support latest cake & restore DSCP washing
author Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
Wed, 12 Oct 2016 09:40:05 +0000 (10:40 +0100)
committer John Crispin <john@phrozen.org>
Mon, 12 Dec 2016 15:42:06 +0000 (16:42 +0100)
Support the new packet-overhead passing paradigm in the cake qdisc, and
restore the DSCP wash/nowash keywords.

Signed-off-by: Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
package/network/utils/iproute2/patches/950-add-cake-to-tc.patch

index ffb3e63e131400046697be2c5fe3e7b8b1b8db88..d0fc66daaaa9832cd1b35fb1922870e16acb6722 100644 (file)
@@ -1,6 +1,8 @@
+diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
+index 8d2530d..028b83c 100644
 --- a/include/linux/pkt_sched.h
 +++ b/include/linux/pkt_sched.h
-@@ -850,4 +850,57 @@ struct tc_pie_xstats {
+@@ -850,4 +850,59 @@ struct tc_pie_xstats {
        __u32 maxq;             /* maximum queue size */
        __u32 ecn_mark;         /* packets marked with ecn*/
  };
@@ -18,6 +20,8 @@
 +      TCA_CAKE_AUTORATE,
 +      TCA_CAKE_MEMORY,
 +      TCA_CAKE_NAT,
++      TCA_CAKE_ETHERNET,
++      TCA_CAKE_WASH,
 +      __TCA_CAKE_MAX
 +};
 +#define TCA_CAKE_MAX  (__TCA_CAKE_MAX - 1)
@@ -58,6 +62,8 @@
 +};
 +
  #endif
+diff --git a/tc/Makefile b/tc/Makefile
+index 56acbaa..d421b8e 100644
 --- a/tc/Makefile
 +++ b/tc/Makefile
 @@ -63,6 +63,7 @@ TCMODULES += q_codel.o
  TCMODULES += q_hhf.o
  TCMODULES += e_bpf.o
  
+diff --git a/tc/q_cake.c b/tc/q_cake.c
+new file mode 100644
+index 0000000..14e5aab
 --- /dev/null
 +++ b/tc/q_cake.c
-@@ -0,0 +1,643 @@
+@@ -0,0 +1,654 @@
 +/*
 + * Common Applications Kept Enhanced  --  CAKE
 + *
 +                      "                [ besteffort | precedence | diffserv8 | diffserv4* ]\n"
 +                      "                [ flowblind | srchost | dsthost | hosts | flows* | dual-srchost | dual-dsthost | triple-isolate ] [ nat | nonat* ]\n"
 +                      "                [ ptm | atm | noatm* ] [ overhead N | conservative | raw* ]\n"
++                      "                [ wash | nowash* ]\n"
 +                      "                [ memlimit LIMIT ]\n"
 +                      "    (* marks defaults)\n");
 +}
 +      unsigned memlimit = 0;
 +      int  overhead = 0;
 +      bool overhead_set = false;
++      bool overhead_override = false;
++      int wash = -1;
 +      int flowmode = -1;
 +      int nat = -1;
 +      int atm = -1;
 +              } else if (strcmp(*argv, "diffserv-llt") == 0) {
 +                      diffserv = 5;
 +
++              } else if (strcmp(*argv, "nowash") == 0) {
++                      wash = 0;
++              } else if (strcmp(*argv, "wash") == 0) {
++                      wash = 1;
++
 +              } else if (strcmp(*argv, "flowblind") == 0) {
 +                      flowmode = 0;
 +              } else if (strcmp(*argv, "srchost") == 0) {
 +                      atm = 0;
 +                      overhead = 0;
 +                      overhead_set = true;
++                      overhead_override = true;
 +              } else if (strcmp(*argv, "conservative") == 0) {
 +                      /*
 +                       * Deliberately over-estimate overhead:
 +                      overhead = 48;
 +                      overhead_set = true;
 +
-+              /*
-+               * DOCSIS overhead figures courtesy of Greg White @ CableLabs.
-+               * The "-ip" versions include the Ethernet frame header, in case
-+               * you are shaping an IP interface instead of an Ethernet one.
-+               */
-+              } else if (strcmp(*argv, "docsis-downstream-ip") == 0) {
-+                      atm = 0;
-+                      overhead += 35;
-+                      overhead_set = true;
-+              } else if (strcmp(*argv, "docsis-downstream") == 0) {
-+                      atm = 0;
-+                      overhead += 35 - 14;
-+                      overhead_set = true;
-+              } else if (strcmp(*argv, "docsis-upstream-ip") == 0) {
-+                      atm = 0;
-+                      overhead += 28;
-+                      overhead_set = true;
-+              } else if (strcmp(*argv, "docsis-upstream") == 0) {
-+                      atm = 0;
-+                      overhead += 28 - 14;
-+                      overhead_set = true;
-+
 +              /* Various ADSL framing schemes, all over ATM cells */
 +              } else if (strcmp(*argv, "ipoa-vcmux") == 0) {
 +                      atm = 1;
 +
 +              } else if (strcmp(*argv, "via-ethernet") == 0) {
 +                      /*
-+                       * The above overheads are relative to an IP packet,
-+                       * but Linux includes Ethernet framing overhead already
-+                       * if we are shaping an Ethernet interface rather than
-+                       * an IP interface.
++                       * We used to use this flag to manually compensate for
++                       * Linux including the Ethernet header on Ethernet-type
++                       * interfaces, but not on IP-type interfaces.
++                       *
++                       * It is no longer needed, because Cake now adjusts for
++                       * that automatically, and is thus ignored.
++                       *
++                       * It would be deleted entirely, but it appears in the
++                       * stats output when the automatic compensation is active.
 +                       */
-+                      overhead -= 14;
-+                      overhead_set = true;
 +
-+              /* Additional Ethernet-related overheads used by some ISPs */
-+              } else if (strcmp(*argv, "ether-phy") == 0) {
-+                      /* ethernet pre-amble & interframe gap 20 bytes
-+                       * Linux will have already accounted for MACs & frame type 14 bytes
-+                       * you probably want to add an FCS as well*/
-+                      overhead += 20;
-+                      overhead_set = true;
-+              } else if (strcmp(*argv, "ether-all") == 0) {
++              } else if (strcmp(*argv, "ethernet") == 0) {
 +                      /* ethernet pre-amble & interframe gap & FCS
-+                       * Linux will have already accounted for MACs & frame type 14 bytes
-+                       * you may need to add vlan tag*/
-+                      overhead += 24;
++                       * you may need to add vlan tag */
++                      overhead += 38;
 +                      overhead_set = true;
 +
-+              } else if (strcmp(*argv, "ether-fcs") == 0) {
-+                      /* Frame Check Sequence */
-+                      /* we ignore the minimum frame size, because IP packets usually meet it */
-+                      overhead += 4;
-+                      overhead_set = true;
++              /* Additional Ethernet-related overhead used by some ISPs */
 +              } else if (strcmp(*argv, "ether-vlan") == 0) {
 +                      /* 802.1q VLAN tag - may be repeated */
 +                      overhead += 4;
 +                      overhead_set = true;
 +
++              /*
++               * DOCSIS cable shapers account for Ethernet frame with FCS,
++               * but not interframe gap nor preamble.
++               */
++              } else if (strcmp(*argv, "docsis") == 0) {
++                      atm = 0;
++                      overhead += 18;
++                      overhead_set = true;
++
 +              } else if (strcmp(*argv, "overhead") == 0) {
 +                      char* p = NULL;
 +                      NEXT_ARG();
 +              addattr_l(n, 1024, TCA_CAKE_FLOW_MODE, &flowmode, sizeof(flowmode));
 +      if (overhead_set)
 +              addattr_l(n, 1024, TCA_CAKE_OVERHEAD, &overhead, sizeof(overhead));
++      if (overhead_override) {
++              unsigned zero = 0;
++              addattr_l(n, 1024, TCA_CAKE_ETHERNET, &zero, sizeof(zero));
++      }
 +      if (interval)
 +              addattr_l(n, 1024, TCA_CAKE_RTT, &interval, sizeof(interval));
 +      if (target)
 +              addattr_l(n, 1024, TCA_CAKE_MEMORY, &memlimit, sizeof(memlimit));
 +      if (nat != -1)
 +              addattr_l(n, 1024, TCA_CAKE_NAT, &nat, sizeof(nat));
++      if (wash != -1)
++              addattr_l(n, 1024, TCA_CAKE_WASH, &wash, sizeof(wash));
 +
 +      tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
 +      return 0;
 +      unsigned interval = 0;
 +      unsigned memlimit = 0;
 +      int overhead = 0;
++      int ethernet = 0;
 +      int atm = 0;
 +      int nat = 0;
 +      int autorate = 0;
++      int wash = 0;
 +      SPRINT_BUF(b1);
 +      SPRINT_BUF(b2);
 +
 +              if(nat)
 +                      fprintf(f, "nat ");
 +      }
++      if (tb[TCA_CAKE_WASH] &&
++          RTA_PAYLOAD(tb[TCA_CAKE_WASH]) >= sizeof(__u32)) {
++              wash = rta_getattr_u32(tb[TCA_CAKE_WASH]);
++      }
 +      if (tb[TCA_CAKE_ATM] &&
 +          RTA_PAYLOAD(tb[TCA_CAKE_ATM]) >= sizeof(__u32)) {
 +              atm = rta_getattr_u32(tb[TCA_CAKE_ATM]);
 +          RTA_PAYLOAD(tb[TCA_CAKE_OVERHEAD]) >= sizeof(__u32)) {
 +              overhead = rta_getattr_u32(tb[TCA_CAKE_OVERHEAD]);
 +      }
++      if (tb[TCA_CAKE_ETHERNET] &&
++          RTA_PAYLOAD(tb[TCA_CAKE_ETHERNET]) >= sizeof(__u32)) {
++              ethernet = rta_getattr_u32(tb[TCA_CAKE_ETHERNET]);
++      }
 +      if (tb[TCA_CAKE_RTT] &&
 +          RTA_PAYLOAD(tb[TCA_CAKE_RTT]) >= sizeof(__u32)) {
 +              interval = rta_getattr_u32(tb[TCA_CAKE_RTT]);
 +      }
 +
++      if (wash)
++              fprintf(f,"wash ");
++
 +      if (interval)
 +              fprintf(f, "rtt %s ", sprint_time(interval, b2));
 +
-+      if (atm == 1)
-+              fprintf(f, "atm ");
-+      else if (atm == 2)
-+              fprintf(f, "ptm ");
-+      else if (overhead)
-+              fprintf(f, "noatm ");
++      if (!atm && overhead == ethernet) {
++              fprintf(f, "raw ");
++      } else {
++              if (atm == 1)
++                      fprintf(f, "atm ");
++              else if (atm == 2)
++                      fprintf(f, "ptm ");
++              else
++                      fprintf(f, "noatm ");
 +
-+      if (overhead || atm)
 +              fprintf(f, "overhead %d ", overhead);
 +
-+      if (!atm && !overhead)
-+              fprintf(f, "raw ");
++              // This is actually the *amount* of automatic compensation, but we only report
++              // its presence as a boolean for now.
++              if (ethernet)
++                      fprintf(f, "via-ethernet ");
++      }
 +
 +      if (memlimit)
 +              fprintf(f, "memlimit %s", sprint_size(memlimit, b1));