Add esfq to iproute2 and 2.4 kernel (#1891)
[openwrt/staging/dedeckeh.git] / target / linux / generic-2.4 / patches / 620-tc_esfq.patch
1 diff -urN target.old/linux/generic-2.4/patches/232-esfq_kmod.patch target/linux/generic-2.4/patches/232-esfq_kmod.patch
2 --- target.old/linux/generic-2.4/patches/232-esfq_kmod.patch 1969-12-31 19:00:00.000000000 -0500
3 +++ target/linux/generic-2.4/patches/232-esfq_kmod.patch 2007-06-08 02:35:06.000000000 -0400
4 @@ -0,0 +1,743 @@
5 +diff -urN linux-2.4.34/Documentation/Configure.help linux-2.4.34/Documentation/Configure.help
6 +--- linux-2.4.34/Documentation/Configure.help 2007-05-10 19:37:42.000000000 -0400
7 ++++ linux-2.4.34/Documentation/Configure.help 2007-05-10 19:49:49.000000000 -0400
8 +@@ -11127,6 +11127,24 @@
9 + whenever you want). If you want to compile it as a module, say M
10 + here and read <file:Documentation/modules.txt>.
11 +
12 ++ESFQ queue
13 ++CONFIG_NET_SCH_ESFQ
14 ++ Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
15 ++ packet scheduling algorithm for some of your network devices or as a
16 ++ leaf discipline for the CBQ scheduling algorithm (see the top of
17 ++ <file:net/sched/sch_esfq.c> for details and references about the SFQ
18 ++ algorithm).
19 ++
20 ++ This is an enhanced SFQ version which allows you to control the
21 ++ hardcoded values in the SFQ scheduler: queue depth, hash table size,
22 ++ queues limit. Also adds control to the hash function used to identify
23 ++ packet flows. Hash by src or dst ip and original sfq hash.
24 ++
25 ++ This code is also available as a module called sch_esfq.o ( = code
26 ++ which can be inserted in and removed from the running kernel
27 ++ whenever you want). If you want to compile it as a module, say M
28 ++ here and read <file:Documentation/modules.txt>.
29 ++
30 + CSZ packet scheduler
31 + CONFIG_NET_SCH_CSZ
32 + Say Y here if you want to use the Clark-Shenker-Zhang (CSZ) packet
33 +diff -urN linux-2.4.34/include/linux/pkt_sched.h linux-2.4.34/include/linux/pkt_sched.h
34 +--- linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:38:19.000000000 -0400
35 ++++ linux-2.4.34/include/linux/pkt_sched.h 2007-05-10 19:53:59.000000000 -0400
36 +@@ -173,8 +173,36 @@
37 + *
38 + * The only reason for this is efficiency, it is possible
39 + * to change these parameters in compile time.
40 ++ *
41 ++ * If you need to play with these values use esfq instead.
42 + */
43 +
44 ++/* ESFQ section */
45 ++
46 ++enum
47 ++{
48 ++ /* traditional */
49 ++ TCA_SFQ_HASH_CLASSIC,
50 ++ TCA_SFQ_HASH_DST,
51 ++ TCA_SFQ_HASH_SRC,
52 ++ /* conntrack */
53 ++ TCA_SFQ_HASH_CTORIGDST,
54 ++ TCA_SFQ_HASH_CTORIGSRC,
55 ++ TCA_SFQ_HASH_CTREPLDST,
56 ++ TCA_SFQ_HASH_CTREPLSRC,
57 ++ TCA_SFQ_HASH_CTNATCHG,
58 ++};
59 ++
60 ++struct tc_esfq_qopt
61 ++{
62 ++ unsigned quantum; /* Bytes per round allocated to flow */
63 ++ int perturb_period; /* Period of hash perturbation */
64 ++ __u32 limit; /* Maximal packets in queue */
65 ++ unsigned divisor; /* Hash divisor */
66 ++ unsigned flows; /* Maximal number of flows */
67 ++ unsigned hash_kind; /* Hash function to use for flow identification */
68 ++};
69 ++
70 + /* RED section */
71 +
72 + enum
73 +diff -urN linux-2.4.34/net/sched/Config.in linux-2.4.34/net/sched/Config.in
74 +--- linux-2.4.34/net/sched/Config.in 2007-05-10 19:38:31.000000000 -0400
75 ++++ linux-2.4.34/net/sched/Config.in 2007-05-10 19:54:45.000000000 -0400
76 +@@ -12,6 +12,7 @@
77 + tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO
78 + tristate ' RED queue' CONFIG_NET_SCH_RED
79 + tristate ' SFQ queue' CONFIG_NET_SCH_SFQ
80 ++tristate ' ESFQ queue' CONFIG_NET_SCH_ESFQ
81 + tristate ' TEQL queue' CONFIG_NET_SCH_TEQL
82 + tristate ' TBF queue' CONFIG_NET_SCH_TBF
83 + tristate ' GRED queue' CONFIG_NET_SCH_GRED
84 +diff -urN linux-2.4.34/net/sched/Makefile linux-2.4.34/net/sched/Makefile
85 +--- linux-2.4.34/net/sched/Makefile 2007-05-10 19:38:31.000000000 -0400
86 ++++ linux-2.4.34/net/sched/Makefile 2007-05-10 19:55:13.000000000 -0400
87 +@@ -19,6 +19,7 @@
88 + obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
89 + obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
90 + obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
91 ++obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o
92 + obj-$(CONFIG_NET_SCH_RED) += sch_red.o
93 + obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
94 + obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
95 +diff -urN linux-2.4.34/net/sched/sch_esfq.c linux-2.4.34/net/sched/sch_esfq.c
96 +--- linux-2.4.34/net/sched/sch_esfq.c 1969-12-31 19:00:00.000000000 -0500
97 ++++ linux-2.4.34/net/sched/sch_esfq.c 2007-05-10 19:57:15.000000000 -0400
98 +@@ -0,0 +1,649 @@
99 ++/*
100 ++ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline.
101 ++ *
102 ++ * This program is free software; you can redistribute it and/or
103 ++ * modify it under the terms of the GNU General Public License
104 ++ * as published by the Free Software Foundation; either version
105 ++ * 2 of the License, or (at your option) any later version.
106 ++ *
107 ++ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
108 ++ *
109 ++ * Changes: Alexander Atanasov, <alex@ssi.bg>
110 ++ * Added dynamic depth,limit,divisor,hash_kind options.
111 ++ * Added dst and src hashes.
112 ++ *
113 ++ * Alexander Clouter, <alex@digriz.org.uk>
114 ++ * Ported ESFQ to Linux 2.6.
115 ++ *
116 ++ * Corey Hickey, <bugfood-c@fatooh.org>
117 ++ * Maintenance of the Linux 2.6 port.
118 ++ * Added fwmark hash (thanks to Robert Kurjata).
119 ++ * Added usage of jhash.
120 ++ *
121 ++ */
122 ++
123 ++#include <linux/config.h>
124 ++#include <linux/module.h>
125 ++#include <asm/uaccess.h>
126 ++#include <asm/system.h>
127 ++#include <linux/bitops.h>
128 ++#include <linux/types.h>
129 ++#include <linux/kernel.h>
130 ++#include <linux/sched.h>
131 ++#include <linux/string.h>
132 ++#include <linux/mm.h>
133 ++#include <linux/socket.h>
134 ++#include <linux/sockios.h>
135 ++#include <linux/in.h>
136 ++#include <linux/errno.h>
137 ++#include <linux/interrupt.h>
138 ++#include <linux/if_ether.h>
139 ++#include <linux/inet.h>
140 ++#include <linux/netdevice.h>
141 ++#include <linux/etherdevice.h>
142 ++#include <linux/notifier.h>
143 ++#include <linux/init.h>
144 ++#include <net/ip.h>
145 ++#include <net/route.h>
146 ++#include <linux/skbuff.h>
147 ++#include <net/sock.h>
148 ++#include <net/pkt_sched.h>
149 ++#include <linux/jhash.h>
150 ++
151 ++#define IPPROTO_DCCP 33
152 ++#define qdisc_priv(q) ((void *)(q->data))
153 ++
154 ++#ifdef CONFIG_IP_NF_CONNTRACK
155 ++/* #include <net/netfilter/nf_conntrack.h> */
156 ++#include <linux/netfilter_ipv4/ip_conntrack.h>
157 ++#endif
158 ++
159 ++/* Stochastic Fairness Queuing algorithm.
160 ++ For more comments look at sch_sfq.c.
161 ++ The difference is that you can change limit, depth,
162 ++ hash table size and choose alternate hash types.
163 ++
164 ++ classic: same as in sch_sfq.c
165 ++ dst: destination IP address
166 ++ src: source IP address
167 ++ ctorigdst: original destination IP address
168 ++ ctorigsrc: original source IP address
169 ++ ctrepldst: reply destination IP address
170 ++ ctreplsrc: reply source IP address
171 ++ ctnatchg: use the address which changed via nat
172 ++
173 ++*/
174 ++
175 ++
176 ++/* This type must be able to hold at least depth*2 values (depth is runtime-configurable in ESFQ) */
177 ++typedef unsigned int esfq_index;
178 ++
179 ++struct esfq_head
180 ++{
181 ++ esfq_index next;
182 ++ esfq_index prev;
183 ++};
184 ++
185 ++struct esfq_sched_data
186 ++{
187 ++/* Parameters */
188 ++ int perturb_period;
189 ++ unsigned quantum; /* Allotment per round: MUST BE >= MTU */
190 ++ int limit;
191 ++ unsigned depth;
192 ++ unsigned hash_divisor;
193 ++ unsigned hash_kind;
194 ++/* Variables */
195 ++ struct timer_list perturb_timer;
196 ++ int perturbation;
197 ++ esfq_index tail; /* Index of current slot in round */
198 ++ esfq_index max_depth; /* Maximal depth */
199 ++
200 ++ esfq_index *ht; /* Hash table */
201 ++ esfq_index *next; /* Active slots link */
202 ++ short *allot; /* Current allotment per slot */
203 ++ unsigned short *hash; /* Hash value indexed by slots */
204 ++ struct sk_buff_head *qs; /* Slot queue */
205 ++ struct esfq_head *dep; /* Linked list of slots, indexed by depth */
206 ++ unsigned dyn_min; /* For dynamic divisor adjustment; minimum value seen */
207 ++ unsigned dyn_max; /* maximum value seen */
208 ++ unsigned dyn_range; /* saved range */
209 ++};
210 ++
211 ++/* This contains the info we will hash. */
212 ++struct esfq_packet_info
213 ++{
214 ++ u32 proto; /* protocol or port */
215 ++ u32 src; /* source from packet header */
216 ++ u32 dst; /* destination from packet header */
217 ++ u32 ctorigsrc; /* original source from conntrack */
218 ++ u32 ctorigdst; /* original destination from conntrack */
219 ++ u32 ctreplsrc; /* reply source from conntrack */
220 ++ u32 ctrepldst; /* reply destination from conntrack */
221 ++};
222 ++
223 ++static __inline__ unsigned esfq_jhash_1word(struct esfq_sched_data *q,u32 a)
224 ++{
225 ++ return jhash_1word(a, q->perturbation) & (q->hash_divisor-1);
226 ++}
227 ++
228 ++static __inline__ unsigned esfq_jhash_2words(struct esfq_sched_data *q, u32 a, u32 b)
229 ++{
230 ++ return jhash_2words(a, b, q->perturbation) & (q->hash_divisor-1);
231 ++}
232 ++
233 ++static __inline__ unsigned esfq_jhash_3words(struct esfq_sched_data *q, u32 a, u32 b, u32 c)
234 ++{
235 ++ return jhash_3words(a, b, c, q->perturbation) & (q->hash_divisor-1);
236 ++}
237 ++
238 ++
239 ++static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
240 ++{
241 ++ struct esfq_packet_info info;
242 ++#ifdef CONFIG_IP_NF_CONNTRACK
243 ++ enum ip_conntrack_info ctinfo;
244 ++ struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);
245 ++#endif
246 ++
247 ++ switch (skb->protocol) {
248 ++ case __constant_htons(ETH_P_IP):
249 ++ {
250 ++ struct iphdr *iph = skb->nh.iph;
251 ++ info.dst = iph->daddr;
252 ++ info.src = iph->saddr;
253 ++ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
254 ++ (iph->protocol == IPPROTO_TCP ||
255 ++ iph->protocol == IPPROTO_UDP ||
256 ++ iph->protocol == IPPROTO_SCTP ||
257 ++ iph->protocol == IPPROTO_DCCP ||
258 ++ iph->protocol == IPPROTO_ESP))
259 ++ info.proto = *(((u32*)iph) + iph->ihl);
260 ++ else
261 ++ info.proto = iph->protocol;
262 ++ break;
263 ++ }
264 ++ default:
265 ++ info.dst = (u32)(unsigned long)skb->dst;
266 ++ info.src = (u32)(unsigned long)skb->sk;
267 ++ info.proto = skb->protocol;
268 ++ }
269 ++
270 ++#ifdef CONFIG_IP_NF_CONNTRACK
271 ++ /* defaults if there is no conntrack info */
272 ++ info.ctorigsrc = info.src;
273 ++ info.ctorigdst = info.dst;
274 ++ info.ctreplsrc = info.dst;
275 ++ info.ctrepldst = info.src;
276 ++ /* collect conntrack info */
277 ++ IP_NF_ASSERT(ct);
278 ++ if (ct) {
279 ++ if (skb->protocol == __constant_htons(ETH_P_IP)) {
280 ++ info.ctorigsrc = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
281 ++ info.ctorigdst = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
282 ++ info.ctreplsrc = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
283 ++ info.ctrepldst = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
284 ++ }
285 ++ }
286 ++#endif
287 ++
288 ++ switch(q->hash_kind)
289 ++ {
290 ++ case TCA_SFQ_HASH_CLASSIC:
291 ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto);
292 ++ case TCA_SFQ_HASH_DST:
293 ++ return esfq_jhash_1word(q, info.dst);
294 ++ case TCA_SFQ_HASH_SRC:
295 ++ return esfq_jhash_1word(q, info.src);
296 ++#ifdef CONFIG_IP_NF_CONNTRACK
297 ++ case TCA_SFQ_HASH_CTORIGDST:
298 ++ return esfq_jhash_1word(q, info.ctorigdst);
299 ++ case TCA_SFQ_HASH_CTORIGSRC:
300 ++ return esfq_jhash_1word(q, info.ctorigsrc);
301 ++ case TCA_SFQ_HASH_CTREPLDST:
302 ++ return esfq_jhash_1word(q, info.ctrepldst);
303 ++ case TCA_SFQ_HASH_CTREPLSRC:
304 ++ return esfq_jhash_1word(q, info.ctreplsrc);
305 ++ case TCA_SFQ_HASH_CTNATCHG:
306 ++ {
307 ++ if (info.ctorigdst == info.ctreplsrc)
308 ++ return esfq_jhash_1word(q, info.ctorigsrc);
309 ++ else
310 ++ return esfq_jhash_1word(q, info.ctreplsrc);
311 ++ }
312 ++#endif
313 ++ default:
314 ++ if (net_ratelimit())
315 ++ printk(KERN_WARNING "ESFQ: Unknown hash method. Falling back to classic.\n");
316 ++ }
317 ++ return esfq_jhash_3words(q, info.dst, info.src, info.proto);
318 ++}
319 ++
320 ++static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
321 ++{
322 ++ esfq_index p, n;
323 ++ int d = q->qs[x].qlen + q->depth;
324 ++
325 ++ p = d;
326 ++ n = q->dep[d].next;
327 ++ q->dep[x].next = n;
328 ++ q->dep[x].prev = p;
329 ++ q->dep[p].next = q->dep[n].prev = x;
330 ++}
331 ++
332 ++static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x)
333 ++{
334 ++ esfq_index p, n;
335 ++
336 ++ n = q->dep[x].next;
337 ++ p = q->dep[x].prev;
338 ++ q->dep[p].next = n;
339 ++ q->dep[n].prev = p;
340 ++
341 ++ if (n == p && q->max_depth == q->qs[x].qlen + 1)
342 ++ q->max_depth--;
343 ++
344 ++ esfq_link(q, x);
345 ++}
346 ++
347 ++static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x)
348 ++{
349 ++ esfq_index p, n;
350 ++ int d;
351 ++
352 ++ n = q->dep[x].next;
353 ++ p = q->dep[x].prev;
354 ++ q->dep[p].next = n;
355 ++ q->dep[n].prev = p;
356 ++ d = q->qs[x].qlen;
357 ++ if (q->max_depth < d)
358 ++ q->max_depth = d;
359 ++
360 ++ esfq_link(q, x);
361 ++}
362 ++
363 ++static unsigned int esfq_drop(struct Qdisc *sch)
364 ++{
365 ++ struct esfq_sched_data *q = qdisc_priv(sch);
366 ++ esfq_index d = q->max_depth;
367 ++ struct sk_buff *skb;
368 ++ unsigned int len;
369 ++
370 ++ /* Queue is full! Find the longest slot and
371 ++ drop a packet from it */
372 ++
373 ++ if (d > 1) {
374 ++ esfq_index x = q->dep[d+q->depth].next;
375 ++ skb = q->qs[x].prev;
376 ++ len = skb->len;
377 ++ __skb_unlink(skb, &q->qs[x]);
378 ++ kfree_skb(skb);
379 ++ esfq_dec(q, x);
380 ++ sch->q.qlen--;
381 ++ sch->stats.drops++;
382 ++ sch->stats.backlog -= len;
383 ++ return len;
384 ++ }
385 ++
386 ++ if (d == 1) {
387 ++ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
388 ++ d = q->next[q->tail];
389 ++ q->next[q->tail] = q->next[d];
390 ++ q->allot[q->next[d]] += q->quantum;
391 ++ skb = q->qs[d].prev;
392 ++ len = skb->len;
393 ++ __skb_unlink(skb, &q->qs[d]);
394 ++ kfree_skb(skb);
395 ++ esfq_dec(q, d);
396 ++ sch->q.qlen--;
397 ++ q->ht[q->hash[d]] = q->depth;
398 ++ sch->stats.drops++;
399 ++ sch->stats.backlog -= len;
400 ++ return len;
401 ++ }
402 ++
403 ++ return 0;
404 ++}
405 ++
406 ++static int
407 ++esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
408 ++{
409 ++ struct esfq_sched_data *q = qdisc_priv(sch);
410 ++ unsigned hash = esfq_hash(q, skb);
411 ++ unsigned depth = q->depth;
412 ++ esfq_index x;
413 ++
414 ++ x = q->ht[hash];
415 ++ if (x == depth) {
416 ++ q->ht[hash] = x = q->dep[depth].next;
417 ++ q->hash[x] = hash;
418 ++ }
419 ++ sch->stats.backlog += skb->len;
420 ++ __skb_queue_tail(&q->qs[x], skb);
421 ++ esfq_inc(q, x);
422 ++ if (q->qs[x].qlen == 1) { /* The flow is new */
423 ++ if (q->tail == depth) { /* It is the first flow */
424 ++ q->tail = x;
425 ++ q->next[x] = x;
426 ++ q->allot[x] = q->quantum;
427 ++ } else {
428 ++ q->next[x] = q->next[q->tail];
429 ++ q->next[q->tail] = x;
430 ++ q->tail = x;
431 ++ }
432 ++ }
433 ++ if (++sch->q.qlen < q->limit-1) {
434 ++ sch->stats.bytes += skb->len;
435 ++ sch->stats.packets++;
436 ++ return 0;
437 ++ }
438 ++
439 ++ esfq_drop(sch);
440 ++ return NET_XMIT_CN;
441 ++}
442 ++
443 ++static int
444 ++esfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
445 ++{
446 ++ struct esfq_sched_data *q = qdisc_priv(sch);
447 ++ unsigned hash = esfq_hash(q, skb);
448 ++ unsigned depth = q->depth;
449 ++ esfq_index x;
450 ++
451 ++ x = q->ht[hash];
452 ++ if (x == depth) {
453 ++ q->ht[hash] = x = q->dep[depth].next;
454 ++ q->hash[x] = hash;
455 ++ }
456 ++ sch->stats.backlog += skb->len;
457 ++ __skb_queue_head(&q->qs[x], skb);
458 ++ esfq_inc(q, x);
459 ++ if (q->qs[x].qlen == 1) { /* The flow is new */
460 ++ if (q->tail == depth) { /* It is the first flow */
461 ++ q->tail = x;
462 ++ q->next[x] = x;
463 ++ q->allot[x] = q->quantum;
464 ++ } else {
465 ++ q->next[x] = q->next[q->tail];
466 ++ q->next[q->tail] = x;
467 ++ q->tail = x;
468 ++ }
469 ++ }
470 ++ if (++sch->q.qlen < q->limit - 1) {
471 ++ return 0;
472 ++ }
473 ++
474 ++ sch->stats.drops++;
475 ++ esfq_drop(sch);
476 ++ return NET_XMIT_CN;
477 ++}
478 ++
479 ++
480 ++
481 ++
482 ++static struct sk_buff *
483 ++esfq_dequeue(struct Qdisc* sch)
484 ++{
485 ++ struct esfq_sched_data *q = qdisc_priv(sch);
486 ++ struct sk_buff *skb;
487 ++ unsigned depth = q->depth;
488 ++ esfq_index a, old_a;
489 ++
490 ++ /* No active slots */
491 ++ if (q->tail == depth)
492 ++ return NULL;
493 ++
494 ++ a = old_a = q->next[q->tail];
495 ++
496 ++ /* Grab packet */
497 ++ skb = __skb_dequeue(&q->qs[a]);
498 ++ esfq_dec(q, a);
499 ++ sch->q.qlen--;
500 ++ sch->stats.backlog -= skb->len;
501 ++
502 ++ /* Is the slot empty? */
503 ++ if (q->qs[a].qlen == 0) {
504 ++ q->ht[q->hash[a]] = depth;
505 ++ a = q->next[a];
506 ++ if (a == old_a) {
507 ++ q->tail = depth;
508 ++ return skb;
509 ++ }
510 ++ q->next[q->tail] = a;
511 ++ q->allot[a] += q->quantum;
512 ++ } else if ((q->allot[a] -= skb->len) <= 0) {
513 ++ q->tail = a;
514 ++ a = q->next[a];
515 ++ q->allot[a] += q->quantum;
516 ++ }
517 ++
518 ++ return skb;
519 ++}
520 ++
521 ++static void
522 ++esfq_reset(struct Qdisc* sch)
523 ++{
524 ++ struct sk_buff *skb;
525 ++
526 ++ while ((skb = esfq_dequeue(sch)) != NULL)
527 ++ kfree_skb(skb);
528 ++}
529 ++
530 ++static void esfq_perturbation(unsigned long arg)
531 ++{
532 ++ struct Qdisc *sch = (struct Qdisc*)arg;
533 ++ struct esfq_sched_data *q = qdisc_priv(sch);
534 ++
535 ++ q->perturbation = net_random()&0x1F;
536 ++
537 ++ if (q->perturb_period) {
538 ++ q->perturb_timer.expires = jiffies + q->perturb_period;
539 ++ add_timer(&q->perturb_timer);
540 ++ }
541 ++}
542 ++
543 ++static int esfq_change(struct Qdisc *sch, struct rtattr *opt)
544 ++{
545 ++ struct esfq_sched_data *q = qdisc_priv(sch);
546 ++ struct tc_esfq_qopt *ctl = RTA_DATA(opt);
547 ++ int old_perturb = q->perturb_period;
548 ++
549 ++ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
550 ++ return -EINVAL;
551 ++
552 ++ sch_tree_lock(sch);
553 ++ q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
554 ++ q->perturb_period = ctl->perturb_period*HZ;
555 ++// q->hash_divisor = ctl->divisor;
556 ++// q->tail = q->limit = q->depth = ctl->flows;
557 ++
558 ++ if (ctl->limit)
559 ++ q->limit = min_t(u32, ctl->limit, q->depth);
560 ++
561 ++ if (ctl->hash_kind) {
562 ++ q->hash_kind = ctl->hash_kind;
563 ++ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
564 ++ q->perturb_period = 0;
565 ++ }
566 ++
567 ++ // is sch_tree_lock enough to do this ?
568 ++ while (sch->q.qlen >= q->limit-1)
569 ++ esfq_drop(sch);
570 ++
571 ++ if (old_perturb)
572 ++ del_timer(&q->perturb_timer);
573 ++ if (q->perturb_period) {
574 ++ q->perturb_timer.expires = jiffies + q->perturb_period;
575 ++ add_timer(&q->perturb_timer);
576 ++ } else {
577 ++ q->perturbation = 0;
578 ++ }
579 ++ sch_tree_unlock(sch);
580 ++ return 0;
581 ++}
582 ++
583 ++static int esfq_init(struct Qdisc *sch, struct rtattr *opt)
584 ++{
585 ++ struct esfq_sched_data *q = qdisc_priv(sch);
586 ++ struct tc_esfq_qopt *ctl;
587 ++ esfq_index p = ~0U/2;
588 ++ int i;
589 ++
590 ++ if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
591 ++ return -EINVAL;
592 ++
593 ++ init_timer(&q->perturb_timer);
594 ++ q->perturb_timer.data = (unsigned long)sch;
595 ++ q->perturb_timer.function = esfq_perturbation;
596 ++ q->perturbation = 0;
597 ++ q->hash_kind = TCA_SFQ_HASH_CLASSIC;
598 ++ q->max_depth = 0;
599 ++ q->dyn_min = ~0U; /* maximum value for this type */
600 ++ q->dyn_max = 0; /* dyn_min/dyn_max will be set properly upon first packet */
601 ++ if (opt == NULL) {
602 ++ q->quantum = psched_mtu(sch->dev);
603 ++ q->perturb_period = 0;
604 ++ q->hash_divisor = 1024;
605 ++ q->tail = q->limit = q->depth = 128;
606 ++
607 ++ } else {
608 ++ ctl = RTA_DATA(opt);
609 ++ q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
610 ++ q->perturb_period = ctl->perturb_period*HZ;
611 ++ q->hash_divisor = ctl->divisor ? : 1024;
612 ++ q->tail = q->limit = q->depth = ctl->flows ? : 128;
613 ++
614 ++ if ( q->depth > p - 1 )
615 ++ return -EINVAL;
616 ++
617 ++ if (ctl->limit)
618 ++ q->limit = min_t(u32, ctl->limit, q->depth);
619 ++
620 ++ if (ctl->hash_kind) {
621 ++ q->hash_kind = ctl->hash_kind;
622 ++ }
623 ++
624 ++ if (q->perturb_period) {
625 ++ q->perturb_timer.expires = jiffies + q->perturb_period;
626 ++ add_timer(&q->perturb_timer);
627 ++ }
628 ++ }
629 ++
630 ++ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
631 ++ if (!q->ht)
632 ++ goto err_case;
633 ++
634 ++ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
635 ++ if (!q->dep)
636 ++ goto err_case;
637 ++ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
638 ++ if (!q->next)
639 ++ goto err_case;
640 ++
641 ++ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
642 ++ if (!q->allot)
643 ++ goto err_case;
644 ++ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
645 ++ if (!q->hash)
646 ++ goto err_case;
647 ++ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
648 ++ if (!q->qs)
649 ++ goto err_case;
650 ++
651 ++ for (i=0; i< q->hash_divisor; i++)
652 ++ q->ht[i] = q->depth;
653 ++ for (i=0; i<q->depth; i++) {
654 ++ skb_queue_head_init(&q->qs[i]);
655 ++ q->dep[i+q->depth].next = i+q->depth;
656 ++ q->dep[i+q->depth].prev = i+q->depth;
657 ++ }
658 ++
659 ++ for (i=0; i<q->depth; i++)
660 ++ esfq_link(q, i);
661 ++ return 0;
662 ++err_case:
663 ++ del_timer(&q->perturb_timer);
664 ++ if (q->ht)
665 ++ kfree(q->ht);
666 ++ if (q->dep)
667 ++ kfree(q->dep);
668 ++ if (q->next)
669 ++ kfree(q->next);
670 ++ if (q->allot)
671 ++ kfree(q->allot);
672 ++ if (q->hash)
673 ++ kfree(q->hash);
674 ++ if (q->qs)
675 ++ kfree(q->qs);
676 ++ return -ENOBUFS;
677 ++}
678 ++
679 ++static void esfq_destroy(struct Qdisc *sch)
680 ++{
681 ++ struct esfq_sched_data *q = qdisc_priv(sch);
682 ++ del_timer(&q->perturb_timer);
683 ++ if(q->ht)
684 ++ kfree(q->ht);
685 ++ if(q->dep)
686 ++ kfree(q->dep);
687 ++ if(q->next)
688 ++ kfree(q->next);
689 ++ if(q->allot)
690 ++ kfree(q->allot);
691 ++ if(q->hash)
692 ++ kfree(q->hash);
693 ++ if(q->qs)
694 ++ kfree(q->qs);
695 ++}
696 ++
697 ++static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
698 ++{
699 ++ struct esfq_sched_data *q = qdisc_priv(sch);
700 ++ unsigned char *b = skb->tail;
701 ++ struct tc_esfq_qopt opt;
702 ++
703 ++ opt.quantum = q->quantum;
704 ++ opt.perturb_period = q->perturb_period/HZ;
705 ++
706 ++ opt.limit = q->limit;
707 ++ opt.divisor = q->hash_divisor;
708 ++ opt.flows = q->depth;
709 ++ opt.hash_kind = q->hash_kind;
710 ++
711 ++ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
712 ++
713 ++ return skb->len;
714 ++
715 ++rtattr_failure:
716 ++ skb_trim(skb, b - skb->data);
717 ++ return -1;
718 ++}
719 ++
720 ++static struct Qdisc_ops esfq_qdisc_ops =
721 ++{
722 ++ .next = NULL,
723 ++ .cl_ops = NULL,
724 ++ .id = "esfq",
725 ++ .priv_size = sizeof(struct esfq_sched_data),
726 ++ .enqueue = esfq_enqueue,
727 ++ .dequeue = esfq_dequeue,
728 ++ .requeue = esfq_requeue,
729 ++ .drop = esfq_drop,
730 ++ .init = esfq_init,
731 ++ .reset = esfq_reset,
732 ++ .destroy = esfq_destroy,
733 ++ .change = NULL, /* esfq_change - needs more work */
734 ++ .dump = esfq_dump,
735 ++};
736 ++
737 ++static int __init esfq_module_init(void)
738 ++{
739 ++ return register_qdisc(&esfq_qdisc_ops);
740 ++}
741 ++static void __exit esfq_module_exit(void)
742 ++{
743 ++ unregister_qdisc(&esfq_qdisc_ops);
744 ++}
745 ++module_init(esfq_module_init)
746 ++module_exit(esfq_module_exit)
747 ++MODULE_LICENSE("GPL");