04637f7141fb1b37ffb6e2ccd83c16f2367526ef
[openwrt/svn-archive/archive.git] / target / linux / generic-2.4 / patches / 609-netfilter_string.patch
1 --- /dev/null
2 +++ b/include/linux/netfilter_ipv4/ipt_string.h
3 @@ -0,0 +1,18 @@
4 +#ifndef _IPT_STRING_H
5 +#define _IPT_STRING_H
6 +
7 +#define IPT_STRING_MAX_PATTERN_SIZE 128
8 +#define IPT_STRING_MAX_ALGO_NAME_SIZE 16
9 +
10 +struct ipt_string_info
11 +{
12 + u_int16_t from_offset;
13 + u_int16_t to_offset;
14 + char algo[IPT_STRING_MAX_ALGO_NAME_SIZE];
15 + char pattern[IPT_STRING_MAX_PATTERN_SIZE];
16 + u_int8_t patlen;
17 + u_int8_t invert;
18 + struct ts_config __attribute__((aligned(8))) *config;
19 +};
20 +
21 +#endif /*_IPT_STRING_H*/
22 --- a/net/ipv4/netfilter/Config.in
23 +++ b/net/ipv4/netfilter/Config.in
24 @@ -63,6 +63,7 @@ if [ "$CONFIG_IP_NF_IPTABLES" != "n" ];
25 fi
26 if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
27 dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_UNCLEAN $CONFIG_IP_NF_IPTABLES
28 + dep_tristate ' String match support (EXPERIMENTAL) ' CONFIG_IP_NF_MATCH_STRING $CONFIG_IP_NF_IPTABLES
29 dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_OWNER $CONFIG_IP_NF_IPTABLES
30 dep_tristate ' Layer 7 match support (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7 $CONFIG_IP_NF_CONNTRACK
31 dep_mbool ' Layer 7 debugging output (EXPERIMENTAL)' CONFIG_IP_NF_MATCH_LAYER7_DEBUG $CONFIG_IP_NF_MATCH_LAYER7
32 --- /dev/null
33 +++ b/net/ipv4/netfilter/ipt_string.c
34 @@ -0,0 +1,99 @@
35 +/* String matching match for iptables
36 + *
37 + * (C) 2005 Pablo Neira Ayuso <pablo@eurodev.net>
38 + *
39 + * This program is free software; you can redistribute it and/or modify
40 + * it under the terms of the GNU General Public License version 2 as
41 + * published by the Free Software Foundation.
42 + */
43 +
44 +#include <linux/init.h>
45 +#include <linux/module.h>
46 +#include <linux/kernel.h>
47 +#include <linux/skbuff.h>
48 +#include <linux/netfilter_ipv4/ip_tables.h>
49 +#include <linux/netfilter_ipv4/ipt_string.h>
50 +#include "textsearch/textsearch.h"
51 +#include "textsearch/textsearch.c"
52 +#include "textsearch/ts_bm.c"
53 +#include "textsearch/ts_kmp.c"
54 +
55 +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@eurodev.net>");
56 +MODULE_DESCRIPTION("IP tables string match module");
57 +MODULE_LICENSE("GPL");
58 +
59 +static int match(const struct sk_buff *skb,
60 + const struct net_device *in,
61 + const struct net_device *out,
62 + const void *matchinfo,
63 + int offset,
64 + int *hotdrop)
65 +{
66 + struct iphdr *ip = skb->nh.iph;
67 + struct ts_state state;
68 + struct ipt_string_info *conf = (struct ipt_string_info *) matchinfo;
69 + char *buf = (char *)ip+(ip->ihl*4);
70 + int len = ntohs(ip->tot_len)-(ip->ihl*4);
71 +
72 + memset(&state, 0, sizeof(struct ts_state));
73 + /* XOR with invert: an inverted rule must match when the pattern is absent */
74 + return (textsearch_find_continuous(conf->config, &state, buf, len) != UINT_MAX) ^ !!conf->invert;
75 +}
76 +
77 +#define STRING_TEXT_PRIV(m) ((struct ipt_string_info *) m)
78 +
79 +static int checkentry(const char *tablename,
80 + const struct ipt_ip *ip,
81 + void *matchinfo,
82 + unsigned int matchsize,
83 + unsigned int hook_mask)
84 +{
85 + struct ipt_string_info *conf = matchinfo;
86 + struct ts_config *ts_conf;
87 +
88 + if (matchsize != IPT_ALIGN(sizeof(struct ipt_string_info)))
89 + return 0;
90 +
91 + /* Damn, can't handle this case properly with iptables... */
92 + if (conf->from_offset > conf->to_offset)
93 + return 0;
94 + /* patlen is user-supplied (u8): never let it exceed pattern[] */
95 + if (conf->patlen > IPT_STRING_MAX_PATTERN_SIZE)
96 + return 0;
97 + ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
98 + GFP_KERNEL, TS_AUTOLOAD);
99 + if (IS_ERR(ts_conf))
100 + return 0;
101 + conf->config = ts_conf;
102 + return 1;
103 +}
104 +
105 +static void destroy(void *matchinfo, unsigned int matchsize)
106 +{
107 + textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config);
108 +}
109 +
110 +static struct ipt_match string_match = {
111 + .name = "string",
112 + .match = match,
113 + .checkentry = checkentry,
114 + .destroy = destroy,
115 + .me = THIS_MODULE
116 +};
117 +
118 +static int __init init(void)
119 +{
120 + init_bm();
121 + init_kmp();
122 + return ipt_register_match(&string_match);
123 +}
124 +
125 +static void __exit fini(void)
126 +{
127 + ipt_unregister_match(&string_match); /* undo init() in reverse order */
128 + exit_kmp();
129 + exit_bm();
130 +}
131 +
132 +module_init(init);
133 +module_exit(fini);
134 --- a/net/ipv4/netfilter/Makefile
135 +++ b/net/ipv4/netfilter/Makefile
136 @@ -108,6 +108,7 @@ obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_s
137 obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o
138 obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
139 obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o
140 +obj-$(CONFIG_IP_NF_MATCH_STRING) += ipt_string.o
141 obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
142 obj-$(CONFIG_IP_NF_MATCH_LAYER7) += ipt_layer7.o
143
144 --- /dev/null
145 +++ b/net/ipv4/netfilter/textsearch/textsearch.c
146 @@ -0,0 +1,305 @@
147 +/*
148 + * lib/textsearch.c Generic text search interface
149 + *
150 + * This program is free software; you can redistribute it and/or
151 + * modify it under the terms of the GNU General Public License
152 + * as published by the Free Software Foundation; either version
153 + * 2 of the License, or (at your option) any later version.
154 + *
155 + * Authors: Thomas Graf <tgraf@suug.ch>
156 + * Pablo Neira Ayuso <pablo@eurodev.net>
157 + *
158 + * ==========================================================================
159 + *
160 + * INTRODUCTION
161 + *
162 + * The textsearch infrastructure provides text searching facilities for
163 + * both linear and non-linear data. Individual search algorithms are
164 + * implemented in modules and chosen by the user.
165 + *
166 + * ARCHITECTURE
167 + *
168 + * User
169 + * +----------------+
170 + * | finish()|<--------------(6)-----------------+
171 + * |get_next_block()|<--------------(5)---------------+ |
172 + * | | Algorithm | |
173 + * | | +------------------------------+
174 + * | | | init() find() destroy() |
175 + * | | +------------------------------+
176 + * | | Core API ^ ^ ^
177 + * | | +---------------+ (2) (4) (8)
178 + * | (1)|----->| prepare() |---+ | |
179 + * | (3)|----->| find()/next() |-----------+ |
180 + * | (7)|----->| destroy() |----------------------+
181 + * +----------------+ +---------------+
182 + *
183 + * (1) User configures a search by calling _prepare() specifying the
184 + * search parameters such as the pattern and algorithm name.
185 + * (2) Core requests the algorithm to allocate and initialize a search
186 + * configuration according to the specified parameters.
187 + * (3) User starts the search(es) by calling _find() or _next() to
188 + * fetch subsequent occurrences. A state variable is provided
189 + * to the algorithm to store persistent variables.
190 + * (4) Core eventually resets the search offset and forwards the find()
191 + * request to the algorithm.
192 + * (5) Algorithm calls get_next_block() provided by the user continuously
193 + * to fetch the data to be searched in block by block.
194 + * (6) Algorithm invokes finish() after the last call to get_next_block
195 + * to clean up any leftovers from get_next_block. (Optional)
196 + * (7) User destroys the configuration by calling _destroy().
197 + * (8) Core notifies the algorithm to destroy algorithm specific
198 + * allocations. (Optional)
199 + *
200 + * USAGE
201 + *
202 + * Before a search can be performed, a configuration must be created
203 + * by calling textsearch_prepare() specifying the searching algorithm and
204 + * the pattern to look for. The returned configuration may then be used
205 + * for an arbitrary number of times and even in parallel as long as a
206 + * separate struct ts_state variable is provided to every instance.
207 + *
208 + * The actual search is performed by either calling textsearch_find_-
209 + * continuous() for linear data or by providing an own get_next_block()
210 + * implementation and calling textsearch_find(). Both functions return
211 + * the position of the first occurrence of the pattern or UINT_MAX if
212 + * no match was found. Subsequent occurrences can be found by calling
213 + * textsearch_next() regardless of the linearity of the data.
214 + *
215 + * Once you're done using a configuration it must be given back via
216 + * textsearch_destroy.
217 + *
218 + * EXAMPLE
219 + *
220 + * int pos;
221 + * struct ts_config *conf;
222 + * struct ts_state state;
223 + * const char *pattern = "chicken";
224 + * const char *example = "We dance the funky chicken";
225 + *
226 + * conf = textsearch_prepare("kmp", pattern, strlen(pattern),
227 + * GFP_KERNEL, TS_AUTOLOAD);
228 + * if (IS_ERR(conf)) {
229 + * err = PTR_ERR(conf);
230 + * goto errout;
231 + * }
232 + *
233 + * pos = textsearch_find_continuous(conf, &state, example, strlen(example));
234 + * if (pos != UINT_MAX)
235 + * panic("Oh my god, dancing chickens at %d\n", pos);
236 + *
237 + * textsearch_destroy(conf);
238 + *
239 + * ==========================================================================
240 + */
241 +
242 +#include <linux/config.h>
243 +#include <linux/module.h>
244 +#include <linux/types.h>
245 +#include <linux/string.h>
246 +#include <linux/init.h>
247 +#include <linux/netfilter_ipv4/lockhelp.h>
248 +#include "textsearch.h"
249 +
250 +static LIST_HEAD(ts_ops);
251 +static spinlock_t ts_mod_lock = SPIN_LOCK_UNLOCKED;
252 +static DECLARE_RWLOCK(ts_ops_lock);
253 +
254 +static inline struct ts_ops *lookup_ts_algo(const char *name)
255 +{
256 + struct ts_ops *o;
257 +
258 + read_lock(&ts_ops_lock);
259 + list_for_each_entry(o, &ts_ops, list) {
260 + if (!strcmp(name, o->name)) {
261 + MOD_INC_USE_COUNT;
262 + read_unlock(&ts_ops_lock);
263 + return o;
264 + }
265 + }
266 + read_unlock(&ts_ops_lock);
267 +
268 + return NULL;
269 +}
270 +
271 +/**
272 + * textsearch_register - register a textsearch module
273 + * @ops: operations lookup table
274 + *
275 + * This function must be called by textsearch modules to announce
276 + * their presence. The specified &@ops must have %name set to a
277 + * unique identifier and the callbacks find(), init(), get_pattern(),
278 + * and get_pattern_len() must be implemented.
279 + *
280 + * Returns 0, -EINVAL if the name or a mandatory callback is missing, or
281 + * -EEXIST if another module has already registered with the same name.
282 + */
283 +int textsearch_register(struct ts_ops *ops)
284 +{
285 + int err = -EEXIST;
286 + struct ts_ops *o;
287 +
288 + if (ops->name == NULL || ops->find == NULL || ops->init == NULL ||
289 + ops->get_pattern == NULL || ops->get_pattern_len == NULL)
290 + return -EINVAL;
291 +
292 + spin_lock(&ts_mod_lock);
293 + list_for_each_entry(o, &ts_ops, list) {
294 + if (!strcmp(ops->name, o->name))
295 + goto errout;
296 + }
297 +
298 + write_lock(&ts_ops_lock);
299 + list_add_tail(&ops->list, &ts_ops);
300 + write_unlock(&ts_ops_lock);
301 +
302 + err = 0;
303 +errout:
304 + spin_unlock(&ts_mod_lock);
305 + return err;
306 +}
307 +
308 +/**
309 + * textsearch_unregister - unregister a textsearch module
310 + * @ops: operations lookup table
311 + *
312 + * This function must be called by textsearch modules to announce
313 + * their disappearance, for example when the module gets unloaded.
314 + * The &ops parameter must be the same as the one during the
315 + * registration.
316 + *
317 + * Returns 0 on success or -ENOENT if no matching textsearch
318 + * registration was found.
319 + */
320 +int textsearch_unregister(struct ts_ops *ops)
321 +{
322 + int err = 0;
323 + struct ts_ops *o;
324 +
325 + spin_lock(&ts_mod_lock);
326 + list_for_each_entry(o, &ts_ops, list) {
327 + if (o == ops) {
328 + write_lock(&ts_ops_lock);
329 + list_del(&o->list);
330 + write_unlock(&ts_ops_lock);
331 + goto out;
332 + }
333 + }
334 +
335 + err = -ENOENT;
336 +out:
337 + spin_unlock(&ts_mod_lock);
338 + return err;
339 +}
340 +
341 +struct ts_linear_state
342 +{
343 + unsigned int len;
344 + const void *data;
345 +};
346 +
347 +static unsigned int get_linear_data(unsigned int consumed, const u8 **dst,
348 + struct ts_config *conf,
349 + struct ts_state *state)
350 +{
351 + struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
352 +
353 + if (likely(consumed < st->len)) {
354 + *dst = st->data + consumed;
355 + return st->len - consumed;
356 + }
357 +
358 + return 0;
359 +}
360 +
361 +/**
362 + * textsearch_find_continuous - search a pattern in continuous/linear data
363 + * @conf: search configuration
364 + * @state: search state
365 + * @data: data to search in
366 + * @len: length of data
367 + *
368 + * A simplified version of textsearch_find() for continuous/linear data.
369 + * Call textsearch_next() to retrieve subsequent matches.
370 + *
371 + * Returns the position of first occurrence of the pattern or
372 + * UINT_MAX if no occurrence was found.
373 + */
374 +unsigned int textsearch_find_continuous(struct ts_config *conf,
375 + struct ts_state *state,
376 + const void *data, unsigned int len)
377 +{
378 + struct ts_linear_state *st = (struct ts_linear_state *) state->cb;
379 +
380 + conf->get_next_block = get_linear_data;
381 + st->data = data;
382 + st->len = len;
383 +
384 + return textsearch_find(conf, state);
385 +}
386 +
387 +/**
388 + * textsearch_prepare - Prepare a search
389 + * @algo: name of search algorithm
390 + * @pattern: pattern data
391 + * @len: length of pattern, must be non-zero
392 + * @gfp_mask: allocation mask
393 + * @flags: search flags (not consulted by this implementation)
394 + *
395 + * Looks up the search algorithm module and creates a new textsearch
396 + * configuration for the specified pattern. Upon completion all
397 + * necessary refcnts are held and the configuration must be put back
398 + * using textsearch_destroy() after usage.
399 + *
400 + * Note: The format of the pattern may not be compatible between
401 + * the various search algorithms.
402 + *
403 + * Returns a new textsearch configuration or an ERR_PTR(): -EINVAL for an
404 + * empty pattern, -ENOENT for an unknown algorithm, else init()'s error.
405 + */
406 +struct ts_config *textsearch_prepare(const char *algo, const void *pattern,
407 + unsigned int len, gfp_t gfp_mask, int flags)
408 +{
409 + int err = -ENOENT;
410 + struct ts_config *conf;
411 + struct ts_ops *ops;
412 +
413 + if (len == 0) /* an empty pattern makes ts_bm's unsigned 'len - 1' wrap */
414 + return ERR_PTR(-EINVAL);
415 + ops = lookup_ts_algo(algo);
416 + if (ops == NULL)
417 + goto errout;
418 + conf = ops->init(pattern, len, gfp_mask);
419 + if (IS_ERR(conf)) {
420 + err = PTR_ERR(conf);
421 + goto errout;
422 + }
423 +
424 + conf->ops = ops;
425 + return conf;
426 +
427 +errout:
428 + if (ops)
429 + MOD_DEC_USE_COUNT;
430 +
431 + return ERR_PTR(err);
432 +}
433 +
434 +/**
435 + * textsearch_destroy - destroy a search configuration
436 + * @conf: search configuration
437 + *
438 + * Releases all references of the configuration and frees
439 + * up the memory.
440 + */
441 +void textsearch_destroy(struct ts_config *conf)
442 +{
443 + if (conf->ops) {
444 + if (conf->ops->destroy)
445 + conf->ops->destroy(conf);
446 + MOD_DEC_USE_COUNT;
447 + }
448 +
449 + kfree(conf);
450 +}
451 +
452 --- /dev/null
453 +++ b/net/ipv4/netfilter/textsearch/textsearch.h
454 @@ -0,0 +1,182 @@
455 +#ifndef __LINUX_TEXTSEARCH_H
456 +#define __LINUX_TEXTSEARCH_H
457 +
458 +#ifdef __KERNEL__
459 +
460 +#include <linux/types.h>
461 +#include <linux/list.h>
462 +#include <linux/kernel.h>
463 +#include <linux/module.h>
464 +#include <linux/slab.h>
465 +
466 +typedef int gfp_t;
467 +struct ts_config;
468 +
469 +/**
470 + * TS_AUTOLOAD - Automatically load textsearch modules when needed
471 + */
472 +#define TS_AUTOLOAD 1
473 +
474 +/**
475 + * struct ts_state - search state
476 + * @offset: offset for next match
477 + * @cb: control buffer, for persistent variables of get_next_block()
478 + */
479 +struct ts_state
480 +{
481 + unsigned int offset;
482 + char cb[40];
483 +};
484 +
485 +/**
486 + * struct ts_ops - search module operations
487 + * @name: name of search algorithm
488 + * @init: initialization function to prepare a search
489 + * @find: find the next occurrence of the pattern
490 + * @destroy: destroy algorithm specific parts of a search configuration
491 + * @get_pattern: return head of pattern
492 + * @get_pattern_len: return length of pattern
493 + * @owner: module reference to algorithm
494 + */
495 +struct ts_ops
496 +{
497 + const char *name;
498 + struct ts_config * (*init)(const void *, unsigned int, gfp_t);
499 + unsigned int (*find)(struct ts_config *,
500 + struct ts_state *);
501 + void (*destroy)(struct ts_config *);
502 + void * (*get_pattern)(struct ts_config *);
503 + unsigned int (*get_pattern_len)(struct ts_config *);
504 + struct module *owner;
505 + struct list_head list;
506 +};
507 +
508 +/**
509 + * struct ts_config - search configuration
510 + * @ops: operations of chosen algorithm
511 + * @get_next_block: callback to fetch the next block to search in
512 + * @finish: callback to finalize a search
513 + */
514 +struct ts_config
515 +{
516 + struct ts_ops *ops;
517 +
518 + /**
519 + * get_next_block - fetch next block of data
520 + * @consumed: number of bytes consumed by the caller
521 + * @dst: destination buffer
522 + * @conf: search configuration
523 + * @state: search state
524 + *
525 + * Called repeatedly until 0 is returned. Must assign the
526 + * head of the next block of data to &*dst and return the length
527 + * of the block or 0 if at the end. consumed == 0 indicates
528 + * a new search. May store/read persistent values in state->cb.
529 + */
530 + unsigned int (*get_next_block)(unsigned int consumed,
531 + const u8 **dst,
532 + struct ts_config *conf,
533 + struct ts_state *state);
534 +
535 + /**
536 + * finish - finalize/clean a series of get_next_block() calls
537 + * @conf: search configuration
538 + * @state: search state
539 + *
540 + * Called after the last use of get_next_block(), may be used
541 + * to cleanup any leftovers.
542 + */
543 + void (*finish)(struct ts_config *conf,
544 + struct ts_state *state);
545 +};
546 +
547 +/**
548 + * textsearch_next - continue searching for a pattern
549 + * @conf: search configuration
550 + * @state: search state
551 + *
552 + * Continues a search looking for more occurrences of the pattern.
553 + * textsearch_find() must be called to find the first occurrence
554 + * in order to reset the state.
555 + *
556 + * Returns the position of the next occurrence of the pattern or
557 + * UINT_MAX if no match was found.
558 + */
559 +static inline unsigned int textsearch_next(struct ts_config *conf,
560 + struct ts_state *state)
561 +{
562 + unsigned int ret = conf->ops->find(conf, state);
563 +
564 + if (conf->finish)
565 + conf->finish(conf, state);
566 +
567 + return ret;
568 +}
569 +
570 +/**
571 + * textsearch_find - start searching for a pattern
572 + * @conf: search configuration
573 + * @state: search state
574 + *
575 + * Returns the position of first occurrence of the pattern or
576 + * UINT_MAX if no match was found.
577 + */
578 +static inline unsigned int textsearch_find(struct ts_config *conf,
579 + struct ts_state *state)
580 +{
581 + state->offset = 0;
582 + return textsearch_next(conf, state);
583 +}
584 +
585 +/**
586 + * textsearch_get_pattern - return head of the pattern
587 + * @conf: search configuration
588 + */
589 +static inline void *textsearch_get_pattern(struct ts_config *conf)
590 +{
591 + return conf->ops->get_pattern(conf);
592 +}
593 +
594 +/**
595 + * textsearch_get_pattern_len - return length of the pattern
596 + * @conf: search configuration
597 + */
598 +static inline unsigned int textsearch_get_pattern_len(struct ts_config *conf)
599 +{
600 + return conf->ops->get_pattern_len(conf);
601 +}
602 +
603 +extern int textsearch_register(struct ts_ops *);
604 +extern int textsearch_unregister(struct ts_ops *);
605 +extern struct ts_config *textsearch_prepare(const char *, const void *,
606 + unsigned int, gfp_t, int);
607 +extern void textsearch_destroy(struct ts_config *conf);
608 +extern unsigned int textsearch_find_continuous(struct ts_config *,
609 + struct ts_state *,
610 + const void *, unsigned int);
611 +
612 +
613 +#define TS_PRIV_ALIGNTO 8
614 +#define TS_PRIV_ALIGN(len) (((len) + TS_PRIV_ALIGNTO-1) & ~(TS_PRIV_ALIGNTO-1))
615 +
616 +static inline struct ts_config *alloc_ts_config(size_t payload,
617 + gfp_t gfp_mask)
618 +{
619 + struct ts_config *conf;
620 +
621 + conf = kmalloc(TS_PRIV_ALIGN(sizeof(*conf)) + payload, gfp_mask);
622 + if (conf == NULL)
623 + return ERR_PTR(-ENOMEM);
624 +
625 + memset(conf, 0, TS_PRIV_ALIGN(sizeof(*conf)) + payload);
626 + return conf;
627 +}
628 +
629 +static inline void *ts_config_priv(struct ts_config *conf)
630 +{
631 + return ((u8 *) conf + TS_PRIV_ALIGN(sizeof(struct ts_config)));
632 +}
633 +
634 +#endif /* __KERNEL__ */
635 +
636 +#endif
637 --- /dev/null
638 +++ b/net/ipv4/netfilter/textsearch/ts_bm.c
639 @@ -0,0 +1,190 @@
640 +/*
641 + * lib/ts_bm.c Boyer-Moore text search implementation
642 + *
643 + * This program is free software; you can redistribute it and/or
644 + * modify it under the terms of the GNU General Public License
645 + * as published by the Free Software Foundation; either version
646 + * 2 of the License, or (at your option) any later version.
647 + *
648 + * Authors: Pablo Neira Ayuso <pablo@eurodev.net>
649 + *
650 + * ==========================================================================
651 + *
652 + * Implements Boyer-Moore string matching algorithm:
653 + *
654 + * [1] A Fast String Searching Algorithm, R.S. Boyer and Moore.
655 + * Communications of the Association for Computing Machinery,
656 + * 20(10), 1977, pp. 762-772.
657 + * http://www.cs.utexas.edu/users/moore/publications/fstrpos.pdf
658 + *
659 + * [2] Handbook of Exact String Matching Algorithms, Thierry Lecroq, 2004
660 + * http://www-igm.univ-mlv.fr/~lecroq/string/string.pdf
661 + *
662 + * Note: Since Boyer-Moore (BM) performs searches for matchings from right
663 + * to left, it's still possible that a matching could be spread over
664 + * multiple blocks, in that case this algorithm won't find any coincidence.
665 + *
666 + * If you're willing to ensure that such thing won't ever happen, use the
667 + * Knuth-Morris-Pratt (KMP) implementation instead. In conclusion, choose
668 + * the proper string search algorithm depending on your setting.
669 + *
670 + * Say you're using the textsearch infrastructure for filtering, NIDS or
671 + * any similar security focused purpose, then go KMP. Otherwise, if you
672 + * really care about performance, say you're classifying packets to apply
673 + * Quality of Service (QoS) policies, and you don't mind about possible
674 + * matchings spread over multiple fragments, then go BM.
675 + */
676 +
677 +#include <linux/config.h>
678 +#include <linux/kernel.h>
679 +#include <linux/module.h>
680 +#include <linux/types.h>
681 +#include <linux/string.h>
682 +#include "textsearch.h"
683 +
684 +/* Alphabet size, use ASCII */
685 +#define ASIZE 256
686 +
687 +#if 0
688 +#define DEBUGP printk
689 +#else
690 +#define DEBUGP(args, format...)
691 +#endif
692 +
693 +struct ts_bm
694 +{
695 + u8 * pattern;
696 + unsigned int patlen;
697 + unsigned int bad_shift[ASIZE];
698 + unsigned int good_shift[0];
699 +};
700 +
701 +static unsigned int bm_find(struct ts_config *conf, struct ts_state *state)
702 +{
703 + struct ts_bm *bm = ts_config_priv(conf);
704 + unsigned int i, text_len, consumed = state->offset;
705 + const u8 *text;
706 + int shift = bm->patlen - 1, bs;
707 + /* shift indexes the text byte aligned with the pattern's last byte */
708 + for (;;) {
709 + text_len = conf->get_next_block(consumed, &text, conf, state);
710 +
711 + if (unlikely(text_len == 0))
712 + break;
713 +
714 + while (shift < text_len) {
715 + DEBUGP("Searching in position %d (%c)\n",
716 + shift, text[shift]);
717 + for (i = 0; i < bm->patlen; i++)
718 + if (text[shift-i] != bm->pattern[bm->patlen-1-i])
719 + goto next;
720 +
721 + /* London calling... */
722 + DEBUGP("found!\n");
723 + return consumed += (shift-(bm->patlen-1));
724 +
725 +next: bs = bm->bad_shift[text[shift-i]];
726 +
727 + /* Now jumping to... */
728 + shift = max_t(int, shift-i+bs, shift+bm->good_shift[i]);
729 + }
730 + consumed += text_len;
731 + }
732 +
733 + return UINT_MAX;
734 +}
735 +
736 +static int subpattern(u8 *pattern, int i, int j, int g)
737 +{
738 + int x = i+g-1, y = j+g-1, ret = 0;
739 +
740 + while(pattern[x--] == pattern[y--]) {
741 + if (y < 0) {
742 + ret = 1;
743 + break;
744 + }
745 + if (--g == 0) {
746 + ret = pattern[i-1] != pattern[j-1];
747 + break;
748 + }
749 + }
750 +
751 + return ret;
752 +}
753 +
754 +static void bm_compute_prefix_tbl(struct ts_bm *bm, const u8 *pattern,
755 + unsigned int len)
756 +{
757 + int i, j, g;
758 +
759 + for (i = 0; i < ASIZE; i++)
760 + bm->bad_shift[i] = len;
761 + for (i = 0; i < len - 1; i++)
762 + bm->bad_shift[pattern[i]] = len - 1 - i;
763 +
764 + /* Compute the good shift array, used to match reoccurrences
765 + * of a subpattern */
766 + bm->good_shift[0] = 1;
767 + for (i = 1; i < bm->patlen; i++)
768 + bm->good_shift[i] = bm->patlen;
769 + for (i = bm->patlen-1, g = 1; i > 0; g++, i--) {
770 + for (j = i-1; j >= 1-g ; j--)
771 + if (subpattern(bm->pattern, i, j, g)) {
772 + bm->good_shift[g] = bm->patlen-j-g;
773 + break;
774 + }
775 + }
776 +}
777 +
778 +static struct ts_config *bm_init(const void *pattern, unsigned int len,
779 + gfp_t gfp_mask)
780 +{
781 + struct ts_config *conf;
782 + struct ts_bm *bm;
783 + unsigned int prefix_tbl_len = len * sizeof(unsigned int);
784 + size_t priv_size = sizeof(*bm) + len + prefix_tbl_len;
785 +
786 + conf = alloc_ts_config(priv_size, gfp_mask);
787 + if (IS_ERR(conf))
788 + return conf;
789 +
790 + bm = ts_config_priv(conf);
791 + bm->patlen = len;
792 + bm->pattern = (u8 *) bm->good_shift + prefix_tbl_len;
793 + memcpy(bm->pattern, pattern, len); /* table setup below reads bm->pattern */
794 + bm_compute_prefix_tbl(bm, pattern, len);
795 +
796 + return conf;
797 +}
798 +
799 +static void *bm_get_pattern(struct ts_config *conf)
800 +{
801 + struct ts_bm *bm = ts_config_priv(conf);
802 + return bm->pattern;
803 +}
804 +
805 +static unsigned int bm_get_pattern_len(struct ts_config *conf)
806 +{
807 + struct ts_bm *bm = ts_config_priv(conf);
808 + return bm->patlen;
809 +}
810 +
811 +static struct ts_ops bm_ops = {
812 + .name = "bm",
813 + .find = bm_find,
814 + .init = bm_init,
815 + .get_pattern = bm_get_pattern,
816 + .get_pattern_len = bm_get_pattern_len,
817 + .owner = THIS_MODULE,
818 + .list = LIST_HEAD_INIT(bm_ops.list)
819 +};
820 +
821 +static int __init init_bm(void)
822 +{
823 + return textsearch_register(&bm_ops);
824 +}
825 +
826 +static void __exit exit_bm(void)
827 +{
828 + textsearch_unregister(&bm_ops);
829 +}
830 --- /dev/null
831 +++ b/net/ipv4/netfilter/textsearch/ts_kmp.c
832 @@ -0,0 +1,141 @@
833 +/*
834 + * lib/ts_kmp.c Knuth-Morris-Pratt text search implementation
835 + *
836 + * This program is free software; you can redistribute it and/or
837 + * modify it under the terms of the GNU General Public License
838 + * as published by the Free Software Foundation; either version
839 + * 2 of the License, or (at your option) any later version.
840 + *
841 + * Authors: Thomas Graf <tgraf@suug.ch>
842 + *
843 + * ==========================================================================
844 + *
845 + * Implements a linear-time string-matching algorithm due to Knuth,
846 + * Morris, and Pratt [1]. Their algorithm avoids the explicit
847 + * computation of the transition function DELTA altogether. Its
848 + * matching time is O(n), for n being length(text), using just an
849 + * auxiliary function PI[1..m], for m being length(pattern),
850 + * precomputed from the pattern in time O(m). The array PI allows
851 + * the transition function DELTA to be computed efficiently
852 + * "on the fly" as needed. Roughly speaking, for any state
853 + * "q" = 0,1,...,m and any character "a" in SIGMA, the value
854 + * PI["q"] contains the information that is independent of "a" and
855 + * is needed to compute DELTA("q", "a") [2]. Since the array PI
856 + * has only m entries, whereas DELTA has O(m|SIGMA|) entries, we
857 + * save a factor of |SIGMA| in the preprocessing time by computing
858 + * PI rather than DELTA.
859 + *
860 + * [1] Cormen, Leiserson, Rivest, Stein
861 + * Introduction to Algorithms, 2nd Edition, MIT Press
862 + * [2] See finite automata theory
863 + */
864 +
865 +#include <linux/config.h>
866 +#include <linux/module.h>
867 +#include <linux/types.h>
868 +#include <linux/string.h>
869 +#include "textsearch.h"
870 +
871 +struct ts_kmp
872 +{
873 + u8 * pattern;
874 + unsigned int pattern_len;
875 + unsigned int prefix_tbl[0];
876 +};
877 +
878 +static unsigned int kmp_find(struct ts_config *conf, struct ts_state *state)
879 +{
880 + struct ts_kmp *kmp = ts_config_priv(conf);
881 + unsigned int i, q = 0, text_len, consumed = state->offset;
882 + const u8 *text;
883 +
884 + for (;;) {
885 + text_len = conf->get_next_block(consumed, &text, conf, state);
886 +
887 + if (unlikely(text_len == 0))
888 + break;
889 +
890 + for (i = 0; i < text_len; i++) {
891 + while (q > 0 && kmp->pattern[q] != text[i])
892 + q = kmp->prefix_tbl[q - 1];
893 + if (kmp->pattern[q] == text[i])
894 + q++;
895 + if (unlikely(q == kmp->pattern_len)) {
896 + state->offset = consumed + i + 1;
897 + return state->offset - kmp->pattern_len;
898 + }
899 + }
900 +
901 + consumed += text_len;
902 + }
903 +
904 + return UINT_MAX;
905 +}
906 +
907 +static inline void kmp_compute_prefix_tbl(const u8 *pattern, unsigned int len,
908 + unsigned int *prefix_tbl)
909 +{
910 + unsigned int k, q;
911 +
912 + for (k = 0, q = 1; q < len; q++) {
913 + while (k > 0 && pattern[k] != pattern[q])
914 + k = prefix_tbl[k-1];
915 + if (pattern[k] == pattern[q])
916 + k++;
917 + prefix_tbl[q] = k;
918 + }
919 +}
920 +
921 +static struct ts_config *kmp_init(const void *pattern, unsigned int len,
922 + gfp_t gfp_mask)
923 +{
924 + struct ts_config *conf;
925 + struct ts_kmp *kmp;
926 + unsigned int prefix_tbl_len = len * sizeof(unsigned int);
927 + size_t priv_size = sizeof(*kmp) + len + prefix_tbl_len;
928 +
929 + conf = alloc_ts_config(priv_size, gfp_mask);
930 + if (IS_ERR(conf))
931 + return conf;
932 +
933 + kmp = ts_config_priv(conf);
934 + kmp->pattern_len = len;
935 + kmp_compute_prefix_tbl(pattern, len, kmp->prefix_tbl);
936 + kmp->pattern = (u8 *) kmp->prefix_tbl + prefix_tbl_len;
937 + memcpy(kmp->pattern, pattern, len);
938 +
939 + return conf;
940 +}
941 +
942 +static void *kmp_get_pattern(struct ts_config *conf)
943 +{
944 + struct ts_kmp *kmp = ts_config_priv(conf);
945 + return kmp->pattern;
946 +}
947 +
948 +static unsigned int kmp_get_pattern_len(struct ts_config *conf)
949 +{
950 + struct ts_kmp *kmp = ts_config_priv(conf);
951 + return kmp->pattern_len;
952 +}
953 +
954 +static struct ts_ops kmp_ops = {
955 + .name = "kmp",
956 + .find = kmp_find,
957 + .init = kmp_init,
958 + .get_pattern = kmp_get_pattern,
959 + .get_pattern_len = kmp_get_pattern_len,
960 + .owner = THIS_MODULE,
961 + .list = LIST_HEAD_INIT(kmp_ops.list)
962 +};
963 +
964 +static int __init init_kmp(void)
965 +{
966 + return textsearch_register(&kmp_ops);
967 +}
968 +
969 +static void __exit exit_kmp(void)
970 +{
971 + textsearch_unregister(&kmp_ops);
972 +}
973 +