--- /dev/null
+--- a/Makefile
++++ b/Makefile
+@@ -559,6 +559,9 @@ endif
+ NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
+ CHECKFLAGS += $(NOSTDINC_FLAGS)
+
++# improve gcc optimization
++CFLAGS += $(call cc-option,-funit-at-a-time,)
++
+ # warn about C99 declaration after statement
+ KBUILD_CFLAGS += $(call cc-option,-Wdeclaration-after-statement,)
+
--- /dev/null
+--- a/arch/mips/include/asm/system.h
++++ b/arch/mips/include/asm/system.h
+@@ -187,7 +187,7 @@ extern __u64 __xchg_u64_unsupported_on_3
+ if something tries to do an invalid xchg(). */
+ extern void __xchg_called_with_bad_pointer(void);
+
+-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
++static __always_inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
+ {
+ switch (size) {
+ case 4:
--- /dev/null
+--- a/drivers/mtd/chips/cfi_cmdset_0002.c
++++ b/drivers/mtd/chips/cfi_cmdset_0002.c
+@@ -51,6 +51,7 @@
+ #define SST49LF040B 0x0050
+ #define SST49LF008A 0x005a
+ #define AT49BV6416 0x00d6
++#define MANUFACTURER_SAMSUNG 0x00ec
+
+ static int cfi_amdstd_read (struct mtd_info *, loff_t, size_t, size_t *, u_char *);
+ static int cfi_amdstd_write_words(struct mtd_info *, loff_t, size_t, size_t *, const u_char *);
+@@ -386,12 +387,19 @@ struct mtd_info *cfi_cmdset_0002(struct
+
+ if (extp->MajorVersion != '1' ||
+ (extp->MinorVersion < '0' || extp->MinorVersion > '4')) {
+- printk(KERN_ERR " Unknown Amd/Fujitsu Extended Query "
+- "version %c.%c.\n", extp->MajorVersion,
+- extp->MinorVersion);
+- kfree(extp);
+- kfree(mtd);
+- return NULL;
++ if (cfi->mfr == MANUFACTURER_SAMSUNG &&
++ (extp->MajorVersion == '3' && extp->MinorVersion == '3')) {
++ printk(KERN_NOTICE " Newer Samsung flash detected, "
++ "should be compatibile with Amd/Fujitsu.\n");
++ }
++ else {
++ printk(KERN_ERR " Unknown Amd/Fujitsu Extended Query "
++ "version %c.%c.\n", extp->MajorVersion,
++ extp->MinorVersion);
++ kfree(extp);
++ kfree(mtd);
++ return NULL;
++ }
+ }
+
+ /* Install our own private info structure */
--- /dev/null
+--- a/arch/mips/kernel/head.S
++++ b/arch/mips/kernel/head.S
+@@ -121,6 +121,8 @@
+ #endif
+ .endm
+
++ j kernel_entry
++ nop
+ #ifndef CONFIG_NO_EXCEPT_FILL
+ /*
+ * Reserved space for exception handlers.
--- /dev/null
+--- /dev/null
++++ b/include/asm-mips/mips_machine.h
+@@ -0,0 +1,46 @@
++/*
++ * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation.
++ *
++ */
++
++#ifndef __ASM_MIPS_MACHINE_H
++#define __ASM_MIPS_MACHINE_H
++
++#include <linux/init.h>
++#include <linux/list.h>
++
++struct mips_machine {
++ unsigned long mach_type;
++ void (*mach_setup)(void);
++ char *mach_name;
++ struct list_head list;
++};
++
++void mips_machine_register(struct mips_machine *) __init;
++void mips_machine_setup(unsigned long machtype) __init;
++
++extern char *mips_machine_name;
++
++#define MIPS_MACHINE(_type, _name, _setup) \
++static char machine_name_##_type[] __initdata = _name; \
++static struct mips_machine machine_##_type __initdata = \
++{ \
++ .mach_type = _type, \
++ .mach_name = machine_name_##_type, \
++ .mach_setup = _setup, \
++}; \
++ \
++static int __init register_machine_##_type(void) \
++{ \
++ mips_machine_register(&machine_##_type); \
++ return 0; \
++} \
++ \
++pure_initcall(register_machine_##_type)
++
++#endif /* __ASM_MIPS_MACHINE_H */
++
+--- /dev/null
++++ b/arch/mips/kernel/mips_machine.c
+@@ -0,0 +1,70 @@
++/*
++ * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation.
++ *
++ */
++#include <linux/mm.h>
++
++#include <asm/mips_machine.h>
++#include <asm/bootinfo.h>
++
++static struct list_head mips_machines __initdata =
++ LIST_HEAD_INIT(mips_machines);
++
++char *mips_machine_name = "Unknown";
++
++static struct mips_machine * __init mips_machine_find(unsigned long machtype)
++{
++ struct list_head *this;
++
++ list_for_each(this, &mips_machines) {
++ struct mips_machine *mach;
++
++ mach = list_entry(this, struct mips_machine, list);
++ if (mach->mach_type == machtype)
++ return mach;
++ }
++
++ return NULL;
++}
++
++void __init mips_machine_register(struct mips_machine *mach)
++{
++ list_add_tail(&mach->list, &mips_machines);
++}
++
++void __init mips_machine_setup(unsigned long machtype)
++{
++ struct mips_machine *mach;
++
++ mach = mips_machine_find(machtype);
++ if (!mach) {
++ printk(KERN_ALERT "MIPS: no machine registered for "
++ "machtype %lu\n", machtype);
++ return;
++ }
++
++ if (mach->mach_name) {
++ char *name;
++ unsigned int len;
++
++ len = strlen(mach->mach_name);
++ name = kmalloc(len + 1, GFP_KERNEL);
++ if (name) {
++ strncpy(name, mach->mach_name,len);
++ name[len] = '\0';
++ mips_machine_name = name;
++ } else {
++ printk(KERN_WARNING "MIPS: no memory for machine_name\n");
++ }
++ }
++
++ printk(KERN_INFO "MIPS: machine is %s\n", mips_machine_name);
++
++ if (mach->mach_setup)
++ mach->mach_setup();
++}
++
+--- a/arch/mips/kernel/Makefile
++++ b/arch/mips/kernel/Makefile
+@@ -85,6 +85,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o
+
+ obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
+ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
++obj-$(CONFIG_MIPS_MACHINE) += mips_machine.o
+
+ CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
+
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -837,6 +837,9 @@ config MIPS_DISABLE_OBSOLETE_IDE
+ config SYNC_R4K
+ bool
+
++config MIPS_MACHINE
++ def_bool n
++
+ config NO_IOPORT
+ def_bool n
+
+--- a/arch/mips/kernel/proc.c
++++ b/arch/mips/kernel/proc.c
+@@ -12,6 +12,7 @@
+ #include <asm/cpu-features.h>
+ #include <asm/mipsregs.h>
+ #include <asm/processor.h>
++#include <asm/mips_machine.h>
+
+ unsigned int vced_count, vcei_count;
+
+@@ -31,8 +32,12 @@ static int show_cpuinfo(struct seq_file
+ /*
+ * For the first processor also print the system type
+ */
+- if (n == 0)
++ if (n == 0) {
+ seq_printf(m, "system type\t\t: %s\n", get_system_type());
++#ifdef CONFIG_MIPS_MACHINE
++ seq_printf(m, "machine\t\t\t: %s\n", mips_machine_name);
++#endif
++ }
+
+ seq_printf(m, "processor\t\t: %ld\n", n);
+ sprintf(fmt, "cpu model\t\t: %%s V%%d.%%d%s\n",
--- /dev/null
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -839,6 +839,9 @@ config SYNC_R4K
+
+ config MIPS_MACHINE
+ def_bool n
++
++config PROM_EMU
++ def_bool n
+
+ config NO_IOPORT
+ def_bool n
+--- a/arch/mips/kernel/head.S
++++ b/arch/mips/kernel/head.S
+@@ -143,6 +143,15 @@ FEXPORT(__kernel_entry)
+ j kernel_entry
+ #endif
+
++#ifdef CONFIG_PROM_EMU
++EXPORT(prom_emu_argv)
++ .word 0
++ .word prom_emu_cmdline
++ .ascii "CMDLINE:"
++EXPORT(prom_emu_cmdline)
++ .fill 0x400
++#endif
++
+ __REF
+
+ NESTED(kernel_entry, 16, sp) # kernel entry point
+@@ -183,6 +192,19 @@ NESTED(kernel_entry, 16, sp) # kernel
+ LONG_S zero, (t0)
+ bne t0, t1, 1b
+
++#ifdef CONFIG_PROM_EMU
++ PTR_LA t0, prom_emu_cmdline
++ LONG_L t1, 0(t0)
++ beqz t1, 1f
++
++ li a0, 2
++ PTR_LA a1, prom_emu_argv
++ move a2, zero
++ move a3, zero
++
++1:
++#endif /* CONFIG_PROM_EMU */
++
+ LONG_S a0, fw_arg0 # firmware arguments
+ LONG_S a1, fw_arg1
+ LONG_S a2, fw_arg2
--- /dev/null
+MIPS: allow disabling the kernel FPU emulator
+
+This patch allows turning off the in-kernel Algorithmics
+FPU emulator support, which allows one to save a couple of
+precious blocks on an embedded system.
+
+Signed-off-by: Florian Fainelli <florian@openwrt.org>
+--
+--- a/arch/mips/Kconfig
++++ b/arch/mips/Kconfig
+@@ -825,6 +825,17 @@ config I8259
+ config MIPS_BONITO64
+ bool
+
++config MIPS_FPU_EMU
++ bool
++ default n
++ help
++ This option allows building a kernel with or without the Algorithmics
++ FPU emulator enabled. Turning off this option results in a kernel which
++ does not catch floating operations exceptions. Make sure that your toolchain
++ is configured to enable software floating point emulation in that case.
++
++ If unsure say Y here.
++
+ config MIPS_MSC
+ bool
+
+--- a/arch/mips/math-emu/Makefile
++++ b/arch/mips/math-emu/Makefile
+@@ -2,12 +2,14 @@
+ # Makefile for the Linux/MIPS kernel FPU emulation.
+ #
+
+-obj-y := cp1emu.o ieee754m.o ieee754d.o ieee754dp.o ieee754sp.o ieee754.o \
++obj-y := kernel_linkage.o dsemul.o cp1emu.o
++
++obj-$(CONFIG_MIPS_FPU_EMU) += ieee754m.o ieee754d.o ieee754dp.o ieee754sp.o ieee754.o \
+ ieee754xcpt.o dp_frexp.o dp_modf.o dp_div.o dp_mul.o dp_sub.o \
+ dp_add.o dp_fsp.o dp_cmp.o dp_logb.o dp_scalb.o dp_simple.o \
+ dp_tint.o dp_fint.o dp_tlong.o dp_flong.o sp_frexp.o sp_modf.o \
+ sp_div.o sp_mul.o sp_sub.o sp_add.o sp_fdp.o sp_cmp.o sp_logb.o \
+ sp_scalb.o sp_simple.o sp_tint.o sp_fint.o sp_tlong.o sp_flong.o \
+- dp_sqrt.o sp_sqrt.o kernel_linkage.o dsemul.o
++ dp_sqrt.o sp_sqrt.o
+
+ EXTRA_CFLAGS += -Werror
+--- a/arch/mips/math-emu/cp1emu.c
++++ b/arch/mips/math-emu/cp1emu.c
+@@ -56,6 +56,12 @@
+ #endif
+ #define __mips 4
+
++/* Further private data for which no space exists in mips_fpu_struct */
++
++struct mips_fpu_emulator_stats fpuemustats;
++
++#ifdef CONFIG_MIPS_FPU_EMU
++
+ /* Function which emulates a floating point instruction. */
+
+ static int fpu_emu(struct pt_regs *, struct mips_fpu_struct *,
+@@ -66,10 +72,6 @@ static int fpux_emu(struct pt_regs *,
+ struct mips_fpu_struct *, mips_instruction);
+ #endif
+
+-/* Further private data for which no space exists in mips_fpu_struct */
+-
+-struct mips_fpu_emulator_stats fpuemustats;
+-
+ /* Control registers */
+
+ #define FPCREG_RID 0 /* $0 = revision id */
+@@ -1273,6 +1275,13 @@ int fpu_emulator_cop1Handler(struct pt_r
+
+ return sig;
+ }
++#else
++int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx,
++ int has_fpu)
++{
++ return 0;
++}
++#endif /* CONFIG_MIPS_FPU_EMU */
+
+ #ifdef CONFIG_DEBUG_FS
+ extern struct dentry *mips_debugfs_dir;
+--- a/arch/mips/math-emu/dsemul.c
++++ b/arch/mips/math-emu/dsemul.c
+@@ -109,6 +109,7 @@ int mips_dsemul(struct pt_regs *regs, mi
+ return SIGILL; /* force out of emulation loop */
+ }
+
++#ifdef CONFIG_MIPS_FPU_EMU
+ int do_dsemulret(struct pt_regs *xcp)
+ {
+ struct emuframe __user *fr;
+@@ -165,3 +166,9 @@ int do_dsemulret(struct pt_regs *xcp)
+
+ return 1;
+ }
++#else
++int do_dsemulret(struct pt_regs *xcp)
++{
++ return 0;
++}
++#endif /* CONFIG_MIPS_FPU_EMU */
+--- a/arch/mips/math-emu/kernel_linkage.c
++++ b/arch/mips/math-emu/kernel_linkage.c
+@@ -29,6 +29,7 @@
+
+ #define SIGNALLING_NAN 0x7ff800007ff80000LL
+
++#ifdef CONFIG_MIPS_FPU_EMU
+ void fpu_emulator_init_fpu(void)
+ {
+ static int first = 1;
+@@ -112,4 +113,34 @@ int fpu_emulator_restore_context32(struc
+
+ return err;
+ }
+-#endif
++#endif /* CONFIG_64BIT */
++#else
++
++void fpu_emulator_init_fpu(void)
++{
++ return;
++}
++
++int fpu_emulator_save_context(struct sigcontext __user *sc)
++{
++ return 0;
++}
++
++int fpu_emulator_restore_context(struct sigcontext __user *sc)
++{
++ return 0;
++}
++
++int fpu_emulator_save_context32(struct sigcontext32 __user *sc)
++{
++ return 0;
++}
++
++int fpu_emulator_restore_context32(struct sigcontext32 __user *sc)
++{
++ return 0;
++}
++
++#ifdef CONFIG_64BIT
++#endif /* CONFIG_64BIT */
++#endif /* CONFIG_MIPS_FPU_EMU */
--- /dev/null
+--- a/arch/mips/Makefile
++++ b/arch/mips/Makefile
+@@ -83,7 +83,7 @@ all-$(CONFIG_BOOT_ELF64) := $(vmlinux-64
+ cflags-y += -G 0 -mno-abicalls -fno-pic -pipe
+ cflags-y += -msoft-float
+ LDFLAGS_vmlinux += -G 0 -static -n -nostdlib
+-MODFLAGS += -mlong-calls
++MODFLAGS += -mno-long-calls
+
+ cflags-y += -ffreestanding
+
+--- a/arch/mips/include/asm/module.h
++++ b/arch/mips/include/asm/module.h
+@@ -9,6 +9,11 @@ struct mod_arch_specific {
+ struct list_head dbe_list;
+ const struct exception_table_entry *dbe_start;
+ const struct exception_table_entry *dbe_end;
++
++ void *plt_tbl;
++ unsigned int core_plt_offset;
++ unsigned int core_plt_size;
++ unsigned int init_plt_offset;
+ };
+
+ typedef uint8_t Elf64_Byte; /* Type for a 8-bit quantity. */
+--- a/arch/mips/kernel/module.c
++++ b/arch/mips/kernel/module.c
+@@ -43,6 +43,117 @@ static struct mips_hi16 *mips_hi16_list;
+ static LIST_HEAD(dbe_list);
+ static DEFINE_SPINLOCK(dbe_lock);
+
++/*
++ * Get the potential max trampolines size required of the init and
++ * non-init sections. Only used if we cannot find enough contiguous
++ * physically mapped memory to put the module into.
++ */
++static unsigned int
++get_plt_size(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
++ const char *secstrings, unsigned int symindex, bool is_init)
++{
++ unsigned long ret = 0;
++ unsigned int i, j;
++ Elf_Sym *syms;
++
++ /* Everything marked ALLOC (this includes the exported symbols) */
++ for (i = 1; i < hdr->e_shnum; ++i) {
++ unsigned int info = sechdrs[i].sh_info;
++
++ if (sechdrs[i].sh_type != SHT_REL
++ && sechdrs[i].sh_type != SHT_RELA)
++ continue;
++
++ /* Not a valid relocation section? */
++ if (info >= hdr->e_shnum)
++ continue;
++
++ /* Don't bother with non-allocated sections */
++ if (!(sechdrs[info].sh_flags & SHF_ALLOC))
++ continue;
++
++ /* If it's called *.init*, and we're not init, we're
++ not interested */
++ if ((strstr(secstrings + sechdrs[i].sh_name, ".init") != 0)
++ != is_init)
++ continue;
++
++ syms = (Elf_Sym *) sechdrs[symindex].sh_addr;
++ if (sechdrs[i].sh_type == SHT_REL) {
++ Elf_Mips_Rel *rel = (void *) sechdrs[i].sh_addr;
++ unsigned int size = sechdrs[i].sh_size / sizeof(*rel);
++
++ for (j = 0; j < size; ++j) {
++ Elf_Sym *sym;
++
++ if (ELF_MIPS_R_TYPE(rel[j]) != R_MIPS_26)
++ continue;
++
++ sym = syms + ELF_MIPS_R_SYM(rel[j]);
++ if (!is_init && sym->st_shndx != SHN_UNDEF)
++ continue;
++
++ ret += 4 * sizeof(int);
++ }
++ } else {
++ Elf_Mips_Rela *rela = (void *) sechdrs[i].sh_addr;
++ unsigned int size = sechdrs[i].sh_size / sizeof(*rela);
++
++ for (j = 0; j < size; ++j) {
++ Elf_Sym *sym;
++
++ if (ELF_MIPS_R_TYPE(rela[j]) != R_MIPS_26)
++ continue;
++
++ sym = syms + ELF_MIPS_R_SYM(rela[j]);
++ if (!is_init && sym->st_shndx != SHN_UNDEF)
++ continue;
++
++ ret += 4 * sizeof(int);
++ }
++ }
++ }
++
++ return ret;
++}
++
++#ifndef MODULE_START
++static void *alloc_phys(unsigned long size)
++{
++ unsigned order;
++ struct page *page;
++ struct page *p;
++
++ size = PAGE_ALIGN(size);
++ order = get_order(size);
++
++ page = alloc_pages(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN |
++ __GFP_THISNODE, order);
++ if (!page)
++ return NULL;
++
++ split_page(page, order);
++
++ for (p = page + (size >> PAGE_SHIFT); p < page + (1 << order); ++p)
++ __free_page(p);
++
++ return page_address(page);
++}
++#endif
++
++static void free_phys(void *ptr, unsigned long size)
++{
++ struct page *page;
++ struct page *end;
++
++ page = virt_to_page(ptr);
++ end = page + (PAGE_ALIGN(size) >> PAGE_SHIFT);
++
++ for (; page < end; ++page)
++ __free_page(page);
++}
++
++
+ void *module_alloc(unsigned long size)
+ {
+ #ifdef MODULE_START
+@@ -58,21 +169,68 @@ void *module_alloc(unsigned long size)
+
+ return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL);
+ #else
++ void *ptr;
++
+ if (size == 0)
+ return NULL;
+- return vmalloc(size);
++
++ ptr = alloc_phys(size);
++
++ /* If we failed to allocate physically contiguous memory,
++ * fall back to regular vmalloc. The module loader code will
++ * create jump tables to handle long jumps */
++ if (!ptr)
++ return vmalloc(size);
++
++ return ptr;
++#endif
++}
++
++static inline bool is_phys_addr(void *ptr)
++{
++#ifdef CONFIG_64BIT
++ return (KSEGX((unsigned long)ptr) == CKSEG0);
++#else
++ return (KSEGX(ptr) == KSEG0);
+ #endif
+ }
+
+ /* Free memory returned from module_alloc */
+ void module_free(struct module *mod, void *module_region)
+ {
+- vfree(module_region);
++ if (is_phys_addr(module_region)) {
++ if (mod->module_init == module_region)
++ free_phys(module_region, mod->init_size);
++ else if (mod->module_core == module_region)
++ free_phys(module_region, mod->core_size);
++ else
++ BUG();
++ } else {
++ vfree(module_region);
++ }
+ }
+
+ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+ {
++ unsigned int symindex = 0;
++ unsigned int core_size, init_size;
++ int i;
++
++ for (i = 1; i < hdr->e_shnum; i++)
++ if (sechdrs[i].sh_type == SHT_SYMTAB)
++ symindex = i;
++
++ core_size = get_plt_size(hdr, sechdrs, secstrings, symindex, false);
++ init_size = get_plt_size(hdr, sechdrs, secstrings, symindex, true);
++
++ mod->arch.core_plt_offset = 0;
++ mod->arch.core_plt_size = core_size;
++ mod->arch.init_plt_offset = core_size;
++ mod->arch.plt_tbl = kmalloc(core_size + init_size, GFP_KERNEL);
++ if (!mod->arch.plt_tbl)
++ return -ENOMEM;
++
+ return 0;
+ }
+
+@@ -95,28 +253,40 @@ static int apply_r_mips_32_rela(struct m
+ return 0;
+ }
+
+-static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
++static Elf_Addr add_plt_entry_to(unsigned *plt_offset,
++ void *start, Elf_Addr v)
+ {
+- if (v % 4) {
+- pr_err("module %s: dangerous R_MIPS_26 REL relocation\n",
+- me->name);
+- return -ENOEXEC;
+- }
++ unsigned *tramp = start + *plt_offset;
++ *plt_offset += 4 * sizeof(int);
+
+- if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
+- printk(KERN_ERR
+- "module %s: relocation overflow\n",
+- me->name);
+- return -ENOEXEC;
+- }
++ /* adjust carry for addiu */
++ if (v & 0x00008000)
++ v += 0x10000;
++
++ tramp[0] = 0x3c190000 | (v >> 16); /* lui t9, hi16 */
++ tramp[1] = 0x27390000 | (v & 0xffff); /* addiu t9, t9, lo16 */
++ tramp[2] = 0x03200008; /* jr t9 */
++ tramp[3] = 0x00000000; /* nop */
+
+- *location = (*location & ~0x03ffffff) |
+- ((*location + (v >> 2)) & 0x03ffffff);
++ return (Elf_Addr) tramp;
++}
++
++static Elf_Addr add_plt_entry(struct module *me, void *location, Elf_Addr v)
++{
++ if (location >= me->module_core &&
++ location < me->module_core + me->core_size)
++ return add_plt_entry_to(&me->arch.core_plt_offset,
++ me->arch.plt_tbl, v);
++
++ if (location >= me->module_init &&
++ location < me->module_init + me->init_size)
++ return add_plt_entry_to(&me->arch.init_plt_offset,
++ me->arch.plt_tbl, v);
+
+ return 0;
+ }
+
+-static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v)
++static int set_r_mips_26(struct module *me, u32 *location, u32 ofs, Elf_Addr v)
+ {
+ if (v % 4) {
+ pr_err("module %s: dangerous R_MIPS_26 RELArelocation\n",
+@@ -125,17 +295,31 @@ static int apply_r_mips_26_rela(struct m
+ }
+
+ if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
+- printk(KERN_ERR
++ v = add_plt_entry(me, location, v + (ofs << 2));
++ if (!v) {
++ printk(KERN_ERR
+ "module %s: relocation overflow\n",
+ me->name);
+- return -ENOEXEC;
++ return -ENOEXEC;
++ }
++ ofs = 0;
+ }
+
+- *location = (*location & ~0x03ffffff) | ((v >> 2) & 0x03ffffff);
++ *location = (*location & ~0x03ffffff) | ((ofs + (v >> 2)) & 0x03ffffff);
+
+ return 0;
+ }
+
++static int apply_r_mips_26_rel(struct module *me, u32 *location, Elf_Addr v)
++{
++ return set_r_mips_26(me, location, *location & 0x03ffffff, v);
++}
++
++static int apply_r_mips_26_rela(struct module *me, u32 *location, Elf_Addr v)
++{
++ return set_r_mips_26(me, location, 0, v);
++}
++
+ static int apply_r_mips_hi16_rel(struct module *me, u32 *location, Elf_Addr v)
+ {
+ struct mips_hi16 *n;
+@@ -400,11 +584,23 @@ int module_finalize(const Elf_Ehdr *hdr,
+ list_add(&me->arch.dbe_list, &dbe_list);
+ spin_unlock_irq(&dbe_lock);
+ }
++
++ /* Get rid of the fixup trampoline if we're running the module
++ * from physically mapped address space */
++ if (me->arch.core_plt_offset == 0 &&
++ me->arch.init_plt_offset == me->arch.core_plt_size &&
++ is_phys_addr(me->module_core)) {
++ kfree(me->arch.plt_tbl);
++ me->arch.plt_tbl = NULL;
++ }
++
+ return 0;
+ }
+
+ void module_arch_cleanup(struct module *mod)
+ {
++ if (mod->arch.plt_tbl)
++ kfree(mod->arch.plt_tbl);
+ spin_lock_irq(&dbe_lock);
+ list_del(&mod->arch.dbe_list);
+ spin_unlock_irq(&dbe_lock);
--- /dev/null
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -55,6 +55,27 @@
+ #define LOAD_OFFSET 0
+ #endif
+
++#ifndef SYMTAB_KEEP_STR
++#define SYMTAB_KEEP_STR *(__ksymtab_strings.*)
++#define SYMTAB_DISCARD_STR
++#else
++#define SYMTAB_DISCARD_STR *(__ksymtab_strings.*)
++#endif
++
++#ifndef SYMTAB_KEEP
++#define SYMTAB_KEEP *(__ksymtab.*)
++#define SYMTAB_DISCARD
++#else
++#define SYMTAB_DISCARD *(__ksymtab.*)
++#endif
++
++#ifndef SYMTAB_KEEP_GPL
++#define SYMTAB_KEEP_GPL *(__ksymtab_gpl.*)
++#define SYMTAB_DISCARD_GPL
++#else
++#define SYMTAB_DISCARD_GPL *(__ksymtab_gpl.*)
++#endif
++
+ #ifndef VMLINUX_SYMBOL
+ #define VMLINUX_SYMBOL(_sym_) _sym_
+ #endif
+@@ -256,35 +277,35 @@
+ /* Kernel symbol table: Normal symbols */ \
+ __ksymtab : AT(ADDR(__ksymtab) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___ksymtab) = .; \
+- *(__ksymtab) \
++ SYMTAB_KEEP \
+ VMLINUX_SYMBOL(__stop___ksymtab) = .; \
+ } \
+ \
+ /* Kernel symbol table: GPL-only symbols */ \
+ __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___ksymtab_gpl) = .; \
+- *(__ksymtab_gpl) \
++ SYMTAB_KEEP_GPL \
+ VMLINUX_SYMBOL(__stop___ksymtab_gpl) = .; \
+ } \
+ \
+ /* Kernel symbol table: Normal unused symbols */ \
+ __ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___ksymtab_unused) = .; \
+- *(__ksymtab_unused) \
++ *(__ksymtab_unused.*) \
+ VMLINUX_SYMBOL(__stop___ksymtab_unused) = .; \
+ } \
+ \
+ /* Kernel symbol table: GPL-only unused symbols */ \
+ __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___ksymtab_unused_gpl) = .; \
+- *(__ksymtab_unused_gpl) \
++ *(__ksymtab_unused_gpl.*) \
+ VMLINUX_SYMBOL(__stop___ksymtab_unused_gpl) = .; \
+ } \
+ \
+ /* Kernel symbol table: GPL-future-only symbols */ \
+ __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
+ VMLINUX_SYMBOL(__start___ksymtab_gpl_future) = .; \
+- *(__ksymtab_gpl_future) \
++ *(__ksymtab_gpl_future.*) \
+ VMLINUX_SYMBOL(__stop___ksymtab_gpl_future) = .; \
+ } \
+ \
+@@ -325,7 +346,13 @@
+ \
+ /* Kernel symbol table: strings */ \
+ __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \
+- *(__ksymtab_strings) \
++ SYMTAB_KEEP_STR \
++ } \
++ \
++ /DISCARD/ : { \
++ SYMTAB_DISCARD \
++ SYMTAB_DISCARD_GPL \
++ SYMTAB_DISCARD_STR \
+ } \
+ \
+ /* __*init sections */ \
+--- a/include/linux/module.h
++++ b/include/linux/module.h
+@@ -188,16 +188,24 @@ void *__symbol_get_gpl(const char *symbo
+ #define __CRC_SYMBOL(sym, sec)
+ #endif
+
++#ifdef MODULE
++#define __EXPORT_SUFFIX(sym)
++#else
++#define __EXPORT_SUFFIX(sym) "." #sym
++#endif
++
+ /* For every exported symbol, place a struct in the __ksymtab section */
+ #define __EXPORT_SYMBOL(sym, sec) \
+ extern typeof(sym) sym; \
+ __CRC_SYMBOL(sym, sec) \
+ static const char __kstrtab_##sym[] \
+- __attribute__((section("__ksymtab_strings"), aligned(1))) \
++ __attribute__((section("__ksymtab_strings" \
++ __EXPORT_SUFFIX(sym)), aligned(1))) \
+ = MODULE_SYMBOL_PREFIX #sym; \
+ static const struct kernel_symbol __ksymtab_##sym \
+ __used \
+- __attribute__((section("__ksymtab" sec), unused)) \
++ __attribute__((section("__ksymtab" sec \
++ __EXPORT_SUFFIX(sym)), unused)) \
+ = { (unsigned long)&sym, __kstrtab_##sym }
+
+ #define EXPORT_SYMBOL(sym) \
+--- a/Makefile
++++ b/Makefile
+@@ -986,7 +986,7 @@ prepare: prepare0
+ # Leave this as default for preprocessing vmlinux.lds.S, which is now
+ # done in arch/$(ARCH)/kernel/Makefile
+
+-export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH)
++export CPPFLAGS_vmlinux.lds += -P -C -U$(ARCH) $(EXTRA_LDSFLAGS)
+
+ # The asm symlink changes when $(ARCH) changes.
+ # Detect this and ask user to run make mrproper
+--- a/arch/arm/kernel/vmlinux.lds.S
++++ b/arch/arm/kernel/vmlinux.lds.S
+@@ -79,26 +79,6 @@ SECTIONS
+ #endif
+ }
+
+- /DISCARD/ : { /* Exit code and data */
+- EXIT_TEXT
+- EXIT_DATA
+- *(.exitcall.exit)
+- *(.ARM.exidx.exit.text)
+- *(.ARM.extab.exit.text)
+-#ifndef CONFIG_HOTPLUG_CPU
+- *(.ARM.exidx.cpuexit.text)
+- *(.ARM.extab.cpuexit.text)
+-#endif
+-#ifndef CONFIG_HOTPLUG
+- *(.ARM.exidx.devexit.text)
+- *(.ARM.extab.devexit.text)
+-#endif
+-#ifndef CONFIG_MMU
+- *(.fixup)
+- *(__ex_table)
+-#endif
+- }
+-
+ .text : { /* Real text segment */
+ _text = .; /* Text and read-only data */
+ __exception_text_start = .;
+@@ -205,6 +185,28 @@ SECTIONS
+ __bss_stop = .;
+ _end = .;
+ }
++
++ /DISCARD/ : { /* Exit code and data */
++ EXIT_TEXT
++ EXIT_DATA
++ *(.discard)
++ *(.exitcall.exit)
++ *(.ARM.exidx.exit.text)
++ *(.ARM.extab.exit.text)
++#ifndef CONFIG_HOTPLUG_CPU
++ *(.ARM.exidx.cpuexit.text)
++ *(.ARM.extab.cpuexit.text)
++#endif
++#ifndef CONFIG_HOTPLUG
++ *(.ARM.exidx.devexit.text)
++ *(.ARM.extab.devexit.text)
++#endif
++#ifndef CONFIG_MMU
++ *(.fixup)
++ *(__ex_table)
++#endif
++ }
++
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+--- a/arch/powerpc/kernel/vmlinux.lds.S
++++ b/arch/powerpc/kernel/vmlinux.lds.S
+@@ -37,12 +37,6 @@ jiffies = jiffies_64 + 4;
+ #endif
+ SECTIONS
+ {
+- /* Sections to be discarded. */
+- /DISCARD/ : {
+- *(.exitcall.exit)
+- EXIT_DATA
+- }
+-
+ . = KERNELBASE;
+
+ /*
+@@ -295,6 +289,12 @@ SECTIONS
+ __bss_stop = .;
+ }
+
++ /* Sections to be discarded. */
++ /DISCARD/ : {
++ *(.exitcall.exit)
++ EXIT_DATA
++ }
++
+ . = ALIGN(PAGE_SIZE);
+ _end = . ;
+ PROVIDE32 (end = .);
--- /dev/null
+--- a/drivers/pci/Kconfig
++++ b/drivers/pci/Kconfig
+@@ -51,6 +51,12 @@ config PCI_STUB
+
+ When in doubt, say N.
+
++config PCI_DISABLE_COMMON_QUIRKS
++ bool "PCI disable common quirks"
++ depends on PCI
++ help
++ If you don't know what to do here, say N.
++
+ config HT_IRQ
+ bool "Interrupts on hypertransport devices"
+ default y
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -98,6 +98,7 @@ static void __devinit quirk_resource_ali
+ }
+ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment);
+
++#ifndef CONFIG_PCI_DISABLE_COMMON_QUIRKS
+ /* The Mellanox Tavor device gives false positive parity errors
+ * Mark this device with a broken_parity_status, to allow
+ * PCI scanning code to "skip" this now blacklisted device.
+@@ -1866,7 +1867,9 @@ static void __devinit fixup_rev1_53c810(
+ }
+ }
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NCR, PCI_DEVICE_ID_NCR_53C810, fixup_rev1_53c810);
++#endif /* !CONFIG_PCI_DISABLE_COMMON_QUIRKS */
+
++#ifndef CONFIG_PCI_DISABLE_COMMON_QUIRKS
+ /* Enable 1k I/O space granularity on the Intel P64H2 */
+ static void __devinit quirk_p64h2_1k_io(struct pci_dev *dev)
+ {
+@@ -2494,6 +2497,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x150a, quirk_i82576_sriov);
+
+ #endif /* CONFIG_PCI_IOV */
++#endif /* !CONFIG_PCI_DISABLE_COMMON_QUIRKS */
+
+ static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
+ struct pci_fixup *end)
--- /dev/null
+--- a/fs/squashfs/Kconfig
++++ b/fs/squashfs/Kconfig
+@@ -1,7 +1,8 @@
+ config SQUASHFS
+ tristate "SquashFS 4.0 - Squashed file system support"
+ depends on BLOCK
+- select ZLIB_INFLATE
++ select CRYPTO
++ select CRYPTO_ZLIB
+ help
+ Saying Y here includes support for SquashFS 4.0 (a Compressed
+ Read-Only File System). Squashfs is a highly compressed read-only
+--- a/fs/squashfs/block.c
++++ b/fs/squashfs/block.c
+@@ -32,7 +32,8 @@
+ #include <linux/mutex.h>
+ #include <linux/string.h>
+ #include <linux/buffer_head.h>
+-#include <linux/zlib.h>
++
++#include <crypto/compress.h>
+
+ #include "squashfs_fs.h"
+ #include "squashfs_fs_sb.h"
+@@ -153,7 +154,8 @@ int squashfs_read_data(struct super_bloc
+ }
+
+ if (compressed) {
+- int zlib_err = 0, zlib_init = 0;
++ int res = 0, decomp_init = 0;
++ struct comp_request req;
+
+ /*
+ * Uncompress block.
+@@ -161,12 +163,13 @@ int squashfs_read_data(struct super_bloc
+
+ mutex_lock(&msblk->read_data_mutex);
+
+- msblk->stream.avail_out = 0;
+- msblk->stream.avail_in = 0;
++ req.avail_out = 0;
++ req.avail_in = 0;
+
+ bytes = length;
++ length = 0;
+ do {
+- if (msblk->stream.avail_in == 0 && k < b) {
++ if (req.avail_in == 0 && k < b) {
+ avail = min(bytes, msblk->devblksize - offset);
+ bytes -= avail;
+ wait_on_buffer(bh[k]);
+@@ -179,45 +182,47 @@ int squashfs_read_data(struct super_bloc
+ continue;
+ }
+
+- msblk->stream.next_in = bh[k]->b_data + offset;
+- msblk->stream.avail_in = avail;
++ req.next_in = bh[k]->b_data + offset;
++ req.avail_in = avail;
+ offset = 0;
+ }
+
+- if (msblk->stream.avail_out == 0 && page < pages) {
+- msblk->stream.next_out = buffer[page++];
+- msblk->stream.avail_out = PAGE_CACHE_SIZE;
++ if (req.avail_out == 0 && page < pages) {
++ req.next_out = buffer[page++];
++ req.avail_out = PAGE_CACHE_SIZE;
+ }
+
+- if (!zlib_init) {
+- zlib_err = zlib_inflateInit(&msblk->stream);
+- if (zlib_err != Z_OK) {
+- ERROR("zlib_inflateInit returned"
+- " unexpected result 0x%x,"
+- " srclength %d\n", zlib_err,
+- srclength);
++ if (!decomp_init) {
++ res = crypto_decompress_init(msblk->tfm);
++ if (res) {
++ ERROR("crypto_decompress_init "
++ "returned %d, srclength %d\n",
++ res, srclength);
+ goto release_mutex;
+ }
+- zlib_init = 1;
++ decomp_init = 1;
+ }
+
+- zlib_err = zlib_inflate(&msblk->stream, Z_SYNC_FLUSH);
++ res = crypto_decompress_update(msblk->tfm, &req);
++ if (res < 0) {
++ ERROR("crypto_decompress_update returned %d, "
++ "data probably corrupt\n", res);
++ goto release_mutex;
++ }
++ length += res;
+
+- if (msblk->stream.avail_in == 0 && k < b)
++ if (req.avail_in == 0 && k < b)
+ put_bh(bh[k++]);
+- } while (zlib_err == Z_OK);
++ } while (bytes || res);
+
+- if (zlib_err != Z_STREAM_END) {
+- ERROR("zlib_inflate error, data probably corrupt\n");
++ res = crypto_decompress_final(msblk->tfm, &req);
++ if (res < 0) {
++ ERROR("crypto_decompress_final returned %d, data "
++ "probably corrupt\n", res);
+ goto release_mutex;
+ }
++ length += res;
+
+- zlib_err = zlib_inflateEnd(&msblk->stream);
+- if (zlib_err != Z_OK) {
+- ERROR("zlib_inflate error, data probably corrupt\n");
+- goto release_mutex;
+- }
+- length = msblk->stream.total_out;
+ mutex_unlock(&msblk->read_data_mutex);
+ } else {
+ /*
+--- a/fs/squashfs/squashfs_fs_sb.h
++++ b/fs/squashfs/squashfs_fs_sb.h
+@@ -64,7 +64,7 @@ struct squashfs_sb_info {
+ struct mutex read_data_mutex;
+ struct mutex meta_index_mutex;
+ struct meta_index *meta_index;
+- z_stream stream;
++ struct crypto_pcomp *tfm;
+ __le64 *inode_lookup_table;
+ u64 inode_table;
+ u64 directory_table;
+--- a/fs/squashfs/super.c
++++ b/fs/squashfs/super.c
+@@ -38,11 +38,19 @@
+ #include <linux/zlib.h>
+ #include <linux/magic.h>
+
++#include <crypto/compress.h>
++
++#include <net/netlink.h>
++
+ #include "squashfs_fs.h"
+ #include "squashfs_fs_sb.h"
+ #include "squashfs_fs_i.h"
+ #include "squashfs.h"
+
++
++#define SQUASHFS_CRYPTO_ALG "zlib"
++
++
+ static struct file_system_type squashfs_fs_type;
+ static struct super_operations squashfs_super_ops;
+
+@@ -76,6 +84,16 @@ static int squashfs_fill_super(struct su
+ unsigned short flags;
+ unsigned int fragments;
+ u64 lookup_table_start;
++ struct {
++ struct nlattr nla;
++ int val;
++ } params = {
++ .nla = {
++ .nla_len = nla_attr_size(sizeof(int)),
++ .nla_type = ZLIB_DECOMP_WINDOWBITS,
++ },
++ .val = DEF_WBITS,
++ };
+ int err;
+
+ TRACE("Entered squashfs_fill_superblock\n");
+@@ -87,16 +105,25 @@ static int squashfs_fill_super(struct su
+ }
+ msblk = sb->s_fs_info;
+
+- msblk->stream.workspace = kmalloc(zlib_inflate_workspacesize(),
+- GFP_KERNEL);
+- if (msblk->stream.workspace == NULL) {
+- ERROR("Failed to allocate zlib workspace\n");
++ msblk->tfm = crypto_alloc_pcomp(SQUASHFS_CRYPTO_ALG, 0,
++ CRYPTO_ALG_ASYNC);
++ if (IS_ERR(msblk->tfm)) {
++ ERROR("Failed to load %s crypto module\n",
++ SQUASHFS_CRYPTO_ALG);
++ err = PTR_ERR(msblk->tfm);
++ goto failed_pcomp;
++ }
++
++ err = crypto_decompress_setup(msblk->tfm, ¶ms, sizeof(params));
++ if (err) {
++ ERROR("Failed to set up decompression parameters\n");
+ goto failure;
+ }
+
+ sblk = kzalloc(sizeof(*sblk), GFP_KERNEL);
+ if (sblk == NULL) {
+ ERROR("Failed to allocate squashfs_super_block\n");
++ err = -ENOMEM;
+ goto failure;
+ }
+
+@@ -295,17 +322,18 @@ failed_mount:
+ kfree(msblk->inode_lookup_table);
+ kfree(msblk->fragment_index);
+ kfree(msblk->id_table);
+- kfree(msblk->stream.workspace);
++ crypto_free_pcomp(msblk->tfm);
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+ kfree(sblk);
+ return err;
+
+ failure:
+- kfree(msblk->stream.workspace);
++ crypto_free_pcomp(msblk->tfm);
++failed_pcomp:
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+- return -ENOMEM;
++ return err;
+ }
+
+
+@@ -349,7 +377,7 @@ static void squashfs_put_super(struct su
+ kfree(sbi->id_table);
+ kfree(sbi->fragment_index);
+ kfree(sbi->meta_index);
+- kfree(sbi->stream.workspace);
++ crypto_free_pcomp(sbi->tfm);
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+ }
--- /dev/null
+--- /dev/null
++++ b/crypto/unlzma.c
+@@ -0,0 +1,723 @@
++/*
++ * LZMA uncompresion module for pcomp
++ * Copyright (C) 2009 Felix Fietkau <nbd@openwrt.org>
++ *
++ * Based on:
++ * Initial Linux kernel adaptation
++ * Copyright (C) 2006 Alain < alain@knaff.lu >
++ *
++ * Based on small lzma deflate implementation/Small range coder
++ * implementation for lzma.
++ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
++ *
++ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
++ * Copyright (C) 1999-2005 Igor Pavlov
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation.
++ *
++ * FIXME: the current implementation assumes that the caller will
++ * not free any output buffers until the whole decompression has been
++ * completed. This is necessary, because LZMA looks back at old output
++ * instead of doing a separate dictionary allocation, which saves RAM.
++ */
++
++#include <linux/init.h>
++#include <linux/module.h>
++#include <linux/vmalloc.h>
++#include <linux/interrupt.h>
++#include <linux/mm.h>
++#include <linux/net.h>
++#include <linux/slab.h>
++#include <linux/kthread.h>
++
++#include <crypto/internal/compress.h>
++#include "unlzma.h"
++
++static int instance = 0;
++
++struct unlzma_buffer {
++ struct unlzma_buffer *last;
++ int offset;
++ int size;
++ u8 *ptr;
++};
++
++struct unlzma_ctx {
++ struct task_struct *thread;
++ wait_queue_head_t next_req;
++ struct mutex mutex;
++ bool active;
++ bool cancel;
++
++ const u8 *next_in;
++ int avail_in;
++
++ u8 *next_out;
++ int avail_out;
++
++ /* reader state */
++ u32 code;
++ u32 range;
++ u32 bound;
++
++ /* writer state */
++ u8 previous_byte;
++ ssize_t pos;
++ struct unlzma_buffer *head;
++ int buf_full;
++
++ /* cstate */
++ int state;
++ u32 rep0, rep1, rep2, rep3;
++
++ u32 dict_size;
++
++ void *workspace;
++ int workspace_size;
++};
++
++static inline bool
++unlzma_should_stop(struct unlzma_ctx *ctx)
++{
++ return unlikely(kthread_should_stop() || ctx->cancel);
++}
++
++static void
++get_buffer(struct unlzma_ctx *ctx)
++{
++ struct unlzma_buffer *bh;
++
++ bh = kzalloc(sizeof(struct unlzma_buffer), GFP_KERNEL);
++ bh->ptr = ctx->next_out;
++ bh->offset = ctx->pos;
++ bh->last = ctx->head;
++ bh->size = ctx->avail_out;
++ ctx->head = bh;
++ ctx->buf_full = 0;
++}
++
++static void
++unlzma_request_buffer(struct unlzma_ctx *ctx, int *avail)
++{
++ do {
++ mutex_unlock(&ctx->mutex);
++ if (wait_event_interruptible(ctx->next_req,
++ unlzma_should_stop(ctx) || (*avail > 0)))
++ schedule();
++ mutex_lock(&ctx->mutex);
++ } while (*avail <= 0 && !unlzma_should_stop(ctx));
++
++ if (!unlzma_should_stop(ctx) && ctx->buf_full)
++ get_buffer(ctx);
++}
++
++static u8
++rc_read(struct unlzma_ctx *ctx)
++{
++ if (unlikely(ctx->avail_in <= 0))
++ unlzma_request_buffer(ctx, &ctx->avail_in);
++
++ if (unlzma_should_stop(ctx))
++ return 0;
++
++ ctx->avail_in--;
++ return *(ctx->next_in++);
++}
++
++
++static inline void
++rc_get_code(struct unlzma_ctx *ctx)
++{
++ ctx->code = (ctx->code << 8) | rc_read(ctx);
++}
++
++static void
++rc_normalize(struct unlzma_ctx *ctx)
++{
++ if (ctx->range < (1 << RC_TOP_BITS)) {
++ ctx->range <<= 8;
++ rc_get_code(ctx);
++ }
++}
++
++static int
++rc_is_bit_0(struct unlzma_ctx *ctx, u16 *p)
++{
++ rc_normalize(ctx);
++ ctx->bound = *p * (ctx->range >> RC_MODEL_TOTAL_BITS);
++ return ctx->code < ctx->bound;
++}
++
++static void
++rc_update_bit_0(struct unlzma_ctx *ctx, u16 *p)
++{
++ ctx->range = ctx->bound;
++ *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS;
++}
++
++static void
++rc_update_bit_1(struct unlzma_ctx *ctx, u16 *p)
++{
++ ctx->range -= ctx->bound;
++ ctx->code -= ctx->bound;
++ *p -= *p >> RC_MOVE_BITS;
++}
++
++static bool
++rc_get_bit(struct unlzma_ctx *ctx, u16 *p, int *symbol)
++{
++ if (rc_is_bit_0(ctx, p)) {
++ rc_update_bit_0(ctx, p);
++ *symbol *= 2;
++ return 0;
++ } else {
++ rc_update_bit_1(ctx, p);
++ *symbol = *symbol * 2 + 1;
++ return 1;
++ }
++}
++
++static int
++rc_direct_bit(struct unlzma_ctx *ctx)
++{
++ rc_normalize(ctx);
++ ctx->range >>= 1;
++ if (ctx->code >= ctx->range) {
++ ctx->code -= ctx->range;
++ return 1;
++ }
++ return 0;
++}
++
++static void
++rc_bit_tree_decode(struct unlzma_ctx *ctx, u16 *p, int num_levels, int *symbol)
++{
++ int i = num_levels;
++
++ *symbol = 1;
++ while (i--)
++ rc_get_bit(ctx, p + *symbol, symbol);
++ *symbol -= 1 << num_levels;
++}
++
++static u8
++peek_old_byte(struct unlzma_ctx *ctx, u32 offs)
++{
++ struct unlzma_buffer *bh = ctx->head;
++ u32 pos;
++
++ pos = ctx->pos - offs;
++ if (pos >= ctx->dict_size) {
++ pos = (~pos % ctx->dict_size);
++ }
++
++ while (bh->offset > pos) {
++ bh = bh->last;
++ BUG_ON(!bh);
++ }
++
++ pos -= bh->offset;
++ BUG_ON(pos >= bh->size);
++
++ return bh->ptr[pos];
++}
++
++static void
++write_byte(struct unlzma_ctx *ctx, u8 byte)
++{
++ if (unlikely(ctx->avail_out <= 0)) {
++ unlzma_request_buffer(ctx, &ctx->avail_out);
++ }
++
++ if (!ctx->avail_out)
++ return;
++
++ ctx->previous_byte = byte;
++ *(ctx->next_out++) = byte;
++ ctx->avail_out--;
++ if (ctx->avail_out == 0)
++ ctx->buf_full = 1;
++ ctx->pos++;
++}
++
++
++static inline void
++copy_byte(struct unlzma_ctx *ctx, u32 offs)
++{
++ write_byte(ctx, peek_old_byte(ctx, offs));
++}
++
++static void
++copy_bytes(struct unlzma_ctx *ctx, u32 rep0, int len)
++{
++ do {
++ copy_byte(ctx, rep0);
++ len--;
++ if (unlzma_should_stop(ctx))
++ break;
++ } while (len != 0);
++}
++
++static void
++process_bit0(struct unlzma_ctx *ctx, u16 *p, int pos_state, u16 *prob,
++ int lc, u32 literal_pos_mask)
++{
++ int mi = 1;
++ rc_update_bit_0(ctx, prob);
++ prob = (p + LZMA_LITERAL +
++ (LZMA_LIT_SIZE
++ * (((ctx->pos & literal_pos_mask) << lc)
++ + (ctx->previous_byte >> (8 - lc))))
++ );
++
++ if (ctx->state >= LZMA_NUM_LIT_STATES) {
++ int match_byte = peek_old_byte(ctx, ctx->rep0);
++ do {
++ u16 bit;
++ u16 *prob_lit;
++
++ match_byte <<= 1;
++ bit = match_byte & 0x100;
++ prob_lit = prob + 0x100 + bit + mi;
++ if (rc_get_bit(ctx, prob_lit, &mi) != !!bit)
++ break;
++ } while (mi < 0x100);
++ }
++ while (mi < 0x100) {
++ u16 *prob_lit = prob + mi;
++ rc_get_bit(ctx, prob_lit, &mi);
++ }
++ write_byte(ctx, mi);
++ if (ctx->state < 4)
++ ctx->state = 0;
++ else if (ctx->state < 10)
++ ctx->state -= 3;
++ else
++ ctx->state -= 6;
++}
++
++static void
++process_bit1(struct unlzma_ctx *ctx, u16 *p, int pos_state, u16 *prob)
++{
++ int offset;
++ u16 *prob_len;
++ int num_bits;
++ int len;
++
++ rc_update_bit_1(ctx, prob);
++ prob = p + LZMA_IS_REP + ctx->state;
++ if (rc_is_bit_0(ctx, prob)) {
++ rc_update_bit_0(ctx, prob);
++ ctx->rep3 = ctx->rep2;
++ ctx->rep2 = ctx->rep1;
++ ctx->rep1 = ctx->rep0;
++ ctx->state = ctx->state < LZMA_NUM_LIT_STATES ? 0 : 3;
++ prob = p + LZMA_LEN_CODER;
++ } else {
++ rc_update_bit_1(ctx, prob);
++ prob = p + LZMA_IS_REP_G0 + ctx->state;
++ if (rc_is_bit_0(ctx, prob)) {
++ rc_update_bit_0(ctx, prob);
++ prob = (p + LZMA_IS_REP_0_LONG
++ + (ctx->state <<
++ LZMA_NUM_POS_BITS_MAX) +
++ pos_state);
++ if (rc_is_bit_0(ctx, prob)) {
++ rc_update_bit_0(ctx, prob);
++
++ ctx->state = ctx->state < LZMA_NUM_LIT_STATES ?
++ 9 : 11;
++ copy_byte(ctx, ctx->rep0);
++ return;
++ } else {
++ rc_update_bit_1(ctx, prob);
++ }
++ } else {
++ u32 distance;
++
++ rc_update_bit_1(ctx, prob);
++ prob = p + LZMA_IS_REP_G1 + ctx->state;
++ if (rc_is_bit_0(ctx, prob)) {
++ rc_update_bit_0(ctx, prob);
++ distance = ctx->rep1;
++ } else {
++ rc_update_bit_1(ctx, prob);
++ prob = p + LZMA_IS_REP_G2 + ctx->state;
++ if (rc_is_bit_0(ctx, prob)) {
++ rc_update_bit_0(ctx, prob);
++ distance = ctx->rep2;
++ } else {
++ rc_update_bit_1(ctx, prob);
++ distance = ctx->rep3;
++ ctx->rep3 = ctx->rep2;
++ }
++ ctx->rep2 = ctx->rep1;
++ }
++ ctx->rep1 = ctx->rep0;
++ ctx->rep0 = distance;
++ }
++ ctx->state = ctx->state < LZMA_NUM_LIT_STATES ? 8 : 11;
++ prob = p + LZMA_REP_LEN_CODER;
++ }
++
++ prob_len = prob + LZMA_LEN_CHOICE;
++ if (rc_is_bit_0(ctx, prob_len)) {
++ rc_update_bit_0(ctx, prob_len);
++ prob_len = (prob + LZMA_LEN_LOW
++ + (pos_state <<
++ LZMA_LEN_NUM_LOW_BITS));
++ offset = 0;
++ num_bits = LZMA_LEN_NUM_LOW_BITS;
++ } else {
++ rc_update_bit_1(ctx, prob_len);
++ prob_len = prob + LZMA_LEN_CHOICE_2;
++ if (rc_is_bit_0(ctx, prob_len)) {
++ rc_update_bit_0(ctx, prob_len);
++ prob_len = (prob + LZMA_LEN_MID
++ + (pos_state <<
++ LZMA_LEN_NUM_MID_BITS));
++ offset = 1 << LZMA_LEN_NUM_LOW_BITS;
++ num_bits = LZMA_LEN_NUM_MID_BITS;
++ } else {
++ rc_update_bit_1(ctx, prob_len);
++ prob_len = prob + LZMA_LEN_HIGH;
++ offset = ((1 << LZMA_LEN_NUM_LOW_BITS)
++ + (1 << LZMA_LEN_NUM_MID_BITS));
++ num_bits = LZMA_LEN_NUM_HIGH_BITS;
++ }
++ }
++
++ rc_bit_tree_decode(ctx, prob_len, num_bits, &len);
++ len += offset;
++
++ if (ctx->state < 4) {
++ int pos_slot;
++
++ ctx->state += LZMA_NUM_LIT_STATES;
++ prob =
++ p + LZMA_POS_SLOT +
++ ((len <
++ LZMA_NUM_LEN_TO_POS_STATES ? len :
++ LZMA_NUM_LEN_TO_POS_STATES - 1)
++ << LZMA_NUM_POS_SLOT_BITS);
++ rc_bit_tree_decode(ctx, prob,
++ LZMA_NUM_POS_SLOT_BITS,
++ &pos_slot);
++ if (pos_slot >= LZMA_START_POS_MODEL_INDEX) {
++ int i, mi;
++ num_bits = (pos_slot >> 1) - 1;
++ ctx->rep0 = 2 | (pos_slot & 1);
++ if (pos_slot < LZMA_END_POS_MODEL_INDEX) {
++ ctx->rep0 <<= num_bits;
++ prob = p + LZMA_SPEC_POS +
++ ctx->rep0 - pos_slot - 1;
++ } else {
++ num_bits -= LZMA_NUM_ALIGN_BITS;
++ while (num_bits--)
++ ctx->rep0 = (ctx->rep0 << 1) |
++ rc_direct_bit(ctx);
++ prob = p + LZMA_ALIGN;
++ ctx->rep0 <<= LZMA_NUM_ALIGN_BITS;
++ num_bits = LZMA_NUM_ALIGN_BITS;
++ }
++ i = 1;
++ mi = 1;
++ while (num_bits--) {
++ if (rc_get_bit(ctx, prob + mi, &mi))
++ ctx->rep0 |= i;
++ i <<= 1;
++ }
++ } else
++ ctx->rep0 = pos_slot;
++ if (++(ctx->rep0) == 0)
++ return;
++ }
++
++ len += LZMA_MATCH_MIN_LEN;
++
++ copy_bytes(ctx, ctx->rep0, len);
++}
++
++
++static int
++do_unlzma(struct unlzma_ctx *ctx)
++{
++ u8 hdr_buf[sizeof(struct lzma_header)];
++ struct lzma_header *header = (struct lzma_header *)hdr_buf;
++ u32 pos_state_mask;
++ u32 literal_pos_mask;
++ int lc, pb, lp;
++ int num_probs;
++ int i, mi;
++ u16 *p;
++
++ for (i = 0; i < sizeof(struct lzma_header); i++) {
++ hdr_buf[i] = rc_read(ctx);
++ }
++
++ ctx->pos = 0;
++ get_buffer(ctx);
++ ctx->active = true;
++ ctx->state = 0;
++ ctx->rep0 = ctx->rep1 = ctx->rep2 = ctx->rep3 = 1;
++
++ ctx->previous_byte = 0;
++ ctx->code = 0;
++ ctx->range = 0xFFFFFFFF;
++
++ ctx->dict_size = le32_to_cpu(header->dict_size);
++
++ if (header->pos >= (9 * 5 * 5))
++ return -1;
++
++ mi = 0;
++ lc = header->pos;
++ while (lc >= 9) {
++ mi++;
++ lc -= 9;
++ }
++ pb = 0;
++ lp = mi;
++ while (lp >= 5) {
++ pb++;
++ lp -= 5;
++ }
++ pos_state_mask = (1 << pb) - 1;
++ literal_pos_mask = (1 << lp) - 1;
++
++ if (ctx->dict_size == 0)
++ ctx->dict_size = 1;
++
++ num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp));
++ if (ctx->workspace_size < num_probs * sizeof(*p)) {
++ if (ctx->workspace)
++ vfree(ctx->workspace);
++ ctx->workspace_size = num_probs * sizeof(*p);
++ ctx->workspace = vmalloc(ctx->workspace_size);
++ }
++ p = (u16 *) ctx->workspace;
++ if (!p)
++ return -1;
++
++ num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp));
++ for (i = 0; i < num_probs; i++)
++ p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1;
++
++ for (i = 0; i < 5; i++)
++ rc_get_code(ctx);
++
++ while (1) {
++ int pos_state = ctx->pos & pos_state_mask;
++ u16 *prob = p + LZMA_IS_MATCH +
++ (ctx->state << LZMA_NUM_POS_BITS_MAX) + pos_state;
++ if (rc_is_bit_0(ctx, prob))
++ process_bit0(ctx, p, pos_state, prob,
++ lc, literal_pos_mask);
++ else {
++ process_bit1(ctx, p, pos_state, prob);
++ if (ctx->rep0 == 0)
++ break;
++ }
++ if (unlzma_should_stop(ctx))
++ break;
++ }
++ if (likely(!unlzma_should_stop(ctx)))
++ rc_normalize(ctx);
++
++ return ctx->pos;
++}
++
++
++static void
++unlzma_reset_buf(struct unlzma_ctx *ctx)
++{
++ ctx->avail_in = 0;
++ ctx->next_in = NULL;
++ ctx->avail_out = 0;
++ ctx->next_out = NULL;
++}
++
++static int
++unlzma_thread(void *data)
++{
++ struct unlzma_ctx *ctx = data;
++
++ mutex_lock(&ctx->mutex);
++ do {
++ if (do_unlzma(ctx) < 0)
++ ctx->pos = 0;
++ unlzma_reset_buf(ctx);
++ ctx->cancel = false;
++ ctx->active = false;
++ while (ctx->head) {
++ struct unlzma_buffer *bh = ctx->head;
++ ctx->head = bh->last;
++ kfree(bh);
++ }
++ } while (!kthread_should_stop());
++ mutex_unlock(&ctx->mutex);
++ return 0;
++}
++
++
++static int
++unlzma_init(struct crypto_tfm *tfm)
++{
++ return 0;
++}
++
++static void
++unlzma_cancel(struct unlzma_ctx *ctx)
++{
++ unlzma_reset_buf(ctx);
++
++ if (!ctx->active)
++ return;
++
++ ctx->cancel = true;
++ do {
++ mutex_unlock(&ctx->mutex);
++ wake_up(&ctx->next_req);
++ schedule();
++ mutex_lock(&ctx->mutex);
++ } while (ctx->cancel);
++}
++
++
++static void
++unlzma_exit(struct crypto_tfm *tfm)
++{
++ struct unlzma_ctx *ctx = crypto_tfm_ctx(tfm);
++
++ if (ctx->thread) {
++ unlzma_cancel(ctx);
++ kthread_stop(ctx->thread);
++ ctx->thread = NULL;
++ }
++}
++
++static int
++unlzma_decompress_setup(struct crypto_pcomp *tfm, void *p, unsigned int len)
++{
++ struct unlzma_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
++ int ret = 0;
++
++ if (ctx->thread)
++ return 0;
++
++ mutex_init(&ctx->mutex);
++ init_waitqueue_head(&ctx->next_req);
++ ctx->thread = kthread_run(unlzma_thread, ctx, "unlzma/%d", instance++);
++ if (IS_ERR(ctx->thread)) {
++ ret = PTR_ERR(ctx->thread);
++ ctx->thread = NULL;
++ }
++
++ return ret;
++}
++
++static int
++unlzma_decompress_init(struct crypto_pcomp *tfm)
++{
++ struct unlzma_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
++
++ ctx->pos = 0;
++ return 0;
++}
++
++static void
++unlzma_wait_complete(struct unlzma_ctx *ctx, bool finish)
++{
++ do {
++ mutex_unlock(&ctx->mutex);
++ wake_up(&ctx->next_req);
++ schedule();
++ mutex_lock(&ctx->mutex);
++ } while (ctx->active && (ctx->avail_in > 0) && (ctx->avail_out > 0));
++}
++
++static int
++unlzma_decompress_update(struct crypto_pcomp *tfm, struct comp_request *req)
++{
++ struct unlzma_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
++ size_t pos = 0;
++
++ mutex_lock(&ctx->mutex);
++ if (!ctx->active && !req->avail_in)
++ goto out;
++
++ pos = ctx->pos;
++ ctx->next_in = req->next_in;
++ ctx->avail_in = req->avail_in;
++ ctx->next_out = req->next_out;
++ ctx->avail_out = req->avail_out;
++
++ unlzma_wait_complete(ctx, false);
++
++ req->next_in = ctx->next_in;
++ req->avail_in = ctx->avail_in;
++ req->next_out = ctx->next_out;
++ req->avail_out = ctx->avail_out;
++ ctx->next_in = 0;
++ ctx->avail_in = 0;
++ pos = ctx->pos - pos;
++
++out:
++ mutex_unlock(&ctx->mutex);
++ return pos;
++}
++
++static int
++unlzma_decompress_final(struct crypto_pcomp *tfm, struct comp_request *req)
++{
++ struct unlzma_ctx *ctx = crypto_tfm_ctx(crypto_pcomp_tfm(tfm));
++ int ret = 0;
++
++ /* cancel pending operation */
++ mutex_lock(&ctx->mutex);
++ if (ctx->active) {
++ // ret = -EINVAL;
++ unlzma_cancel(ctx);
++ }
++ ctx->pos = 0;
++ mutex_unlock(&ctx->mutex);
++ return ret;
++}
++
++
++static struct pcomp_alg unlzma_alg = {
++ .decompress_setup = unlzma_decompress_setup,
++ .decompress_init = unlzma_decompress_init,
++ .decompress_update = unlzma_decompress_update,
++ .decompress_final = unlzma_decompress_final,
++
++ .base = {
++ .cra_name = "lzma",
++ .cra_flags = CRYPTO_ALG_TYPE_PCOMPRESS,
++ .cra_ctxsize = sizeof(struct unlzma_ctx),
++ .cra_module = THIS_MODULE,
++ .cra_init = unlzma_init,
++ .cra_exit = unlzma_exit,
++ }
++};
++
++static int __init
++unlzma_mod_init(void)
++{
++ return crypto_register_pcomp(&unlzma_alg);
++}
++
++static void __exit
++unlzma_mod_exit(void)
++{
++ crypto_unregister_pcomp(&unlzma_alg);
++}
++
++module_init(unlzma_mod_init);
++module_exit(unlzma_mod_exit);
++
++MODULE_LICENSE("GPL");
++MODULE_DESCRIPTION("LZMA Decompression Algorithm");
++MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -768,6 +768,12 @@ config CRYPTO_ZLIB
+ help
+ This is the zlib algorithm.
+
++config CRYPTO_UNLZMA
++ tristate "LZMA decompression"
++ select CRYPTO_PCOMP
++ help
++ This is the lzma decompression module.
++
+ config CRYPTO_LZO
+ tristate "LZO compression algorithm"
+ select CRYPTO_ALGAPI
+--- a/crypto/Makefile
++++ b/crypto/Makefile
+@@ -75,6 +75,7 @@ obj-$(CONFIG_CRYPTO_SEED) += seed.o
+ obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o
+ obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o
+ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
++obj-$(CONFIG_CRYPTO_UNLZMA) += unlzma.o
+ obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
+ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
+ obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o
+--- /dev/null
++++ b/crypto/unlzma.h
+@@ -0,0 +1,80 @@
++/* LZMA uncompresion module for pcomp
++ * Copyright (C) 2009 Felix Fietkau <nbd@openwrt.org>
++ *
++ * Based on:
++ * Initial Linux kernel adaptation
++ * Copyright (C) 2006 Alain < alain@knaff.lu >
++ *
++ * Based on small lzma deflate implementation/Small range coder
++ * implementation for lzma.
++ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org >
++ *
++ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/)
++ * Copyright (C) 1999-2005 Igor Pavlov
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 as published
++ * by the Free Software Foundation.
++ */
++#ifndef __UNLZMA_H
++#define __UNLZMA_H
++
++struct lzma_header {
++ __u8 pos;
++ __le32 dict_size;
++} __attribute__ ((packed)) ;
++
++
++#define RC_TOP_BITS 24
++#define RC_MOVE_BITS 5
++#define RC_MODEL_TOTAL_BITS 11
++
++#define LZMA_BASE_SIZE 1846
++#define LZMA_LIT_SIZE 768
++
++#define LZMA_NUM_POS_BITS_MAX 4
++
++#define LZMA_LEN_NUM_LOW_BITS 3
++#define LZMA_LEN_NUM_MID_BITS 3
++#define LZMA_LEN_NUM_HIGH_BITS 8
++
++#define LZMA_LEN_CHOICE 0
++#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1)
++#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1)
++#define LZMA_LEN_MID (LZMA_LEN_LOW \
++ + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS)))
++#define LZMA_LEN_HIGH (LZMA_LEN_MID \
++ +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS)))
++#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS))
++
++#define LZMA_NUM_STATES 12
++#define LZMA_NUM_LIT_STATES 7
++
++#define LZMA_START_POS_MODEL_INDEX 4
++#define LZMA_END_POS_MODEL_INDEX 14
++#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1))
++
++#define LZMA_NUM_POS_SLOT_BITS 6
++#define LZMA_NUM_LEN_TO_POS_STATES 4
++
++#define LZMA_NUM_ALIGN_BITS 4
++
++#define LZMA_MATCH_MIN_LEN 2
++
++#define LZMA_IS_MATCH 0
++#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
++#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES)
++#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES)
++#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES)
++#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES)
++#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \
++ + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX))
++#define LZMA_SPEC_POS (LZMA_POS_SLOT \
++ +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS))
++#define LZMA_ALIGN (LZMA_SPEC_POS \
++ + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX)
++#define LZMA_LEN_CODER (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS))
++#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS)
++#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS)
++
++#endif
--- /dev/null
+--- a/fs/squashfs/Kconfig
++++ b/fs/squashfs/Kconfig
+@@ -2,7 +2,6 @@ config SQUASHFS
+ tristate "SquashFS 4.0 - Squashed file system support"
+ depends on BLOCK
+ select CRYPTO
+- select CRYPTO_ZLIB
+ help
+ Saying Y here includes support for SquashFS 4.0 (a Compressed
+ Read-Only File System). Squashfs is a highly compressed read-only
+@@ -37,6 +36,26 @@ config SQUASHFS_EMBEDDED
+
+ If unsure, say N.
+
++config SQUASHFS_SUPPORT_ZLIB
++ bool
++ prompt "Support ZLIB compression" if SQUASHFS_SUPPORT_LZMA
++ depends on SQUASHFS
++ select CRYPTO_ZLIB
++ default y
++ help
++ ZLIB is the default compression used in squashfs. If you are
++ using LZMA compression instead, you can remove support for ZLIB
++ entirely.
++
++config SQUASHFS_SUPPORT_LZMA
++ bool "Support LZMA compression"
++ depends on SQUASHFS
++ select CRYPTO_UNLZMA
++ help
++ By default SquashFS uses ZLIB compression, however (if your tools
++ support it, you can use LZMA instead, which saves space.
++
++
+ config SQUASHFS_FRAGMENT_CACHE_SIZE
+ int "Number of fragments cached" if SQUASHFS_EMBEDDED
+ depends on SQUASHFS
+--- a/fs/squashfs/squashfs_fs.h
++++ b/fs/squashfs/squashfs_fs.h
+@@ -212,6 +212,7 @@ struct meta_index {
+ * definitions for structures on disk
+ */
+ #define ZLIB_COMPRESSION 1
++#define LZMA_COMPRESSION 2
+
+ struct squashfs_super_block {
+ __le32 s_magic;
+--- a/fs/squashfs/super.c
++++ b/fs/squashfs/super.c
+@@ -48,13 +48,65 @@
+ #include "squashfs.h"
+
+
+-#define SQUASHFS_CRYPTO_ALG "zlib"
++static int squashfs_setup_zlib(struct squashfs_sb_info *msblk)
++{
++ int err = -EOPNOTSUPP;
++
++#ifdef CONFIG_SQUASHFS_SUPPORT_ZLIB
++ struct {
++ struct nlattr nla;
++ int val;
++ } params = {
++ .nla = {
++ .nla_len = nla_attr_size(sizeof(int)),
++ .nla_type = ZLIB_DECOMP_WINDOWBITS,
++ },
++ .val = DEF_WBITS,
++ };
++
++ msblk->tfm = crypto_alloc_pcomp("zlib", 0,
++ CRYPTO_ALG_ASYNC);
++ if (IS_ERR(msblk->tfm)) {
++ ERROR("Failed to load zlib crypto module\n");
++ return PTR_ERR(msblk->tfm);
++ }
++
++ err = crypto_decompress_setup(msblk->tfm, ¶ms, sizeof(params));
++ if (err) {
++ ERROR("Failed to set up decompression parameters\n");
++ crypto_free_pcomp(msblk->tfm);
++ }
++#endif
+
++ return err;
++}
++
++static int squashfs_setup_lzma(struct squashfs_sb_info *msblk)
++{
++ int err = -EOPNOTSUPP;
++
++#ifdef CONFIG_SQUASHFS_SUPPORT_LZMA
++ msblk->tfm = crypto_alloc_pcomp("lzma", 0,
++ CRYPTO_ALG_ASYNC);
++ if (IS_ERR(msblk->tfm)) {
++ ERROR("Failed to load lzma crypto module\n");
++ return PTR_ERR(msblk->tfm);
++ }
++
++ err = crypto_decompress_setup(msblk->tfm, NULL, 0);
++ if (err) {
++ ERROR("Failed to set up decompression parameters\n");
++ crypto_free_pcomp(msblk->tfm);
++ }
++#endif
++
++ return err;
++}
+
+ static struct file_system_type squashfs_fs_type;
+ static struct super_operations squashfs_super_ops;
+
+-static int supported_squashfs_filesystem(short major, short minor, short comp)
++static int supported_squashfs_filesystem(short major, short minor)
+ {
+ if (major < SQUASHFS_MAJOR) {
+ ERROR("Major/Minor mismatch, older Squashfs %d.%d "
+@@ -67,9 +119,6 @@ static int supported_squashfs_filesystem
+ return -EINVAL;
+ }
+
+- if (comp != ZLIB_COMPRESSION)
+- return -EINVAL;
+-
+ return 0;
+ }
+
+@@ -84,16 +133,6 @@ static int squashfs_fill_super(struct su
+ unsigned short flags;
+ unsigned int fragments;
+ u64 lookup_table_start;
+- struct {
+- struct nlattr nla;
+- int val;
+- } params = {
+- .nla = {
+- .nla_len = nla_attr_size(sizeof(int)),
+- .nla_type = ZLIB_DECOMP_WINDOWBITS,
+- },
+- .val = DEF_WBITS,
+- };
+ int err;
+
+ TRACE("Entered squashfs_fill_superblock\n");
+@@ -105,21 +144,6 @@ static int squashfs_fill_super(struct su
+ }
+ msblk = sb->s_fs_info;
+
+- msblk->tfm = crypto_alloc_pcomp(SQUASHFS_CRYPTO_ALG, 0,
+- CRYPTO_ALG_ASYNC);
+- if (IS_ERR(msblk->tfm)) {
+- ERROR("Failed to load %s crypto module\n",
+- SQUASHFS_CRYPTO_ALG);
+- err = PTR_ERR(msblk->tfm);
+- goto failed_pcomp;
+- }
+-
+- err = crypto_decompress_setup(msblk->tfm, ¶ms, sizeof(params));
+- if (err) {
+- ERROR("Failed to set up decompression parameters\n");
+- goto failure;
+- }
+-
+ sblk = kzalloc(sizeof(*sblk), GFP_KERNEL);
+ if (sblk == NULL) {
+ ERROR("Failed to allocate squashfs_super_block\n");
+@@ -159,8 +183,21 @@ static int squashfs_fill_super(struct su
+
+ /* Check the MAJOR & MINOR versions and compression type */
+ err = supported_squashfs_filesystem(le16_to_cpu(sblk->s_major),
+- le16_to_cpu(sblk->s_minor),
+- le16_to_cpu(sblk->compression));
++ le16_to_cpu(sblk->s_minor));
++ if (err < 0)
++ goto failed_mount;
++
++ switch(le16_to_cpu(sblk->compression)) {
++ case ZLIB_COMPRESSION:
++ err = squashfs_setup_zlib(msblk);
++ break;
++ case LZMA_COMPRESSION:
++ err = squashfs_setup_lzma(msblk);
++ break;
++ default:
++ err = -EINVAL;
++ break;
++ }
+ if (err < 0)
+ goto failed_mount;
+
+@@ -316,21 +353,16 @@ allocate_root:
+ return 0;
+
+ failed_mount:
++ if (msblk->tfm)
++ crypto_free_pcomp(msblk->tfm);
+ squashfs_cache_delete(msblk->block_cache);
+ squashfs_cache_delete(msblk->fragment_cache);
+ squashfs_cache_delete(msblk->read_page);
+ kfree(msblk->inode_lookup_table);
+ kfree(msblk->fragment_index);
+ kfree(msblk->id_table);
+- crypto_free_pcomp(msblk->tfm);
+- kfree(sb->s_fs_info);
+- sb->s_fs_info = NULL;
+ kfree(sblk);
+- return err;
+-
+ failure:
+- crypto_free_pcomp(msblk->tfm);
+-failed_pcomp:
+ kfree(sb->s_fs_info);
+ sb->s_fs_info = NULL;
+ return err;
--- /dev/null
+--- a/drivers/mtd/devices/block2mtd.c
++++ b/drivers/mtd/devices/block2mtd.c
+@@ -14,6 +14,7 @@
+ #include <linux/list.h>
+ #include <linux/init.h>
+ #include <linux/mtd/mtd.h>
++#include <linux/mtd/partitions.h>
+ #include <linux/buffer_head.h>
+ #include <linux/mutex.h>
+ #include <linux/mount.h>
+@@ -232,10 +233,11 @@ static void block2mtd_free_device(struct
+
+
+ /* FIXME: ensure that mtd->size % erase_size == 0 */
+-static struct block2mtd_dev *add_device(char *devname, int erase_size)
++static struct block2mtd_dev *add_device(char *devname, int erase_size, const char *mtdname)
+ {
+ struct block_device *bdev;
+ struct block2mtd_dev *dev;
++ struct mtd_partition *part;
+ char *name;
+
+ if (!devname)
+@@ -273,17 +275,17 @@ static struct block2mtd_dev *add_device(
+
+ mutex_init(&dev->write_mutex);
+
+- /* Setup the MTD structure */
+- /* make the name contain the block device in */
+- name = kmalloc(sizeof("block2mtd: ") + strlen(devname) + 1,
+- GFP_KERNEL);
++ if (!mtdname)
++ mtdname = devname;
++
++ name = kmalloc(strlen(mtdname) + 1, GFP_KERNEL);
+ if (!name)
+ goto devinit_err;
+
+- sprintf(name, "block2mtd: %s", devname);
++ strcpy(name, mtdname);
+ dev->mtd.name = name;
+
+- dev->mtd.size = dev->blkdev->bd_inode->i_size & PAGE_MASK;
++ dev->mtd.size = dev->blkdev->bd_inode->i_size & PAGE_MASK & ~(erase_size - 1);
+ dev->mtd.erasesize = erase_size;
+ dev->mtd.writesize = 1;
+ dev->mtd.type = MTD_RAM;
+@@ -296,14 +298,17 @@ static struct block2mtd_dev *add_device(
+ dev->mtd.priv = dev;
+ dev->mtd.owner = THIS_MODULE;
+
+- if (add_mtd_device(&dev->mtd)) {
++ part = kzalloc(sizeof(struct mtd_partition), GFP_KERNEL);
++ part->name = dev->mtd.name;
++ part->offset = 0;
++ part->size = dev->mtd.size;
++ if (add_mtd_partitions(&dev->mtd, part, 1)) {
+ /* Device didnt get added, so free the entry */
+ goto devinit_err;
+ }
+ list_add(&dev->list, &blkmtd_device_list);
+ INFO("mtd%d: [%s] erase_size = %dKiB [%d]", dev->mtd.index,
+- dev->mtd.name + strlen("block2mtd: "),
+- dev->mtd.erasesize >> 10, dev->mtd.erasesize);
++ mtdname, dev->mtd.erasesize >> 10, dev->mtd.erasesize);
+ return dev;
+
+ devinit_err:
+@@ -376,9 +381,9 @@ static char block2mtd_paramline[80 + 12]
+
+ static int block2mtd_setup2(const char *val)
+ {
+- char buf[80 + 12]; /* 80 for device, 12 for erase size */
++ char buf[80 + 12 + 80]; /* 80 for device, 12 for erase size, 80 for name */
+ char *str = buf;
+- char *token[2];
++ char *token[3];
+ char *name;
+ size_t erase_size = PAGE_SIZE;
+ int i, ret;
+@@ -389,7 +394,7 @@ static int block2mtd_setup2(const char *
+ strcpy(str, val);
+ kill_final_newline(str);
+
+- for (i = 0; i < 2; i++)
++ for (i = 0; i < 3; i++)
+ token[i] = strsep(&str, ",");
+
+ if (str)
+@@ -408,8 +413,10 @@ static int block2mtd_setup2(const char *
+ parse_err("illegal erase size");
+ }
+ }
++ if (token[2] && (strlen(token[2]) + 1 > 80))
++ parse_err("mtd device name too long");
+
+- add_device(name, erase_size);
++ add_device(name, erase_size, token[2]);
+
+ return 0;
+ }
+@@ -443,7 +450,7 @@ static int block2mtd_setup(const char *v
+
+
+ module_param_call(block2mtd, block2mtd_setup, NULL, NULL, 0200);
+-MODULE_PARM_DESC(block2mtd, "Device to use. \"block2mtd=<dev>[,<erasesize>]\"");
++MODULE_PARM_DESC(block2mtd, "Device to use. \"block2mtd=<dev>[,<erasesize>[,<name>]]\"");
+
+ static int __init block2mtd_init(void)
+ {
--- /dev/null
+--- a/drivers/mtd/Kconfig
++++ b/drivers/mtd/Kconfig
+@@ -53,6 +53,16 @@ config MTD_TESTS
+ should normally be compiled as kernel modules. The modules perform
+ various checks and verifications when loaded.
+
++config MTD_ROOTFS_ROOT_DEV
++ bool "Automatically set 'rootfs' partition to be root filesystem"
++ depends on MTD_PARTITIONS
++ default y
++
++config MTD_ROOTFS_SPLIT
++ bool "Automatically split 'rootfs' partition for squashfs"
++ depends on MTD_PARTITIONS
++ default y
++
+ config MTD_REDBOOT_PARTS
+ tristate "RedBoot partition table parsing"
+ depends on MTD_PARTITIONS
+--- a/drivers/mtd/mtdpart.c
++++ b/drivers/mtd/mtdpart.c
+@@ -18,6 +18,8 @@
+ #include <linux/mtd/mtd.h>
+ #include <linux/mtd/partitions.h>
+ #include <linux/mtd/compatmac.h>
++#include <linux/root_dev.h>
++#include <linux/magic.h>
+
+ /* Our partition linked list */
+ static LIST_HEAD(mtd_partitions);
+@@ -35,7 +37,7 @@ struct mtd_part {
+ * the pointer to that structure with this macro.
+ */
+ #define PART(x) ((struct mtd_part *)(x))
+-
++#define IS_PART(mtd) (mtd->read == part_read)
+
+ /*
+ * MTD methods which simply translate the effective address and pass through
+@@ -502,6 +504,150 @@ out_register:
+ return slave;
+ }
+
++#ifdef CONFIG_MTD_ROOTFS_SPLIT
++#define ROOTFS_SPLIT_NAME "rootfs_data"
++#define ROOTFS_REMOVED_NAME "<removed>"
++
++struct squashfs_super_block {
++ __le32 s_magic;
++ __le32 pad0[9];
++ __le64 bytes_used;
++};
++
++
++static int split_squashfs(struct mtd_info *master, int offset, int *split_offset)
++{
++ struct squashfs_super_block sb;
++ int len, ret;
++
++ ret = master->read(master, offset, sizeof(sb), &len, (void *) &sb);
++ if (ret || (len != sizeof(sb))) {
++ printk(KERN_ALERT "split_squashfs: error occured while reading "
++ "from \"%s\"\n", master->name);
++ return -EINVAL;
++ }
++
++ if (SQUASHFS_MAGIC != le32_to_cpu(sb.s_magic) ) {
++ printk(KERN_ALERT "split_squashfs: no squashfs found in \"%s\"\n",
++ master->name);
++ *split_offset = 0;
++ return 0;
++ }
++
++ if (le64_to_cpu((sb.bytes_used)) <= 0) {
++ printk(KERN_ALERT "split_squashfs: squashfs is empty in \"%s\"\n",
++ master->name);
++ *split_offset = 0;
++ return 0;
++ }
++
++ len = (u32) le64_to_cpu(sb.bytes_used);
++ len += (offset & 0x000fffff);
++ len += (master->erasesize - 1);
++ len &= ~(master->erasesize - 1);
++ len -= (offset & 0x000fffff);
++ *split_offset = offset + len;
++
++ return 0;
++}
++
++static int split_rootfs_data(struct mtd_info *master, struct mtd_info *rpart, const struct mtd_partition *part)
++{
++ struct mtd_partition *dpart;
++ struct mtd_part *slave = NULL;
++ int split_offset = 0;
++ int ret;
++
++ ret = split_squashfs(master, part->offset, &split_offset);
++ if (ret)
++ return ret;
++
++ if (split_offset <= 0)
++ return 0;
++
++ dpart = kmalloc(sizeof(*part)+sizeof(ROOTFS_SPLIT_NAME)+1, GFP_KERNEL);
++ if (dpart == NULL) {
++ printk(KERN_INFO "split_squashfs: no memory for partition \"%s\"\n",
++ ROOTFS_SPLIT_NAME);
++ return -ENOMEM;
++ }
++
++ memcpy(dpart, part, sizeof(*part));
++ dpart->name = (unsigned char *)&dpart[1];
++ strcpy(dpart->name, ROOTFS_SPLIT_NAME);
++
++ dpart->size -= split_offset - dpart->offset;
++ dpart->offset = split_offset;
++
++ if (dpart == NULL)
++ return 1;
++
++ printk(KERN_INFO "mtd: partition \"%s\" created automatically, ofs=%llX, len=%llX \n",
++ ROOTFS_SPLIT_NAME, dpart->offset, dpart->size);
++
++ slave = add_one_partition(master, dpart, 0, split_offset);
++ if (!slave) {
++ kfree(dpart);
++ return -ENOMEM;
++ }
++ rpart->split = &slave->mtd;
++
++ return 0;
++}
++
++static int refresh_rootfs_split(struct mtd_info *mtd)
++{
++ struct mtd_partition tpart;
++ struct mtd_part *part;
++ char *name;
++ //int index = 0;
++ int offset, size;
++ int ret;
++
++ part = PART(mtd);
++
++ /* check for the new squashfs offset first */
++ ret = split_squashfs(part->master, part->offset, &offset);
++ if (ret)
++ return ret;
++
++ if ((offset > 0) && !mtd->split) {
++ printk(KERN_INFO "%s: creating new split partition for \"%s\"\n", __func__, mtd->name);
++ /* if we don't have a rootfs split partition, create a new one */
++ tpart.name = (char *) mtd->name;
++ tpart.size = mtd->size;
++ tpart.offset = part->offset;
++
++ return split_rootfs_data(part->master, &part->mtd, &tpart);
++ } else if ((offset > 0) && mtd->split) {
++ /* update the offsets of the existing partition */
++ size = mtd->size + part->offset - offset;
++
++ part = PART(mtd->split);
++ part->offset = offset;
++ part->mtd.size = size;
++ printk(KERN_INFO "%s: %s partition \"" ROOTFS_SPLIT_NAME "\", offset: 0x%06x (0x%06x)\n",
++ __func__, (!strcmp(part->mtd.name, ROOTFS_SPLIT_NAME) ? "updating" : "creating"),
++ (u32) part->offset, (u32) part->mtd.size);
++ name = kmalloc(sizeof(ROOTFS_SPLIT_NAME) + 1, GFP_KERNEL);
++ strcpy(name, ROOTFS_SPLIT_NAME);
++ part->mtd.name = name;
++ } else if ((offset <= 0) && mtd->split) {
++ printk(KERN_INFO "%s: removing partition \"%s\"\n", __func__, mtd->split->name);
++
++ /* mark existing partition as removed */
++ part = PART(mtd->split);
++ name = kmalloc(sizeof(ROOTFS_SPLIT_NAME) + 1, GFP_KERNEL);
++ strcpy(name, ROOTFS_REMOVED_NAME);
++ part->mtd.name = name;
++ part->offset = 0;
++ part->mtd.size = 0;
++ }
++
++ return 0;
++}
++#endif /* CONFIG_MTD_ROOTFS_SPLIT */
++
+ /*
+ * This function, given a master MTD object and a partition table, creates
+ * and registers slave MTD objects which are bound to the master according to
+@@ -517,7 +663,7 @@ int add_mtd_partitions(struct mtd_info *
+ {
+ struct mtd_part *slave;
+ uint64_t cur_offset = 0;
+- int i;
++ int i, ret;
+
+ printk(KERN_NOTICE "Creating %d MTD partitions on \"%s\":\n", nbparts, master->name);
+
+@@ -525,6 +671,21 @@ int add_mtd_partitions(struct mtd_info *
+ slave = add_one_partition(master, parts + i, i, cur_offset);
+ if (!slave)
+ return -ENOMEM;
++
++ if (!strcmp(parts[i].name, "rootfs")) {
++#ifdef CONFIG_MTD_ROOTFS_ROOT_DEV
++ if (ROOT_DEV == 0) {
++ printk(KERN_NOTICE "mtd: partition \"rootfs\" "
++ "set to be root filesystem\n");
++ ROOT_DEV = MKDEV(MTD_BLOCK_MAJOR, i);
++ }
++#endif
++#ifdef CONFIG_MTD_ROOTFS_SPLIT
++ ret = split_rootfs_data(master, &slave->mtd, &parts[i]);
++ /* if (ret == 0)
++ j++; */
++#endif
++ }
+ cur_offset = slave->offset + slave->mtd.size;
+ }
+
+@@ -532,6 +693,32 @@ int add_mtd_partitions(struct mtd_info *
+ }
+ EXPORT_SYMBOL(add_mtd_partitions);
+
++int refresh_mtd_partitions(struct mtd_info *mtd)
++{
++ int ret = 0;
++
++ if (IS_PART(mtd)) {
++ struct mtd_part *part;
++ struct mtd_info *master;
++
++ part = PART(mtd);
++ master = part->master;
++ if (master->refresh_device)
++ ret = master->refresh_device(master);
++ }
++
++ if (!ret && mtd->refresh_device)
++ ret = mtd->refresh_device(mtd);
++
++#ifdef CONFIG_MTD_ROOTFS_SPLIT
++ if (!ret && IS_PART(mtd) && !strcmp(mtd->name, "rootfs"))
++ refresh_rootfs_split(mtd);
++#endif
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(refresh_mtd_partitions);
++
+ static DEFINE_SPINLOCK(part_parser_lock);
+ static LIST_HEAD(part_parsers);
+
+--- a/drivers/mtd/devices/block2mtd.c
++++ b/drivers/mtd/devices/block2mtd.c
+@@ -29,6 +29,8 @@ struct block2mtd_dev {
+ struct block_device *blkdev;
+ struct mtd_info mtd;
+ struct mutex write_mutex;
++ rwlock_t bdev_mutex;
++ char devname[0];
+ };
+
+
+@@ -81,6 +83,12 @@ static int block2mtd_erase(struct mtd_in
+ size_t len = instr->len;
+ int err;
+
++ read_lock(&dev->bdev_mutex);
++ if (!dev->blkdev) {
++ err = -EINVAL;
++ goto done;
++ }
++
+ instr->state = MTD_ERASING;
+ mutex_lock(&dev->write_mutex);
+ err = _block2mtd_erase(dev, from, len);
+@@ -93,6 +101,10 @@ static int block2mtd_erase(struct mtd_in
+
+ instr->state = MTD_ERASE_DONE;
+ mtd_erase_callback(instr);
++
++done:
++ read_unlock(&dev->bdev_mutex);
++
+ return err;
+ }
+
+@@ -104,10 +116,14 @@ static int block2mtd_read(struct mtd_inf
+ struct page *page;
+ int index = from >> PAGE_SHIFT;
+ int offset = from & (PAGE_SIZE-1);
+- int cpylen;
++ int cpylen, err = 0;
++
++ read_lock(&dev->bdev_mutex);
++ if (!dev->blkdev || (from > mtd->size)) {
++ err = -EINVAL;
++ goto done;
++ }
+
+- if (from > mtd->size)
+- return -EINVAL;
+ if (from + len > mtd->size)
+ len = mtd->size - from;
+
+@@ -122,10 +138,14 @@ static int block2mtd_read(struct mtd_inf
+ len = len - cpylen;
+
+ page = page_read(dev->blkdev->bd_inode->i_mapping, index);
+- if (!page)
+- return -ENOMEM;
+- if (IS_ERR(page))
+- return PTR_ERR(page);
++ if (!page) {
++ err = -ENOMEM;
++ goto done;
++ }
++ if (IS_ERR(page)) {
++ err = PTR_ERR(page);
++ goto done;
++ }
+
+ memcpy(buf, page_address(page) + offset, cpylen);
+ page_cache_release(page);
+@@ -136,7 +156,10 @@ static int block2mtd_read(struct mtd_inf
+ offset = 0;
+ index++;
+ }
+- return 0;
++
++done:
++ read_unlock(&dev->bdev_mutex);
++ return err;
+ }
+
+
+@@ -188,12 +211,22 @@ static int block2mtd_write(struct mtd_in
+ size_t *retlen, const u_char *buf)
+ {
+ struct block2mtd_dev *dev = mtd->priv;
+- int err;
++ int err = 0;
++
++ read_lock(&dev->bdev_mutex);
++ if (!dev->blkdev) {
++ err = -EINVAL;
++ goto done;
++ }
+
+ if (!len)
+- return 0;
+- if (to >= mtd->size)
+- return -ENOSPC;
++ goto done;
++
++ if (to >= mtd->size) {
++ err = -ENOSPC;
++ goto done;
++ }
++
+ if (to + len > mtd->size)
+ len = mtd->size - to;
+
+@@ -202,6 +235,9 @@ static int block2mtd_write(struct mtd_in
+ mutex_unlock(&dev->write_mutex);
+ if (err > 0)
+ err = 0;
++
++done:
++ read_unlock(&dev->bdev_mutex);
+ return err;
+ }
+
+@@ -210,52 +246,29 @@ static int block2mtd_write(struct mtd_in
+ static void block2mtd_sync(struct mtd_info *mtd)
+ {
+ struct block2mtd_dev *dev = mtd->priv;
+- sync_blockdev(dev->blkdev);
+- return;
+-}
+-
+-
+-static void block2mtd_free_device(struct block2mtd_dev *dev)
+-{
+- if (!dev)
+- return;
+-
+- kfree(dev->mtd.name);
+
+- if (dev->blkdev) {
+- invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping,
+- 0, -1);
+- close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE);
+- }
++ read_lock(&dev->bdev_mutex);
++ if (dev->blkdev)
++ sync_blockdev(dev->blkdev);
++ read_unlock(&dev->bdev_mutex);
+
+- kfree(dev);
++ return;
+ }
+
+
+-/* FIXME: ensure that mtd->size % erase_size == 0 */
+-static struct block2mtd_dev *add_device(char *devname, int erase_size, const char *mtdname)
++static int _open_bdev(struct block2mtd_dev *dev)
+ {
+ struct block_device *bdev;
+- struct block2mtd_dev *dev;
+- struct mtd_partition *part;
+- char *name;
+-
+- if (!devname)
+- return NULL;
+-
+- dev = kzalloc(sizeof(struct block2mtd_dev), GFP_KERNEL);
+- if (!dev)
+- return NULL;
+
+ /* Get a handle on the device */
+- bdev = open_bdev_exclusive(devname, FMODE_READ|FMODE_WRITE, NULL);
++ bdev = open_bdev_exclusive(dev->devname, FMODE_READ|FMODE_WRITE, NULL);
+ #ifndef MODULE
+ if (IS_ERR(bdev)) {
+
+ /* We might not have rootfs mounted at this point. Try
+ to resolve the device name by other means. */
+
+- dev_t devt = name_to_dev_t(devname);
++ dev_t devt = name_to_dev_t(dev->devname);
+ if (devt) {
+ bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ);
+ }
+@@ -263,17 +276,97 @@ static struct block2mtd_dev *add_device(
+ #endif
+
+ if (IS_ERR(bdev)) {
+- ERROR("error: cannot open device %s", devname);
+- goto devinit_err;
++ ERROR("error: cannot open device %s", dev->devname);
++ return 1;
+ }
+ dev->blkdev = bdev;
+
+ if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
+ ERROR("attempting to use an MTD device as a block device");
+- goto devinit_err;
++ return 1;
+ }
+
++ return 0;
++}
++
++static void _close_bdev(struct block2mtd_dev *dev)
++{
++ struct block_device *bdev;
++
++ if (!dev->blkdev)
++ return;
++
++ bdev = dev->blkdev;
++ invalidate_mapping_pages(dev->blkdev->bd_inode->i_mapping, 0, -1);
++ close_bdev_exclusive(dev->blkdev, FMODE_READ|FMODE_WRITE);
++ dev->blkdev = NULL;
++}
++
++static void block2mtd_free_device(struct block2mtd_dev *dev)
++{
++ if (!dev)
++ return;
++
++ kfree(dev->mtd.name);
++ _close_bdev(dev);
++ kfree(dev);
++}
++
++
++static int block2mtd_refresh(struct mtd_info *mtd)
++{
++ struct block2mtd_dev *dev = mtd->priv;
++ struct block_device *bdev;
++ dev_t devt;
++ int err = 0;
++
++ /* no other mtd function can run at this point */
++ write_lock(&dev->bdev_mutex);
++
++ /* get the device number for the whole disk */
++ devt = MKDEV(MAJOR(dev->blkdev->bd_dev), 0);
++
++ /* close the old block device */
++ _close_bdev(dev);
++
++ /* open the whole disk, issue a partition rescan, then */
++ bdev = open_by_devnum(devt, FMODE_WRITE | FMODE_READ);
++ if (!bdev || !bdev->bd_disk)
++ err = -EINVAL;
++ else {
++ err = rescan_partitions(bdev->bd_disk, bdev);
++ }
++ if (bdev)
++ close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
++
++ /* try to open the partition block device again */
++ _open_bdev(dev);
++ write_unlock(&dev->bdev_mutex);
++
++ return err;
++}
++
++/* FIXME: ensure that mtd->size % erase_size == 0 */
++static struct block2mtd_dev *add_device(char *devname, int erase_size, char *mtdname)
++{
++ struct block2mtd_dev *dev;
++ struct mtd_partition *part;
++ char *name;
++
++ if (!devname)
++ return NULL;
++
++ dev = kzalloc(sizeof(struct block2mtd_dev) + strlen(devname) + 1, GFP_KERNEL);
++ if (!dev)
++ return NULL;
++
++ strcpy(dev->devname, devname);
++
++ if (_open_bdev(dev))
++ goto devinit_err;
++
+ mutex_init(&dev->write_mutex);
++ rwlock_init(&dev->bdev_mutex);
+
+ if (!mtdname)
+ mtdname = devname;
+@@ -297,6 +390,7 @@ static struct block2mtd_dev *add_device(
+ dev->mtd.read = block2mtd_read;
+ dev->mtd.priv = dev;
+ dev->mtd.owner = THIS_MODULE;
++ dev->mtd.refresh_device = block2mtd_refresh;
+
+ part = kzalloc(sizeof(struct mtd_partition), GFP_KERNEL);
+ part->name = dev->mtd.name;
+--- a/drivers/mtd/mtdchar.c
++++ b/drivers/mtd/mtdchar.c
+@@ -18,6 +18,7 @@
+
+ #include <linux/mtd/mtd.h>
+ #include <linux/mtd/compatmac.h>
++#include <linux/mtd/partitions.h>
+
+ #include <asm/uaccess.h>
+
+@@ -814,6 +815,13 @@ static int mtd_ioctl(struct inode *inode
+ file->f_pos = 0;
+ break;
+ }
++#ifdef CONFIG_MTD_PARTITIONS
++ case MTDREFRESH:
++ {
++ ret = refresh_mtd_partitions(mtd);
++ break;
++ }
++#endif
+
+ default:
+ ret = -ENOTTY;
+--- a/include/linux/mtd/mtd.h
++++ b/include/linux/mtd/mtd.h
+@@ -101,6 +101,7 @@ struct mtd_oob_ops {
+ uint8_t *oobbuf;
+ };
+
++struct mtd_info;
+ struct mtd_info {
+ u_char type;
+ uint32_t flags;
+@@ -241,6 +242,9 @@ struct mtd_info {
+ struct device dev;
+ int usecount;
+
++ int (*refresh_device)(struct mtd_info *mtd);
++ struct mtd_info *split;
++
+ /* If the driver is something smart, like UBI, it may need to maintain
+ * its own reference counting. The below functions are only for driver.
+ * The driver may register its callbacks. These callbacks are not
+--- a/include/linux/mtd/partitions.h
++++ b/include/linux/mtd/partitions.h
+@@ -34,12 +34,14 @@
+ * erasesize aligned (e.g. use MTDPART_OFS_NEXTBLK).
+ */
+
++struct mtd_partition;
+ struct mtd_partition {
+ char *name; /* identifier string */
+ uint64_t size; /* partition size */
+ uint64_t offset; /* offset within the master MTD space */
+ uint32_t mask_flags; /* master MTD flags to mask out for this partition */
+ struct nand_ecclayout *ecclayout; /* out of band layout for this partition (NAND only)*/
++ int (*refresh_partition)(struct mtd_info *);
+ };
+
+ #define MTDPART_OFS_NXTBLK (-2)
+@@ -51,6 +53,7 @@ struct mtd_info;
+
+ int add_mtd_partitions(struct mtd_info *, const struct mtd_partition *, int);
+ int del_mtd_partitions(struct mtd_info *);
++int refresh_mtd_partitions(struct mtd_info *);
+
+ /*
+ * Functions dealing with the various ways of partitioning the space
+--- a/include/mtd/mtd-abi.h
++++ b/include/mtd/mtd-abi.h
+@@ -110,6 +110,7 @@ struct otp_info {
+ #define MEMERASE64 _IOW('M', 20, struct erase_info_user64)
+ #define MEMWRITEOOB64 _IOWR('M', 21, struct mtd_oob_buf64)
+ #define MEMREADOOB64 _IOWR('M', 22, struct mtd_oob_buf64)
++#define MTDREFRESH _IO('M', 23)
+
+ /*
+ * Obsolete legacy interface. Keep it in order not to break userspace
--- /dev/null
+--- a/drivers/mtd/redboot.c
++++ b/drivers/mtd/redboot.c
+@@ -249,14 +249,21 @@ static int parse_redboot_partitions(stru
+ #endif
+ names += strlen(names)+1;
+
+-#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED
+ if(fl->next && fl->img->flash_base + fl->img->size + master->erasesize <= fl->next->img->flash_base) {
+- i++;
+- parts[i].offset = parts[i-1].size + parts[i-1].offset;
+- parts[i].size = fl->next->img->flash_base - parts[i].offset;
+- parts[i].name = nullname;
+- }
++ if (!strcmp(parts[i].name, "rootfs")) {
++ parts[i].size = fl->next->img->flash_base;
++ parts[i].size &= ~(master->erasesize - 1);
++ parts[i].size -= parts[i].offset;
++#ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED
++ nrparts--;
++ } else {
++ i++;
++ parts[i].offset = parts[i-1].size + parts[i-1].offset;
++ parts[i].size = fl->next->img->flash_base - parts[i].offset;
++ parts[i].name = nullname;
+ #endif
++ }
++ }
+ tmp_fl = fl;
+ fl = fl->next;
+ kfree(tmp_fl);
--- /dev/null
+--- a/drivers/mtd/redboot.c
++++ b/drivers/mtd/redboot.c
+@@ -11,6 +11,8 @@
+ #include <linux/mtd/mtd.h>
+ #include <linux/mtd/partitions.h>
+
++#define BOARD_CONFIG_PART "boardconfig"
++
+ struct fis_image_desc {
+ unsigned char name[16]; // Null terminated name
+ uint32_t flash_base; // Address within FLASH of image
+@@ -41,6 +43,7 @@ static int parse_redboot_partitions(stru
+ struct mtd_partition **pparts,
+ unsigned long fis_origin)
+ {
++ unsigned long max_offset = 0;
+ int nrparts = 0;
+ struct fis_image_desc *buf;
+ struct mtd_partition *parts;
+@@ -209,14 +212,14 @@ static int parse_redboot_partitions(stru
+ }
+ }
+ #endif
+- parts = kzalloc(sizeof(*parts)*nrparts + nulllen + namelen, GFP_KERNEL);
++ parts = kzalloc(sizeof(*parts) * (nrparts + 1) + nulllen + namelen + sizeof(BOARD_CONFIG_PART), GFP_KERNEL);
+
+ if (!parts) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+- nullname = (char *)&parts[nrparts];
++ nullname = (char *)&parts[nrparts + 1];
+ #ifdef CONFIG_MTD_REDBOOT_PARTS_UNALLOCATED
+ if (nulllen > 0) {
+ strcpy(nullname, nullstring);
+@@ -235,6 +238,8 @@ static int parse_redboot_partitions(stru
+ }
+ #endif
+ for ( ; i<nrparts; i++) {
++ if(max_offset < buf[i].flash_base + buf[i].size)
++ max_offset = buf[i].flash_base + buf[i].size;
+ parts[i].size = fl->img->size;
+ parts[i].offset = fl->img->flash_base;
+ parts[i].name = names;
+@@ -268,6 +273,14 @@ static int parse_redboot_partitions(stru
+ fl = fl->next;
+ kfree(tmp_fl);
+ }
++ if(master->size - max_offset >= master->erasesize)
++ {
++ parts[nrparts].size = master->size - max_offset;
++ parts[nrparts].offset = max_offset;
++ parts[nrparts].name = names;
++ strcpy(names, BOARD_CONFIG_PART);
++ nrparts++;
++ }
+ ret = nrparts;
+ *pparts = parts;
+ out:
--- /dev/null
+--- a/include/linux/mtd/nand.h
++++ b/include/linux/mtd/nand.h
+@@ -575,6 +575,7 @@ struct platform_nand_chip {
+ int chip_delay;
+ unsigned int options;
+ const char **part_probe_types;
++ int (*chip_fixup)(struct mtd_info *mtd);
+ void (*set_parts)(uint64_t size,
+ struct platform_nand_chip *chip);
+ void *priv;
+--- a/drivers/mtd/nand/plat_nand.c
++++ b/drivers/mtd/nand/plat_nand.c
+@@ -80,7 +80,18 @@ static int __devinit plat_nand_probe(str
+ }
+
+ /* Scan to find existance of the device */
+- if (nand_scan(&data->mtd, 1)) {
++ if (nand_scan_ident(&data->mtd, 1)) {
++ res = -ENXIO;
++ goto out;
++ }
++
++ if (pdata->chip.chip_fixup) {
++ res = pdata->chip.chip_fixup(&data->mtd);
++ if (res)
++ goto out;
++ }
++
++ if (nand_scan_tail(&data->mtd)) {
+ res = -ENXIO;
+ goto out;
+ }
--- /dev/null
+--- a/drivers/mtd/Kconfig
++++ b/drivers/mtd/Kconfig
+@@ -181,6 +181,22 @@ config MTD_AR7_PARTS
+ ---help---
+ TI AR7 partitioning support
+
++config MTD_MYLOADER_PARTS
++ tristate "MyLoader partition parsing"
++ depends on MTD_PARTITIONS && (ADM5120 || ATHEROS_AR231X || ATHEROS_AR71XX)
++ ---help---
++ MyLoader is a bootloader which allows the user to define partitions
++ in flash devices, by putting a table in the second erase block
++ on the device, similar to a partition table. This table gives the
++ offsets and lengths of the user defined partitions.
++
++ If you need code which can detect and parse these tables, and
++ register MTD 'partitions' corresponding to each image detected,
++ enable this option.
++
++ You will still need the parsing functions to be called by the driver
++ for your particular device. It won't happen automatically.
++
+ comment "User Modules And Translation Layers"
+
+ config MTD_CHAR
+--- a/drivers/mtd/Makefile
++++ b/drivers/mtd/Makefile
+@@ -13,6 +13,7 @@ obj-$(CONFIG_MTD_CMDLINE_PARTS) += cmdli
+ obj-$(CONFIG_MTD_AFS_PARTS) += afs.o
+ obj-$(CONFIG_MTD_AR7_PARTS) += ar7part.o
+ obj-$(CONFIG_MTD_OF_PARTS) += ofpart.o
++obj-$(CONFIG_MTD_MYLOADER_PARTS) += myloader.o
+
+ # 'Users' - code which presents functionality to userspace.
+ obj-$(CONFIG_MTD_CHAR) += mtdchar.o
--- /dev/null
+--- a/drivers/mtd/nand/nand_ecc.c
++++ b/drivers/mtd/nand/nand_ecc.c
+@@ -492,8 +492,7 @@ int nand_correct_data(struct mtd_info *m
+ if ((bitsperbyte[b0] + bitsperbyte[b1] + bitsperbyte[b2]) == 1)
+ return 1; /* error in ecc data; no action needed */
+
+- printk(KERN_ERR "uncorrectable error : ");
+- return -1;
++ return -EBADMSG;
+ }
+ EXPORT_SYMBOL(nand_correct_data);
+
--- /dev/null
+--- /dev/null
++++ b/include/linux/netfilter/xt_layer7.h
+@@ -0,0 +1,13 @@
++#ifndef _XT_LAYER7_H
++#define _XT_LAYER7_H
++
++#define MAX_PATTERN_LEN 8192
++#define MAX_PROTOCOL_LEN 256
++
++struct xt_layer7_info {
++ char protocol[MAX_PROTOCOL_LEN];
++ char pattern[MAX_PATTERN_LEN];
++ u_int8_t invert;
++};
++
++#endif /* _XT_LAYER7_H */
+--- a/include/net/netfilter/nf_conntrack.h
++++ b/include/net/netfilter/nf_conntrack.h
+@@ -116,6 +116,22 @@ struct nf_conn {
+ u_int32_t secmark;
+ #endif
+
++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || \
++ defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
++ struct {
++ /*
++ * e.g. "http". NULL before decision. "unknown" after decision
++ * if no match.
++ */
++ char *app_proto;
++ /*
++ * application layer data so far. NULL after match decision.
++ */
++ char *app_data;
++ unsigned int app_data_len;
++ } layer7;
++#endif
++
+ /* Storage reserved for other modules: */
+ union nf_conntrack_proto proto;
+
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -858,6 +858,27 @@ config NETFILTER_XT_MATCH_STATE
+
+ To compile it as a module, choose M here. If unsure, say N.
+
++config NETFILTER_XT_MATCH_LAYER7
++ tristate '"layer7" match support'
++ depends on NETFILTER_XTABLES
++ depends on EXPERIMENTAL && (IP_NF_CONNTRACK || NF_CONNTRACK)
++ depends on NF_CT_ACCT
++ help
++ Say Y if you want to be able to classify connections (and their
++ packets) based on regular expression matching of their application
++ layer data. This is one way to classify applications such as
++ peer-to-peer filesharing systems that do not always use the same
++ port.
++
++ To compile it as a module, choose M here. If unsure, say N.
++
++config NETFILTER_XT_MATCH_LAYER7_DEBUG
++ bool 'Layer 7 debugging output'
++ depends on NETFILTER_XT_MATCH_LAYER7
++ help
++ Say Y to get lots of debugging output.
++
++
+ config NETFILTER_XT_MATCH_STATISTIC
+ tristate '"statistic" match support'
+ depends on NETFILTER_ADVANCED
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -89,6 +89,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT)
+ obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
+ obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
+ obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
++obj-$(CONFIG_NETFILTER_XT_MATCH_LAYER7) += xt_layer7.o
+ obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
+ obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
+ obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
+--- a/net/netfilter/nf_conntrack_core.c
++++ b/net/netfilter/nf_conntrack_core.c
+@@ -200,6 +200,14 @@ destroy_conntrack(struct nf_conntrack *n
+ * too. */
+ nf_ct_remove_expectations(ct);
+
++ #if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
++ if(ct->layer7.app_proto)
++ kfree(ct->layer7.app_proto);
++ if(ct->layer7.app_data)
++ kfree(ct->layer7.app_data);
++ #endif
++
++
+ /* We overload first tuple to link into unconfirmed list. */
+ if (!nf_ct_is_confirmed(ct)) {
+ BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
+--- a/net/netfilter/nf_conntrack_standalone.c
++++ b/net/netfilter/nf_conntrack_standalone.c
+@@ -171,6 +171,12 @@ static int ct_seq_show(struct seq_file *
+ goto release;
+ #endif
+
++#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
++ if(ct->layer7.app_proto &&
++ seq_printf(s, "l7proto=%s ", ct->layer7.app_proto))
++ return -ENOSPC;
++#endif
++
+ if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
+ goto release;
+
+--- /dev/null
++++ b/net/netfilter/regexp/regexp.c
+@@ -0,0 +1,1197 @@
++/*
++ * regcomp and regexec -- regsub and regerror are elsewhere
++ * @(#)regexp.c 1.3 of 18 April 87
++ *
++ * Copyright (c) 1986 by University of Toronto.
++ * Written by Henry Spencer. Not derived from licensed software.
++ *
++ * Permission is granted to anyone to use this software for any
++ * purpose on any computer system, and to redistribute it freely,
++ * subject to the following restrictions:
++ *
++ * 1. The author is not responsible for the consequences of use of
++ * this software, no matter how awful, even if they arise
++ * from defects in it.
++ *
++ * 2. The origin of this software must not be misrepresented, either
++ * by explicit claim or by omission.
++ *
++ * 3. Altered versions must be plainly marked as such, and must not
++ * be misrepresented as being the original software.
++ *
++ * Beware that some of this code is subtly aware of the way operator
++ * precedence is structured in regular expressions. Serious changes in
++ * regular-expression syntax might require a total rethink.
++ *
++ * This code was modified by Ethan Sommer to work within the kernel
++ * (it now uses kmalloc etc..)
++ *
++ * Modified slightly by Matthew Strait to use more modern C.
++ */
++
++#include "regexp.h"
++#include "regmagic.h"
++
++/* added by ethan and matt. Lets it work in both kernel and user space.
++(So iptables can use it, for instance.) Yea, it goes both ways... */
++#if __KERNEL__
++ #define malloc(foo) kmalloc(foo,GFP_ATOMIC)
++#else
++ #define printk(format,args...) printf(format,##args)
++#endif
++
++void regerror(char * s)
++{
++ printk("<3>Regexp: %s\n", s);
++ /* NOTREACHED */
++}
++
++/*
++ * The "internal use only" fields in regexp.h are present to pass info from
++ * compile to execute that permits the execute phase to run lots faster on
++ * simple cases. They are:
++ *
++ * regstart char that must begin a match; '\0' if none obvious
++ * reganch is the match anchored (at beginning-of-line only)?
++ * regmust string (pointer into program) that match must include, or NULL
++ * regmlen length of regmust string
++ *
++ * Regstart and reganch permit very fast decisions on suitable starting points
++ * for a match, cutting down the work a lot. Regmust permits fast rejection
++ * of lines that cannot possibly match. The regmust tests are costly enough
++ * that regcomp() supplies a regmust only if the r.e. contains something
++ * potentially expensive (at present, the only such thing detected is * or +
++ * at the start of the r.e., which can involve a lot of backup). Regmlen is
++ * supplied because the test in regexec() needs it and regcomp() is computing
++ * it anyway.
++ */
++
++/*
++ * Structure for regexp "program". This is essentially a linear encoding
++ * of a nondeterministic finite-state machine (aka syntax charts or
++ * "railroad normal form" in parsing technology). Each node is an opcode
++ * plus a "next" pointer, possibly plus an operand. "Next" pointers of
++ * all nodes except BRANCH implement concatenation; a "next" pointer with
++ * a BRANCH on both ends of it is connecting two alternatives. (Here we
++ * have one of the subtle syntax dependencies: an individual BRANCH (as
++ * opposed to a collection of them) is never concatenated with anything
++ * because of operator precedence.) The operand of some types of node is
++ * a literal string; for others, it is a node leading into a sub-FSM. In
++ * particular, the operand of a BRANCH node is the first node of the branch.
++ * (NB this is *not* a tree structure: the tail of the branch connects
++ * to the thing following the set of BRANCHes.) The opcodes are:
++ */
++
++/* definition number opnd? meaning */
++#define END 0 /* no End of program. */
++#define BOL 1 /* no Match "" at beginning of line. */
++#define EOL 2 /* no Match "" at end of line. */
++#define ANY 3 /* no Match any one character. */
++#define ANYOF 4 /* str Match any character in this string. */
++#define ANYBUT 5 /* str Match any character not in this string. */
++#define BRANCH 6 /* node Match this alternative, or the next... */
++#define BACK 7 /* no Match "", "next" ptr points backward. */
++#define EXACTLY 8 /* str Match this string. */
++#define NOTHING 9 /* no Match empty string. */
++#define STAR 10 /* node Match this (simple) thing 0 or more times. */
++#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
++#define OPEN 20 /* no Mark this point in input as start of #n. */
++ /* OPEN+1 is number 1, etc. */
++#define CLOSE 30 /* no Analogous to OPEN. */
++
++/*
++ * Opcode notes:
++ *
++ * BRANCH The set of branches constituting a single choice are hooked
++ * together with their "next" pointers, since precedence prevents
++ * anything being concatenated to any individual branch. The
++ * "next" pointer of the last BRANCH in a choice points to the
++ * thing following the whole choice. This is also where the
++ * final "next" pointer of each individual branch points; each
++ * branch starts with the operand node of a BRANCH node.
++ *
++ * BACK Normal "next" pointers all implicitly point forward; BACK
++ * exists to make loop structures possible.
++ *
++ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
++ * BRANCH structures using BACK. Simple cases (one character
++ * per match) are implemented with STAR and PLUS for speed
++ * and to minimize recursive plunges.
++ *
++ * OPEN,CLOSE ...are numbered at compile time.
++ */
++
++/*
++ * A node is one char of opcode followed by two chars of "next" pointer.
++ * "Next" pointers are stored as two 8-bit pieces, high order first. The
++ * value is a positive offset from the opcode of the node containing it.
++ * An operand, if any, simply follows the node. (Note that much of the
++ * code generation knows about this implicit relationship.)
++ *
++ * Using two bytes for the "next" pointer is vast overkill for most things,
++ * but allows patterns to get big without disasters.
++ */
++#define OP(p) (*(p))
++#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
++#define OPERAND(p) ((p) + 3)
++
++/*
++ * See regmagic.h for one further detail of program structure.
++ */
++
++
++/*
++ * Utility definitions.
++ */
++#ifndef CHARBITS
++#define UCHARAT(p) ((int)*(unsigned char *)(p))
++#else
++#define UCHARAT(p) ((int)*(p)&CHARBITS)
++#endif
++
++#define FAIL(m) { regerror(m); return(NULL); }
++#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
++#define META "^$.[()|?+*\\"
++
++/*
++ * Flags to be passed up and down.
++ */
++#define HASWIDTH 01 /* Known never to match null string. */
++#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
++#define SPSTART 04 /* Starts with * or +. */
++#define WORST 0 /* Worst case. */
++
++/*
++ * Global work variables for regcomp().
++ */
++struct match_globals {
++char *reginput; /* String-input pointer. */
++char *regbol; /* Beginning of input, for ^ check. */
++char **regstartp; /* Pointer to startp array. */
++char **regendp; /* Ditto for endp. */
++char *regparse; /* Input-scan pointer. */
++int regnpar; /* () count. */
++char regdummy;
++char *regcode; /* Code-emit pointer; ®dummy = don't. */
++long regsize; /* Code size. */
++};
++
++/*
++ * Forward declarations for regcomp()'s friends.
++ */
++#ifndef STATIC
++#define STATIC static
++#endif
++STATIC char *reg(struct match_globals *g, int paren,int *flagp);
++STATIC char *regbranch(struct match_globals *g, int *flagp);
++STATIC char *regpiece(struct match_globals *g, int *flagp);
++STATIC char *regatom(struct match_globals *g, int *flagp);
++STATIC char *regnode(struct match_globals *g, char op);
++STATIC char *regnext(struct match_globals *g, char *p);
++STATIC void regc(struct match_globals *g, char b);
++STATIC void reginsert(struct match_globals *g, char op, char *opnd);
++STATIC void regtail(struct match_globals *g, char *p, char *val);
++STATIC void regoptail(struct match_globals *g, char *p, char *val);
++
++
++__kernel_size_t my_strcspn(const char *s1,const char *s2)
++{
++ char *scan1;
++ char *scan2;
++ int count;
++
++ count = 0;
++ for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) {
++ for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */
++ if (*scan1 == *scan2++)
++ return(count);
++ count++;
++ }
++ return(count);
++}
++
++/*
++ - regcomp - compile a regular expression into internal code
++ *
++ * We can't allocate space until we know how big the compiled form will be,
++ * but we can't compile it (and thus know how big it is) until we've got a
++ * place to put the code. So we cheat: we compile it twice, once with code
++ * generation turned off and size counting turned on, and once "for real".
++ * This also means that we don't allocate space until we are sure that the
++ * thing really will compile successfully, and we never have to move the
++ * code and thus invalidate pointers into it. (Note that it has to be in
++ * one piece because free() must be able to free it all.)
++ *
++ * Beware that the optimization-preparation code in here knows about some
++ * of the structure of the compiled regexp.
++ */
++regexp *
++regcomp(char *exp,int *patternsize)
++{
++ register regexp *r;
++ register char *scan;
++ register char *longest;
++ register int len;
++ int flags;
++ struct match_globals g;
++
++ /* commented out by ethan
++ extern char *malloc();
++ */
++
++ if (exp == NULL)
++ FAIL("NULL argument");
++
++ /* First pass: determine size, legality. */
++ g.regparse = exp;
++ g.regnpar = 1;
++ g.regsize = 0L;
++ g.regcode = &g.regdummy;
++ regc(&g, MAGIC);
++ if (reg(&g, 0, &flags) == NULL)
++ return(NULL);
++
++ /* Small enough for pointer-storage convention? */
++ if (g.regsize >= 32767L) /* Probably could be 65535L. */
++ FAIL("regexp too big");
++
++ /* Allocate space. */
++ *patternsize=sizeof(regexp) + (unsigned)g.regsize;
++ r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize);
++ if (r == NULL)
++ FAIL("out of space");
++
++ /* Second pass: emit code. */
++ g.regparse = exp;
++ g.regnpar = 1;
++ g.regcode = r->program;
++ regc(&g, MAGIC);
++ if (reg(&g, 0, &flags) == NULL)
++ return(NULL);
++
++ /* Dig out information for optimizations. */
++ r->regstart = '\0'; /* Worst-case defaults. */
++ r->reganch = 0;
++ r->regmust = NULL;
++ r->regmlen = 0;
++ scan = r->program+1; /* First BRANCH. */
++ if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */
++ scan = OPERAND(scan);
++
++ /* Starting-point info. */
++ if (OP(scan) == EXACTLY)
++ r->regstart = *OPERAND(scan);
++ else if (OP(scan) == BOL)
++ r->reganch++;
++
++ /*
++ * If there's something expensive in the r.e., find the
++ * longest literal string that must appear and make it the
++ * regmust. Resolve ties in favor of later strings, since
++ * the regstart check works with the beginning of the r.e.
++ * and avoiding duplication strengthens checking. Not a
++ * strong reason, but sufficient in the absence of others.
++ */
++ if (flags&SPSTART) {
++ longest = NULL;
++ len = 0;
++ for (; scan != NULL; scan = regnext(&g, scan))
++ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
++ longest = OPERAND(scan);
++ len = strlen(OPERAND(scan));
++ }
++ r->regmust = longest;
++ r->regmlen = len;
++ }
++ }
++
++ return(r);
++}
++
++/*
++ - reg - regular expression, i.e. main body or parenthesized thing
++ *
++ * Caller must absorb opening parenthesis.
++ *
++ * Combining parenthesis handling with the base level of regular expression
++ * is a trifle forced, but the need to tie the tails of the branches to what
++ * follows makes it hard to avoid.
++ */
++static char *
++reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ )
++{
++ register char *ret;
++ register char *br;
++ register char *ender;
++ register int parno = 0; /* 0 makes gcc happy */
++ int flags;
++
++ *flagp = HASWIDTH; /* Tentatively. */
++
++ /* Make an OPEN node, if parenthesized. */
++ if (paren) {
++ if (g->regnpar >= NSUBEXP)
++ FAIL("too many ()");
++ parno = g->regnpar;
++ g->regnpar++;
++ ret = regnode(g, OPEN+parno);
++ } else
++ ret = NULL;
++
++ /* Pick up the branches, linking them together. */
++ br = regbranch(g, &flags);
++ if (br == NULL)
++ return(NULL);
++ if (ret != NULL)
++ regtail(g, ret, br); /* OPEN -> first. */
++ else
++ ret = br;
++ if (!(flags&HASWIDTH))
++ *flagp &= ~HASWIDTH;
++ *flagp |= flags&SPSTART;
++ while (*g->regparse == '|') {
++ g->regparse++;
++ br = regbranch(g, &flags);
++ if (br == NULL)
++ return(NULL);
++ regtail(g, ret, br); /* BRANCH -> BRANCH. */
++ if (!(flags&HASWIDTH))
++ *flagp &= ~HASWIDTH;
++ *flagp |= flags&SPSTART;
++ }
++
++ /* Make a closing node, and hook it on the end. */
++ ender = regnode(g, (paren) ? CLOSE+parno : END);
++ regtail(g, ret, ender);
++
++ /* Hook the tails of the branches to the closing node. */
++ for (br = ret; br != NULL; br = regnext(g, br))
++ regoptail(g, br, ender);
++
++ /* Check for proper termination. */
++ if (paren && *g->regparse++ != ')') {
++ FAIL("unmatched ()");
++ } else if (!paren && *g->regparse != '\0') {
++ if (*g->regparse == ')') {
++ FAIL("unmatched ()");
++ } else
++ FAIL("junk on end"); /* "Can't happen". */
++ /* NOTREACHED */
++ }
++
++ return(ret);
++}
++
++/*
++ - regbranch - one alternative of an | operator
++ *
++ * Implements the concatenation operator.
++ */
++static char *
++regbranch(struct match_globals *g, int *flagp)
++{
++ register char *ret;
++ register char *chain;
++ register char *latest;
++ int flags;
++
++ *flagp = WORST; /* Tentatively. */
++
++ ret = regnode(g, BRANCH);
++ chain = NULL;
++ while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') {
++ latest = regpiece(g, &flags);
++ if (latest == NULL)
++ return(NULL);
++ *flagp |= flags&HASWIDTH;
++ if (chain == NULL) /* First piece. */
++ *flagp |= flags&SPSTART;
++ else
++ regtail(g, chain, latest);
++ chain = latest;
++ }
++ if (chain == NULL) /* Loop ran zero times. */
++ (void) regnode(g, NOTHING);
++
++ return(ret);
++}
++
++/*
++ - regpiece - something followed by possible [*+?]
++ *
++ * Note that the branching code sequences used for ? and the general cases
++ * of * and + are somewhat optimized: they use the same NOTHING node as
++ * both the endmarker for their branch list and the body of the last branch.
++ * It might seem that this node could be dispensed with entirely, but the
++ * endmarker role is not redundant.
++ */
++static char *
++regpiece(struct match_globals *g, int *flagp)
++{
++ register char *ret;
++ register char op;
++ register char *next;
++ int flags;
++
++ ret = regatom(g, &flags);
++ if (ret == NULL)
++ return(NULL);
++
++ op = *g->regparse;
++ if (!ISMULT(op)) {
++ *flagp = flags;
++ return(ret);
++ }
++
++ if (!(flags&HASWIDTH) && op != '?')
++ FAIL("*+ operand could be empty");
++ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
++
++ if (op == '*' && (flags&SIMPLE))
++ reginsert(g, STAR, ret);
++ else if (op == '*') {
++ /* Emit x* as (x&|), where & means "self". */
++ reginsert(g, BRANCH, ret); /* Either x */
++ regoptail(g, ret, regnode(g, BACK)); /* and loop */
++ regoptail(g, ret, ret); /* back */
++ regtail(g, ret, regnode(g, BRANCH)); /* or */
++ regtail(g, ret, regnode(g, NOTHING)); /* null. */
++ } else if (op == '+' && (flags&SIMPLE))
++ reginsert(g, PLUS, ret);
++ else if (op == '+') {
++ /* Emit x+ as x(&|), where & means "self". */
++ next = regnode(g, BRANCH); /* Either */
++ regtail(g, ret, next);
++ regtail(g, regnode(g, BACK), ret); /* loop back */
++ regtail(g, next, regnode(g, BRANCH)); /* or */
++ regtail(g, ret, regnode(g, NOTHING)); /* null. */
++ } else if (op == '?') {
++ /* Emit x? as (x|) */
++ reginsert(g, BRANCH, ret); /* Either x */
++ regtail(g, ret, regnode(g, BRANCH)); /* or */
++ next = regnode(g, NOTHING); /* null. */
++ regtail(g, ret, next);
++ regoptail(g, ret, next);
++ }
++ g->regparse++;
++ if (ISMULT(*g->regparse))
++ FAIL("nested *?+");
++
++ return(ret);
++}
++
++/*
++ - regatom - the lowest level
++ *
++ * Optimization: gobbles an entire sequence of ordinary characters so that
++ * it can turn them into a single node, which is smaller to store and
++ * faster to run. Backslashed characters are exceptions, each becoming a
++ * separate node; the code is simpler that way and it's not worth fixing.
++ */
++static char *
++regatom(struct match_globals *g, int *flagp)
++{
++ register char *ret;
++ int flags;
++
++ *flagp = WORST; /* Tentatively. */
++
++ switch (*g->regparse++) {
++ case '^':
++ ret = regnode(g, BOL);
++ break;
++ case '$':
++ ret = regnode(g, EOL);
++ break;
++ case '.':
++ ret = regnode(g, ANY);
++ *flagp |= HASWIDTH|SIMPLE;
++ break;
++ case '[': {
++ register int class;
++ register int classend;
++
++ if (*g->regparse == '^') { /* Complement of range. */
++ ret = regnode(g, ANYBUT);
++ g->regparse++;
++ } else
++ ret = regnode(g, ANYOF);
++ if (*g->regparse == ']' || *g->regparse == '-')
++ regc(g, *g->regparse++);
++ while (*g->regparse != '\0' && *g->regparse != ']') {
++ if (*g->regparse == '-') {
++ g->regparse++;
++ if (*g->regparse == ']' || *g->regparse == '\0')
++ regc(g, '-');
++ else {
++ class = UCHARAT(g->regparse-2)+1;
++ classend = UCHARAT(g->regparse);
++ if (class > classend+1)
++ FAIL("invalid [] range");
++ for (; class <= classend; class++)
++ regc(g, class);
++ g->regparse++;
++ }
++ } else
++ regc(g, *g->regparse++);
++ }
++ regc(g, '\0');
++ if (*g->regparse != ']')
++ FAIL("unmatched []");
++ g->regparse++;
++ *flagp |= HASWIDTH|SIMPLE;
++ }
++ break;
++ case '(':
++ ret = reg(g, 1, &flags);
++ if (ret == NULL)
++ return(NULL);
++ *flagp |= flags&(HASWIDTH|SPSTART);
++ break;
++ case '\0':
++ case '|':
++ case ')':
++ FAIL("internal urp"); /* Supposed to be caught earlier. */
++ break;
++ case '?':
++ case '+':
++ case '*':
++ FAIL("?+* follows nothing");
++ break;
++ case '\\':
++ if (*g->regparse == '\0')
++ FAIL("trailing \\");
++ ret = regnode(g, EXACTLY);
++ regc(g, *g->regparse++);
++ regc(g, '\0');
++ *flagp |= HASWIDTH|SIMPLE;
++ break;
++ default: {
++ register int len;
++ register char ender;
++
++ g->regparse--;
++ len = my_strcspn((const char *)g->regparse, (const char *)META);
++ if (len <= 0)
++ FAIL("internal disaster");
++ ender = *(g->regparse+len);
++ if (len > 1 && ISMULT(ender))
++ len--; /* Back off clear of ?+* operand. */
++ *flagp |= HASWIDTH;
++ if (len == 1)
++ *flagp |= SIMPLE;
++ ret = regnode(g, EXACTLY);
++ while (len > 0) {
++ regc(g, *g->regparse++);
++ len--;
++ }
++ regc(g, '\0');
++ }
++ break;
++ }
++
++ return(ret);
++}
++
++/*
++ - regnode - emit a node
++ */
++static char * /* Location. */
++regnode(struct match_globals *g, char op)
++{
++ register char *ret;
++ register char *ptr;
++
++ ret = g->regcode;
++ if (ret == &g->regdummy) {
++ g->regsize += 3;
++ return(ret);
++ }
++
++ ptr = ret;
++ *ptr++ = op;
++ *ptr++ = '\0'; /* Null "next" pointer. */
++ *ptr++ = '\0';
++ g->regcode = ptr;
++
++ return(ret);
++}
++
++/*
++ - regc - emit (if appropriate) a byte of code
++ */
++static void
++regc(struct match_globals *g, char b)
++{
++ if (g->regcode != &g->regdummy)
++ *g->regcode++ = b;
++ else
++ g->regsize++;
++}
++
++/*
++ - reginsert - insert an operator in front of already-emitted operand
++ *
++ * Means relocating the operand.
++ */
++static void
++reginsert(struct match_globals *g, char op, char* opnd)
++{
++ register char *src;
++ register char *dst;
++ register char *place;
++
++ if (g->regcode == &g->regdummy) {
++ g->regsize += 3;
++ return;
++ }
++
++ src = g->regcode;
++ g->regcode += 3;
++ dst = g->regcode;
++ while (src > opnd)
++ *--dst = *--src;
++
++ place = opnd; /* Op node, where operand used to be. */
++ *place++ = op;
++ *place++ = '\0';
++ *place++ = '\0';
++}
++
++/*
++ - regtail - set the next-pointer at the end of a node chain
++ */
++static void
++regtail(struct match_globals *g, char *p, char *val)
++{
++ register char *scan;
++ register char *temp;
++ register int offset;
++
++ if (p == &g->regdummy)
++ return;
++
++ /* Find last node. */
++ scan = p;
++ for (;;) {
++ temp = regnext(g, scan);
++ if (temp == NULL)
++ break;
++ scan = temp;
++ }
++
++ if (OP(scan) == BACK)
++ offset = scan - val;
++ else
++ offset = val - scan;
++ *(scan+1) = (offset>>8)&0377;
++ *(scan+2) = offset&0377;
++}
++
++/*
++ - regoptail - regtail on operand of first argument; nop if operandless
++ */
++static void
++regoptail(struct match_globals *g, char *p, char *val)
++{
++ /* "Operandless" and "op != BRANCH" are synonymous in practice. */
++ if (p == NULL || p == &g->regdummy || OP(p) != BRANCH)
++ return;
++ regtail(g, OPERAND(p), val);
++}
++
++/*
++ * regexec and friends
++ */
++
++
++/*
++ * Forwards.
++ */
++STATIC int regtry(struct match_globals *g, regexp *prog, char *string);
++STATIC int regmatch(struct match_globals *g, char *prog);
++STATIC int regrepeat(struct match_globals *g, char *p);
++
++#ifdef DEBUG
++int regnarrate = 0;
++void regdump();
++STATIC char *regprop(char *op);
++#endif
++
++/*
++ - regexec - match a regexp against a string
++ */
++int
++regexec(regexp *prog, char *string)
++{
++ register char *s;
++ struct match_globals g;
++
++ /* Be paranoid... */
++ if (prog == NULL || string == NULL) {
++ printk("<3>Regexp: NULL parameter\n");
++ return(0);
++ }
++
++ /* Check validity of program. */
++ if (UCHARAT(prog->program) != MAGIC) {
++ printk("<3>Regexp: corrupted program\n");
++ return(0);
++ }
++
++ /* If there is a "must appear" string, look for it. */
++ if (prog->regmust != NULL) {
++ s = string;
++ while ((s = strchr(s, prog->regmust[0])) != NULL) {
++ if (strncmp(s, prog->regmust, prog->regmlen) == 0)
++ break; /* Found it. */
++ s++;
++ }
++ if (s == NULL) /* Not present. */
++ return(0);
++ }
++
++ /* Mark beginning of line for ^ . */
++ g.regbol = string;
++
++ /* Simplest case: anchored match need be tried only once. */
++ if (prog->reganch)
++ return(regtry(&g, prog, string));
++
++ /* Messy cases: unanchored match. */
++ s = string;
++ if (prog->regstart != '\0')
++ /* We know what char it must start with. */
++ while ((s = strchr(s, prog->regstart)) != NULL) {
++ if (regtry(&g, prog, s))
++ return(1);
++ s++;
++ }
++ else
++ /* We don't -- general case. */
++ do {
++ if (regtry(&g, prog, s))
++ return(1);
++ } while (*s++ != '\0');
++
++ /* Failure. */
++ return(0);
++}
++
++/*
++ - regtry - try match at specific point
++ */
++static int /* 0 failure, 1 success */
++regtry(struct match_globals *g, regexp *prog, char *string)
++{
++ register int i;
++ register char **sp;
++ register char **ep;
++
++ g->reginput = string;
++ g->regstartp = prog->startp;
++ g->regendp = prog->endp;
++
++ sp = prog->startp;
++ ep = prog->endp;
++ for (i = NSUBEXP; i > 0; i--) {
++ *sp++ = NULL;
++ *ep++ = NULL;
++ }
++ if (regmatch(g, prog->program + 1)) {
++ prog->startp[0] = string;
++ prog->endp[0] = g->reginput;
++ return(1);
++ } else
++ return(0);
++}
++
++/*
++ - regmatch - main matching routine
++ *
++ * Conceptually the strategy is simple: check to see whether the current
++ * node matches, call self recursively to see whether the rest matches,
++ * and then act accordingly. In practice we make some effort to avoid
++ * recursion, in particular by going through "ordinary" nodes (that don't
++ * need to know whether the rest of the match failed) by a loop instead of
++ * by recursion.
++ */
++static int /* 0 failure, 1 success */
++regmatch(struct match_globals *g, char *prog)
++{
++ register char *scan = prog; /* Current node. */
++ char *next; /* Next node. */
++
++#ifdef DEBUG
++ if (scan != NULL && regnarrate)
++ fprintf(stderr, "%s(\n", regprop(scan));
++#endif
++ while (scan != NULL) {
++#ifdef DEBUG
++ if (regnarrate)
++ fprintf(stderr, "%s...\n", regprop(scan));
++#endif
++ next = regnext(g, scan);
++
++ switch (OP(scan)) {
++ case BOL:
++ if (g->reginput != g->regbol)
++ return(0);
++ break;
++ case EOL:
++ if (*g->reginput != '\0')
++ return(0);
++ break;
++ case ANY:
++ if (*g->reginput == '\0')
++ return(0);
++ g->reginput++;
++ break;
++ case EXACTLY: {
++ register int len;
++ register char *opnd;
++
++ opnd = OPERAND(scan);
++ /* Inline the first character, for speed. */
++ if (*opnd != *g->reginput)
++ return(0);
++ len = strlen(opnd);
++ if (len > 1 && strncmp(opnd, g->reginput, len) != 0)
++ return(0);
++ g->reginput += len;
++ }
++ break;
++ case ANYOF:
++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL)
++ return(0);
++ g->reginput++;
++ break;
++ case ANYBUT:
++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL)
++ return(0);
++ g->reginput++;
++ break;
++ case NOTHING:
++ case BACK:
++ break;
++ case OPEN+1:
++ case OPEN+2:
++ case OPEN+3:
++ case OPEN+4:
++ case OPEN+5:
++ case OPEN+6:
++ case OPEN+7:
++ case OPEN+8:
++ case OPEN+9: {
++ register int no;
++ register char *save;
++
++ no = OP(scan) - OPEN;
++ save = g->reginput;
++
++ if (regmatch(g, next)) {
++ /*
++ * Don't set startp if some later
++ * invocation of the same parentheses
++ * already has.
++ */
++ if (g->regstartp[no] == NULL)
++ g->regstartp[no] = save;
++ return(1);
++ } else
++ return(0);
++ }
++ break;
++ case CLOSE+1:
++ case CLOSE+2:
++ case CLOSE+3:
++ case CLOSE+4:
++ case CLOSE+5:
++ case CLOSE+6:
++ case CLOSE+7:
++ case CLOSE+8:
++ case CLOSE+9:
++ {
++ register int no;
++ register char *save;
++
++ no = OP(scan) - CLOSE;
++ save = g->reginput;
++
++ if (regmatch(g, next)) {
++ /*
++ * Don't set endp if some later
++ * invocation of the same parentheses
++ * already has.
++ */
++ if (g->regendp[no] == NULL)
++ g->regendp[no] = save;
++ return(1);
++ } else
++ return(0);
++ }
++ break;
++ case BRANCH: {
++ register char *save;
++
++ if (OP(next) != BRANCH) /* No choice. */
++ next = OPERAND(scan); /* Avoid recursion. */
++ else {
++ do {
++ save = g->reginput;
++ if (regmatch(g, OPERAND(scan)))
++ return(1);
++ g->reginput = save;
++ scan = regnext(g, scan);
++ } while (scan != NULL && OP(scan) == BRANCH);
++ return(0);
++ /* NOTREACHED */
++ }
++ }
++ break;
++ case STAR:
++ case PLUS: {
++ register char nextch;
++ register int no;
++ register char *save;
++ register int min;
++
++ /*
++ * Lookahead to avoid useless match attempts
++ * when we know what character comes next.
++ */
++ nextch = '\0';
++ if (OP(next) == EXACTLY)
++ nextch = *OPERAND(next);
++ min = (OP(scan) == STAR) ? 0 : 1;
++ save = g->reginput;
++ no = regrepeat(g, OPERAND(scan));
++ while (no >= min) {
++ /* If it could work, try it. */
++ if (nextch == '\0' || *g->reginput == nextch)
++ if (regmatch(g, next))
++ return(1);
++ /* Couldn't or didn't -- back up. */
++ no--;
++ g->reginput = save + no;
++ }
++ return(0);
++ }
++ break;
++ case END:
++ return(1); /* Success! */
++ break;
++ default:
++ printk("<3>Regexp: memory corruption\n");
++ return(0);
++ break;
++ }
++
++ scan = next;
++ }
++
++ /*
++ * We get here only if there's trouble -- normally "case END" is
++ * the terminating point.
++ */
++ printk("<3>Regexp: corrupted pointers\n");
++ return(0);
++}
++
++/*
++ - regrepeat - repeatedly match something simple, report how many
++ */
++static int
++regrepeat(struct match_globals *g, char *p)
++{
++ register int count = 0;
++ register char *scan;
++ register char *opnd;
++
++ scan = g->reginput;
++ opnd = OPERAND(p);
++ switch (OP(p)) {
++ case ANY:
++ count = strlen(scan);
++ scan += count;
++ break;
++ case EXACTLY:
++ while (*opnd == *scan) {
++ count++;
++ scan++;
++ }
++ break;
++ case ANYOF:
++ while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
++ count++;
++ scan++;
++ }
++ break;
++ case ANYBUT:
++ while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
++ count++;
++ scan++;
++ }
++ break;
++ default: /* Oh dear. Called inappropriately. */
++ printk("<3>Regexp: internal foulup\n");
++ count = 0; /* Best compromise. */
++ break;
++ }
++ g->reginput = scan;
++
++ return(count);
++}
++
++/*
++ - regnext - dig the "next" pointer out of a node
++ */
++static char*
++regnext(struct match_globals *g, char *p)
++{
++ register int offset;
++
++ if (p == &g->regdummy)
++ return(NULL);
++
++ offset = NEXT(p);
++ if (offset == 0)
++ return(NULL);
++
++ if (OP(p) == BACK)
++ return(p-offset);
++ else
++ return(p+offset);
++}
++
++#ifdef DEBUG
++
++STATIC char *regprop();
++
++/*
++ - regdump - dump a regexp onto stdout in vaguely comprehensible form
++ */
++void
++regdump(regexp *r)
++{
++ register char *s;
++ register char op = EXACTLY; /* Arbitrary non-END op. */
++ register char *next;
++ /* extern char *strchr(); */
++
++
++ s = r->program + 1;
++ while (op != END) { /* While that wasn't END last time... */
++ op = OP(s);
++ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
++ next = regnext(s);
++ if (next == NULL) /* Next ptr. */
++ printf("(0)");
++ else
++ printf("(%d)", (s-r->program)+(next-s));
++ s += 3;
++ if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
++ /* Literal string, where present. */
++ while (*s != '\0') {
++ putchar(*s);
++ s++;
++ }
++ s++;
++ }
++ putchar('\n');
++ }
++
++ /* Header fields of interest. */
++ if (r->regstart != '\0')
++ printf("start `%c' ", r->regstart);
++ if (r->reganch)
++ printf("anchored ");
++ if (r->regmust != NULL)
++ printf("must have \"%s\"", r->regmust);
++ printf("\n");
++}
++
++/*
++ - regprop - printable representation of opcode
++ */
++static char *
++regprop(char *op)
++{
++#define BUFLEN 50
++ register char *p;
++ static char buf[BUFLEN];
++
++ strcpy(buf, ":");
++
++ switch (OP(op)) {
++ case BOL:
++ p = "BOL";
++ break;
++ case EOL:
++ p = "EOL";
++ break;
++ case ANY:
++ p = "ANY";
++ break;
++ case ANYOF:
++ p = "ANYOF";
++ break;
++ case ANYBUT:
++ p = "ANYBUT";
++ break;
++ case BRANCH:
++ p = "BRANCH";
++ break;
++ case EXACTLY:
++ p = "EXACTLY";
++ break;
++ case NOTHING:
++ p = "NOTHING";
++ break;
++ case BACK:
++ p = "BACK";
++ break;
++ case END:
++ p = "END";
++ break;
++ case OPEN+1:
++ case OPEN+2:
++ case OPEN+3:
++ case OPEN+4:
++ case OPEN+5:
++ case OPEN+6:
++ case OPEN+7:
++ case OPEN+8:
++ case OPEN+9:
++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN);
++ p = NULL;
++ break;
++ case CLOSE+1:
++ case CLOSE+2:
++ case CLOSE+3:
++ case CLOSE+4:
++ case CLOSE+5:
++ case CLOSE+6:
++ case CLOSE+7:
++ case CLOSE+8:
++ case CLOSE+9:
++ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE);
++ p = NULL;
++ break;
++ case STAR:
++ p = "STAR";
++ break;
++ case PLUS:
++ p = "PLUS";
++ break;
++ default:
++ printk("<3>Regexp: corrupted opcode\n");
++ break;
++ }
++ if (p != NULL)
++ strncat(buf, p, BUFLEN-strlen(buf));
++ return(buf);
++}
++#endif
++
++
+--- /dev/null
++++ b/net/netfilter/regexp/regexp.h
+@@ -0,0 +1,41 @@
++/*
++ * Definitions etc. for regexp(3) routines.
++ *
++ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
++ * not the System V one.
++ */
++
++#ifndef REGEXP_H
++#define REGEXP_H
++
++
++/*
++http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h ,
++which contains a version of this library, says:
++
++ *
++ * NSUBEXP must be at least 10, and no greater than 117 or the parser
++ * will not work properly.
++ *
++
++However, it looks rather like this library is limited to 10. If you think
++otherwise, let us know.
++*/
++
++#define NSUBEXP 10
++typedef struct regexp {
++ char *startp[NSUBEXP];
++ char *endp[NSUBEXP];
++ char regstart; /* Internal use only. */
++ char reganch; /* Internal use only. */
++ char *regmust; /* Internal use only. */
++ int regmlen; /* Internal use only. */
++ char program[1]; /* Unwarranted chumminess with compiler. */
++} regexp;
++
++regexp * regcomp(char *exp, int *patternsize);
++int regexec(regexp *prog, char *string);
++void regsub(regexp *prog, char *source, char *dest);
++void regerror(char *s);
++
++#endif
+--- /dev/null
++++ b/net/netfilter/regexp/regmagic.h
+@@ -0,0 +1,5 @@
++/*
++ * The first byte of the regexp internal "program" is actually this magic
++ * number; the start node begins in the second byte.
++ */
++#define MAGIC 0234
+--- /dev/null
++++ b/net/netfilter/regexp/regsub.c
+@@ -0,0 +1,95 @@
++/*
++ * regsub
++ * @(#)regsub.c 1.3 of 2 April 86
++ *
++ * Copyright (c) 1986 by University of Toronto.
++ * Written by Henry Spencer. Not derived from licensed software.
++ *
++ * Permission is granted to anyone to use this software for any
++ * purpose on any computer system, and to redistribute it freely,
++ * subject to the following restrictions:
++ *
++ * 1. The author is not responsible for the consequences of use of
++ * this software, no matter how awful, even if they arise
++ * from defects in it.
++ *
++ * 2. The origin of this software must not be misrepresented, either
++ * by explicit claim or by omission.
++ *
++ * 3. Altered versions must be plainly marked as such, and must not
++ * be misrepresented as being the original software.
++ *
++ *
++ * This code was modified by Ethan Sommer to work within the kernel
++ * (it now uses kmalloc etc..)
++ *
++ */
++#include "regexp.h"
++#include "regmagic.h"
++#include <linux/string.h>
++
++
++#ifndef CHARBITS
++#define UCHARAT(p) ((int)*(unsigned char *)(p))
++#else
++#define UCHARAT(p) ((int)*(p)&CHARBITS)
++#endif
++
++#if 0
++//void regerror(char * s)
++//{
++// printk("regexp(3): %s", s);
++// /* NOTREACHED */
++//}
++#endif
++
++/*
++ - regsub - perform substitutions after a regexp match
++ */
++void
++regsub(regexp * prog, char * source, char * dest)
++{
++ register char *src;
++ register char *dst;
++ register char c;
++ register int no;
++ register int len;
++
++ /* Not necessary and gcc doesn't like it -MLS */
++ /*extern char *strncpy();*/
++
++ if (prog == NULL || source == NULL || dest == NULL) {
++ regerror("NULL parm to regsub");
++ return;
++ }
++ if (UCHARAT(prog->program) != MAGIC) {
++ regerror("damaged regexp fed to regsub");
++ return;
++ }
++
++ src = source;
++ dst = dest;
++ while ((c = *src++) != '\0') {
++ if (c == '&')
++ no = 0;
++ else if (c == '\\' && '0' <= *src && *src <= '9')
++ no = *src++ - '0';
++ else
++ no = -1;
++
++ if (no < 0) { /* Ordinary character. */
++ if (c == '\\' && (*src == '\\' || *src == '&'))
++ c = *src++;
++ *dst++ = c;
++ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
++ len = prog->endp[no] - prog->startp[no];
++ (void) strncpy(dst, prog->startp[no], len);
++ dst += len;
++ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
++ regerror("damaged match string");
++ return;
++ }
++ }
++ }
++ *dst++ = '\0';
++}
+--- /dev/null
++++ b/net/netfilter/xt_layer7.c
+@@ -0,0 +1,666 @@
++/*
++ Kernel module to match application layer (OSI layer 7) data in connections.
++
++ http://l7-filter.sf.net
++
++ (C) 2003-2009 Matthew Strait and Ethan Sommer.
++
++ This program is free software; you can redistribute it and/or
++ modify it under the terms of the GNU General Public License
++ as published by the Free Software Foundation; either version
++ 2 of the License, or (at your option) any later version.
++ http://www.gnu.org/licenses/gpl.txt
++
++ Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be>,
++ xt_helper.c (C) 2002 Harald Welte and cls_layer7.c (C) 2003 Matthew Strait,
++ Ethan Sommer, Justin Levandoski.
++*/
++
++#include <linux/spinlock.h>
++#include <linux/version.h>
++#include <net/ip.h>
++#include <net/tcp.h>
++#include <linux/module.h>
++#include <linux/skbuff.h>
++#include <linux/netfilter.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_conntrack_core.h>
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
++#include <net/netfilter/nf_conntrack_extend.h>
++#include <net/netfilter/nf_conntrack_acct.h>
++#endif
++#include <linux/netfilter/x_tables.h>
++#include <linux/netfilter/xt_layer7.h>
++#include <linux/ctype.h>
++#include <linux/proc_fs.h>
++
++#include "regexp/regexp.c"
++
++MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>");
++MODULE_DESCRIPTION("iptables application layer match module");
++MODULE_ALIAS("ipt_layer7");
++MODULE_VERSION("2.21");
++
++static int maxdatalen = 2048; // this is the default
++module_param(maxdatalen, int, 0444);
++MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter");
++#ifdef CONFIG_NETFILTER_XT_MATCH_LAYER7_DEBUG
++ #define DPRINTK(format,args...) printk(format,##args)
++#else
++ #define DPRINTK(format,args...)
++#endif
++
++/* Number of packets whose data we look at.
++This can be modified through /proc/net/layer7_numpackets */
++static int num_packets = 10;
++
++static struct pattern_cache {
++ char * regex_string;
++ regexp * pattern;
++ struct pattern_cache * next;
++} * first_pattern_cache = NULL;
++
++DEFINE_SPINLOCK(l7_lock);
++
++static int total_acct_packets(struct nf_conn *ct)
++{
++#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26)
++ BUG_ON(ct == NULL);
++ return (ct->counters[IP_CT_DIR_ORIGINAL].packets + ct->counters[IP_CT_DIR_REPLY].packets);
++#else
++ struct nf_conn_counter *acct;
++
++ BUG_ON(ct == NULL);
++ acct = nf_conn_acct_find(ct);
++ if (!acct)
++ return 0;
++ return (acct[IP_CT_DIR_ORIGINAL].packets + acct[IP_CT_DIR_REPLY].packets);
++#endif
++}
++
++#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
++/* Converts an unfriendly string into a friendly one by
++replacing unprintables with periods and all whitespace with " ". */
++static char * friendly_print(unsigned char * s)
++{
++ char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC);
++ int i;
++
++ if(!f) {
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in "
++ "friendly_print, bailing.\n");
++ return NULL;
++ }
++
++ for(i = 0; i < strlen(s); i++){
++ if(isprint(s[i]) && s[i] < 128) f[i] = s[i];
++ else if(isspace(s[i])) f[i] = ' ';
++ else f[i] = '.';
++ }
++ f[i] = '\0';
++ return f;
++}
++
++static char dec2hex(int i)
++{
++ switch (i) {
++ case 0 ... 9:
++ return (i + '0');
++ break;
++ case 10 ... 15:
++ return (i - 10 + 'a');
++ break;
++ default:
++ if (net_ratelimit())
++ printk("layer7: Problem in dec2hex\n");
++ return '\0';
++ }
++}
++
++static char * hex_print(unsigned char * s)
++{
++ char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC);
++ int i;
++
++ if(!g) {
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in hex_print, "
++ "bailing.\n");
++ return NULL;
++ }
++
++ for(i = 0; i < strlen(s); i++) {
++ g[i*3 ] = dec2hex(s[i]/16);
++ g[i*3 + 1] = dec2hex(s[i]%16);
++ g[i*3 + 2] = ' ';
++ }
++ g[i*3] = '\0';
++
++ return g;
++}
++#endif // DEBUG
++
++/* Use instead of regcomp. As we expect to be seeing the same regexps over and
++over again, it make sense to cache the results. */
++static regexp * compile_and_cache(const char * regex_string,
++ const char * protocol)
++{
++ struct pattern_cache * node = first_pattern_cache;
++ struct pattern_cache * last_pattern_cache = first_pattern_cache;
++ struct pattern_cache * tmp;
++ unsigned int len;
++
++ while (node != NULL) {
++ if (!strcmp(node->regex_string, regex_string))
++ return node->pattern;
++
++ last_pattern_cache = node;/* points at the last non-NULL node */
++ node = node->next;
++ }
++
++ /* If we reach the end of the list, then we have not yet cached
++ the pattern for this regex. Let's do that now.
++ Be paranoid about running out of memory to avoid list corruption. */
++ tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC);
++
++ if(!tmp) {
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in "
++ "compile_and_cache, bailing.\n");
++ return NULL;
++ }
++
++ tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC);
++ tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC);
++ tmp->next = NULL;
++
++ if(!tmp->regex_string || !tmp->pattern) {
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in "
++ "compile_and_cache, bailing.\n");
++ kfree(tmp->regex_string);
++ kfree(tmp->pattern);
++ kfree(tmp);
++ return NULL;
++ }
++
++ /* Ok. The new node is all ready now. */
++ node = tmp;
++
++ if(first_pattern_cache == NULL) /* list is empty */
++ first_pattern_cache = node; /* make node the beginning */
++ else
++ last_pattern_cache->next = node; /* attach node to the end */
++
++ /* copy the string and compile the regex */
++ len = strlen(regex_string);
++ DPRINTK("About to compile this: \"%s\"\n", regex_string);
++ node->pattern = regcomp((char *)regex_string, &len);
++ if ( !node->pattern ) {
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: Error compiling regexp "
++ "\"%s\" (%s)\n",
++ regex_string, protocol);
++ /* pattern is now cached as NULL, so we won't try again. */
++ }
++
++ strcpy(node->regex_string, regex_string);
++ return node->pattern;
++}
++
++static int can_handle(const struct sk_buff *skb)
++{
++ if(!ip_hdr(skb)) /* not IP */
++ return 0;
++ if(ip_hdr(skb)->protocol != IPPROTO_TCP &&
++ ip_hdr(skb)->protocol != IPPROTO_UDP &&
++ ip_hdr(skb)->protocol != IPPROTO_ICMP)
++ return 0;
++ return 1;
++}
++
++/* Returns offset the into the skb->data that the application data starts */
++static int app_data_offset(const struct sk_buff *skb)
++{
++ /* In case we are ported somewhere (ebtables?) where ip_hdr(skb)
++ isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */
++ int ip_hl = 4*ip_hdr(skb)->ihl;
++
++ if( ip_hdr(skb)->protocol == IPPROTO_TCP ) {
++ /* 12 == offset into TCP header for the header length field.
++ Can't get this with skb->h.th->doff because the tcphdr
++ struct doesn't get set when routing (this is confirmed to be
++ true in Netfilter as well as QoS.) */
++ int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4);
++
++ return ip_hl + tcp_hl;
++ } else if( ip_hdr(skb)->protocol == IPPROTO_UDP ) {
++ return ip_hl + 8; /* UDP header is always 8 bytes */
++ } else if( ip_hdr(skb)->protocol == IPPROTO_ICMP ) {
++ return ip_hl + 8; /* ICMP header is 8 bytes */
++ } else {
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: tried to handle unknown "
++ "protocol!\n");
++ return ip_hl + 8; /* something reasonable */
++ }
++}
++
++/* handles whether there's a match when we aren't appending data anymore */
++static int match_no_append(struct nf_conn * conntrack,
++ struct nf_conn * master_conntrack,
++ enum ip_conntrack_info ctinfo,
++ enum ip_conntrack_info master_ctinfo,
++ const struct xt_layer7_info * info)
++{
++ /* If we're in here, throw the app data away */
++ if(master_conntrack->layer7.app_data != NULL) {
++
++ #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
++ if(!master_conntrack->layer7.app_proto) {
++ char * f =
++ friendly_print(master_conntrack->layer7.app_data);
++ char * g =
++ hex_print(master_conntrack->layer7.app_data);
++ DPRINTK("\nl7-filter gave up after %d bytes "
++ "(%d packets):\n%s\n",
++ strlen(f), total_acct_packets(master_conntrack), f);
++ kfree(f);
++ DPRINTK("In hex: %s\n", g);
++ kfree(g);
++ }
++ #endif
++
++ kfree(master_conntrack->layer7.app_data);
++ master_conntrack->layer7.app_data = NULL; /* don't free again */
++ }
++
++ if(master_conntrack->layer7.app_proto){
++ /* Here child connections set their .app_proto (for /proc) */
++ if(!conntrack->layer7.app_proto) {
++ conntrack->layer7.app_proto =
++ kmalloc(strlen(master_conntrack->layer7.app_proto)+1,
++ GFP_ATOMIC);
++ if(!conntrack->layer7.app_proto){
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory "
++ "in match_no_append, "
++ "bailing.\n");
++ return 1;
++ }
++ strcpy(conntrack->layer7.app_proto,
++ master_conntrack->layer7.app_proto);
++ }
++
++ return (!strcmp(master_conntrack->layer7.app_proto,
++ info->protocol));
++ }
++ else {
++ /* If not classified, set to "unknown" to distinguish from
++ connections that are still being tested. */
++ master_conntrack->layer7.app_proto =
++ kmalloc(strlen("unknown")+1, GFP_ATOMIC);
++ if(!master_conntrack->layer7.app_proto){
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in "
++ "match_no_append, bailing.\n");
++ return 1;
++ }
++ strcpy(master_conntrack->layer7.app_proto, "unknown");
++ return 0;
++ }
++}
++
++/* add the new app data to the conntrack. Return number of bytes added. */
++static int add_data(struct nf_conn * master_conntrack,
++ char * app_data, int appdatalen)
++{
++ int length = 0, i;
++ int oldlength = master_conntrack->layer7.app_data_len;
++
++ /* This is a fix for a race condition by Deti Fliegl. However, I'm not
++ clear on whether the race condition exists or whether this really
++ fixes it. I might just be being dense... Anyway, if it's not really
++ a fix, all it does is waste a very small amount of time. */
++ if(!master_conntrack->layer7.app_data) return 0;
++
++ /* Strip nulls. Make everything lower case (our regex lib doesn't
++ do case insensitivity). Add it to the end of the current data. */
++ for(i = 0; i < maxdatalen-oldlength-1 &&
++ i < appdatalen; i++) {
++ if(app_data[i] != '\0') {
++ /* the kernel version of tolower mungs 'upper ascii' */
++ master_conntrack->layer7.app_data[length+oldlength] =
++ isascii(app_data[i])?
++ tolower(app_data[i]) : app_data[i];
++ length++;
++ }
++ }
++
++ master_conntrack->layer7.app_data[length+oldlength] = '\0';
++ master_conntrack->layer7.app_data_len = length + oldlength;
++
++ return length;
++}
++
++/* taken from drivers/video/modedb.c */
++static int my_atoi(const char *s)
++{
++ int val = 0;
++
++ for (;; s++) {
++ switch (*s) {
++ case '0'...'9':
++ val = 10*val+(*s-'0');
++ break;
++ default:
++ return val;
++ }
++ }
++}
++
++/* write out num_packets to userland. */
++static int layer7_read_proc(char* page, char ** start, off_t off, int count,
++ int* eof, void * data)
++{
++ if(num_packets > 99 && net_ratelimit())
++ printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n");
++
++ page[0] = num_packets/10 + '0';
++ page[1] = num_packets%10 + '0';
++ page[2] = '\n';
++ page[3] = '\0';
++
++ *eof=1;
++
++ return 3;
++}
++
++/* Read in num_packets from userland */
++static int layer7_write_proc(struct file* file, const char* buffer,
++ unsigned long count, void *data)
++{
++ char * foo = kmalloc(count, GFP_ATOMIC);
++
++ if(!foo){
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory, bailing. "
++ "num_packets unchanged.\n");
++ return count;
++ }
++
++ if(copy_from_user(foo, buffer, count)) {
++ return -EFAULT;
++ }
++
++
++ num_packets = my_atoi(foo);
++ kfree (foo);
++
++ /* This has an arbitrary limit to make the math easier. I'm lazy.
++ But anyway, 99 is a LOT! If you want more, you're doing it wrong! */
++ if(num_packets > 99) {
++ printk(KERN_WARNING "layer7: num_packets can't be > 99.\n");
++ num_packets = 99;
++ } else if(num_packets < 1) {
++ printk(KERN_WARNING "layer7: num_packets can't be < 1.\n");
++ num_packets = 1;
++ }
++
++ return count;
++}
++
++static bool
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
++match(const struct sk_buff *skbin, const struct xt_match_param *par)
++#else
++match(const struct sk_buff *skbin,
++ const struct net_device *in,
++ const struct net_device *out,
++ const struct xt_match *match,
++ const void *matchinfo,
++ int offset,
++ unsigned int protoff,
++ bool *hotdrop)
++#endif
++{
++ /* sidestep const without getting a compiler warning... */
++ struct sk_buff * skb = (struct sk_buff *)skbin;
++
++ const struct xt_layer7_info * info =
++ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
++ par->matchinfo;
++ #else
++ matchinfo;
++ #endif
++
++ enum ip_conntrack_info master_ctinfo, ctinfo;
++ struct nf_conn *master_conntrack, *conntrack;
++ unsigned char * app_data;
++ unsigned int pattern_result, appdatalen;
++ regexp * comppattern;
++
++ /* Be paranoid/incompetent - lock the entire match function. */
++ spin_lock_bh(&l7_lock);
++
++ if(!can_handle(skb)){
++ DPRINTK("layer7: This is some protocol I can't handle.\n");
++ spin_unlock_bh(&l7_lock);
++ return info->invert;
++ }
++
++ /* Treat parent & all its children together as one connection, except
++ for the purpose of setting conntrack->layer7.app_proto in the actual
++ connection. This makes /proc/net/ip_conntrack more satisfying. */
++ if(!(conntrack = nf_ct_get(skb, &ctinfo)) ||
++ !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){
++ DPRINTK("layer7: couldn't get conntrack.\n");
++ spin_unlock_bh(&l7_lock);
++ return info->invert;
++ }
++
++ /* Try to get a master conntrack (and its master etc) for FTP, etc. */
++ while (master_ct(master_conntrack) != NULL)
++ master_conntrack = master_ct(master_conntrack);
++
++ /* if we've classified it or seen too many packets */
++ if(total_acct_packets(master_conntrack) > num_packets ||
++ master_conntrack->layer7.app_proto) {
++
++ pattern_result = match_no_append(conntrack, master_conntrack,
++ ctinfo, master_ctinfo, info);
++
++ /* skb->cb[0] == seen. Don't do things twice if there are
++ multiple l7 rules. I'm not sure that using cb for this purpose
++ is correct, even though it says "put your private variables
++ there". But it doesn't look like it is being used for anything
++ else in the skbs that make it here. */
++ skb->cb[0] = 1; /* marking it seen here's probably irrelevant */
++
++ spin_unlock_bh(&l7_lock);
++ return (pattern_result ^ info->invert);
++ }
++
++ if(skb_is_nonlinear(skb)){
++ if(skb_linearize(skb) != 0){
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: failed to linearize "
++ "packet, bailing.\n");
++ spin_unlock_bh(&l7_lock);
++ return info->invert;
++ }
++ }
++
++ /* now that the skb is linearized, it's safe to set these. */
++ app_data = skb->data + app_data_offset(skb);
++ appdatalen = skb_tail_pointer(skb) - app_data;
++
++ /* the return value gets checked later, when we're ready to use it */
++ comppattern = compile_and_cache(info->pattern, info->protocol);
++
++ /* On the first packet of a connection, allocate space for app data */
++ if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] &&
++ !master_conntrack->layer7.app_data){
++ master_conntrack->layer7.app_data =
++ kmalloc(maxdatalen, GFP_ATOMIC);
++ if(!master_conntrack->layer7.app_data){
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in "
++ "match, bailing.\n");
++ spin_unlock_bh(&l7_lock);
++ return info->invert;
++ }
++
++ master_conntrack->layer7.app_data[0] = '\0';
++ }
++
++ /* Can be here, but unallocated, if numpackets is increased near
++ the beginning of a connection */
++ if(master_conntrack->layer7.app_data == NULL){
++ spin_unlock_bh(&l7_lock);
++ return info->invert; /* unmatched */
++ }
++
++ if(!skb->cb[0]){
++ int newbytes;
++ newbytes = add_data(master_conntrack, app_data, appdatalen);
++
++ if(newbytes == 0) { /* didn't add any data */
++ skb->cb[0] = 1;
++ /* Didn't match before, not going to match now */
++ spin_unlock_bh(&l7_lock);
++ return info->invert;
++ }
++ }
++
++ /* If looking for "unknown", then never match. "Unknown" means that
++ we've given up; we're still trying with these packets. */
++ if(!strcmp(info->protocol, "unknown")) {
++ pattern_result = 0;
++ /* If looking for "unset", then always match. "Unset" means that we
++ haven't yet classified the connection. */
++ } else if(!strcmp(info->protocol, "unset")) {
++ pattern_result = 2;
++ DPRINTK("layer7: matched unset: not yet classified "
++ "(%d/%d packets)\n",
++ total_acct_packets(master_conntrack), num_packets);
++ /* If the regexp failed to compile, don't bother running it */
++ } else if(comppattern &&
++ regexec(comppattern, master_conntrack->layer7.app_data)){
++ DPRINTK("layer7: matched %s\n", info->protocol);
++ pattern_result = 1;
++ } else pattern_result = 0;
++
++ if(pattern_result == 1) {
++ master_conntrack->layer7.app_proto =
++ kmalloc(strlen(info->protocol)+1, GFP_ATOMIC);
++ if(!master_conntrack->layer7.app_proto){
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in "
++ "match, bailing.\n");
++ spin_unlock_bh(&l7_lock);
++ return (pattern_result ^ info->invert);
++ }
++ strcpy(master_conntrack->layer7.app_proto, info->protocol);
++ } else if(pattern_result > 1) { /* cleanup from "unset" */
++ pattern_result = 1;
++ }
++
++ /* mark the packet seen */
++ skb->cb[0] = 1;
++
++ spin_unlock_bh(&l7_lock);
++ return (pattern_result ^ info->invert);
++}
++
++// load nf_conntrack_ipv4
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
++static bool check(const struct xt_mtchk_param *par)
++{
++ if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
++ printk(KERN_WARNING "can't load conntrack support for "
++ "proto=%d\n", par->match->family);
++#else
++static bool check(const char *tablename, const void *inf,
++ const struct xt_match *match, void *matchinfo,
++ unsigned int hook_mask)
++{
++ if (nf_ct_l3proto_try_module_get(match->family) < 0) {
++ printk(KERN_WARNING "can't load conntrack support for "
++ "proto=%d\n", match->family);
++#endif
++ return 0;
++ }
++ return 1;
++}
++
++
++#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
++ static void destroy(const struct xt_mtdtor_param *par)
++ {
++ nf_ct_l3proto_module_put(par->match->family);
++ }
++#else
++ static void destroy(const struct xt_match *match, void *matchinfo)
++ {
++ nf_ct_l3proto_module_put(match->family);
++ }
++#endif
++
++static struct xt_match xt_layer7_match[] __read_mostly = {
++{
++ .name = "layer7",
++ .family = AF_INET,
++ .checkentry = check,
++ .match = match,
++ .destroy = destroy,
++ .matchsize = sizeof(struct xt_layer7_info),
++ .me = THIS_MODULE
++}
++};
++
++static void layer7_cleanup_proc(void)
++{
++ remove_proc_entry("layer7_numpackets", init_net.proc_net);
++}
++
++/* register the proc file */
++static void layer7_init_proc(void)
++{
++ struct proc_dir_entry* entry;
++ entry = create_proc_entry("layer7_numpackets", 0644, init_net.proc_net);
++ entry->read_proc = layer7_read_proc;
++ entry->write_proc = layer7_write_proc;
++}
++
++static int __init xt_layer7_init(void)
++{
++ need_conntrack();
++
++ layer7_init_proc();
++ if(maxdatalen < 1) {
++ printk(KERN_WARNING "layer7: maxdatalen can't be < 1, "
++ "using 1\n");
++ maxdatalen = 1;
++ }
++ /* This is not a hard limit. It's just here to prevent people from
++ bringing their slow machines to a grinding halt. */
++ else if(maxdatalen > 65536) {
++ printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, "
++ "using 65536\n");
++ maxdatalen = 65536;
++ }
++ return xt_register_matches(xt_layer7_match,
++ ARRAY_SIZE(xt_layer7_match));
++}
++
++static void __exit xt_layer7_fini(void)
++{
++ layer7_cleanup_proc();
++ xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match));
++}
++
++module_init(xt_layer7_init);
++module_exit(xt_layer7_fini);
--- /dev/null
+--- a/include/linux/netfilter/xt_layer7.h
++++ b/include/linux/netfilter/xt_layer7.h
+@@ -8,6 +8,7 @@ struct xt_layer7_info {
+ char protocol[MAX_PROTOCOL_LEN];
+ char pattern[MAX_PATTERN_LEN];
+ u_int8_t invert;
++ u_int8_t pkt;
+ };
+
+ #endif /* _XT_LAYER7_H */
+--- a/net/netfilter/xt_layer7.c
++++ b/net/netfilter/xt_layer7.c
+@@ -314,33 +314,35 @@ static int match_no_append(struct nf_con
+ }
+
+ /* add the new app data to the conntrack. Return number of bytes added. */
+-static int add_data(struct nf_conn * master_conntrack,
+- char * app_data, int appdatalen)
++static int add_datastr(char *target, int offset, char *app_data, int len)
+ {
+ int length = 0, i;
+- int oldlength = master_conntrack->layer7.app_data_len;
+-
+- /* This is a fix for a race condition by Deti Fliegl. However, I'm not
+- clear on whether the race condition exists or whether this really
+- fixes it. I might just be being dense... Anyway, if it's not really
+- a fix, all it does is waste a very small amount of time. */
+- if(!master_conntrack->layer7.app_data) return 0;
++ if (!target) return 0;
+
+ /* Strip nulls. Make everything lower case (our regex lib doesn't
+ do case insensitivity). Add it to the end of the current data. */
+- for(i = 0; i < maxdatalen-oldlength-1 &&
+- i < appdatalen; i++) {
++ for(i = 0; i < maxdatalen-offset-1 && i < len; i++) {
+ if(app_data[i] != '\0') {
+ /* the kernel version of tolower mungs 'upper ascii' */
+- master_conntrack->layer7.app_data[length+oldlength] =
++ target[length+offset] =
+ isascii(app_data[i])?
+ tolower(app_data[i]) : app_data[i];
+ length++;
+ }
+ }
++ target[length+offset] = '\0';
++
++ return length;
++}
++
++/* add the new app data to the conntrack. Return number of bytes added. */
++static int add_data(struct nf_conn * master_conntrack,
++ char * app_data, int appdatalen)
++{
++ int length;
+
+- master_conntrack->layer7.app_data[length+oldlength] = '\0';
+- master_conntrack->layer7.app_data_len = length + oldlength;
++ length = add_datastr(master_conntrack->layer7.app_data, master_conntrack->layer7.app_data_len, app_data, appdatalen);
++ master_conntrack->layer7.app_data_len += length;
+
+ return length;
+ }
+@@ -438,7 +440,7 @@ match(const struct sk_buff *skbin,
+
+ enum ip_conntrack_info master_ctinfo, ctinfo;
+ struct nf_conn *master_conntrack, *conntrack;
+- unsigned char * app_data;
++ unsigned char *app_data, *tmp_data;
+ unsigned int pattern_result, appdatalen;
+ regexp * comppattern;
+
+@@ -466,8 +468,8 @@ match(const struct sk_buff *skbin,
+ master_conntrack = master_ct(master_conntrack);
+
+ /* if we've classified it or seen too many packets */
+- if(total_acct_packets(master_conntrack) > num_packets ||
+- master_conntrack->layer7.app_proto) {
++ if(!info->pkt && (total_acct_packets(master_conntrack) > num_packets ||
++ master_conntrack->layer7.app_proto)) {
+
+ pattern_result = match_no_append(conntrack, master_conntrack,
+ ctinfo, master_ctinfo, info);
+@@ -500,6 +502,25 @@ match(const struct sk_buff *skbin,
+ /* the return value gets checked later, when we're ready to use it */
+ comppattern = compile_and_cache(info->pattern, info->protocol);
+
++ if (info->pkt) {
++ tmp_data = kmalloc(maxdatalen, GFP_ATOMIC);
++ if(!tmp_data){
++ if (net_ratelimit())
++ printk(KERN_ERR "layer7: out of memory in match, bailing.\n");
++ return info->invert;
++ }
++
++ tmp_data[0] = '\0';
++ add_datastr(tmp_data, 0, app_data, appdatalen);
++ pattern_result = ((comppattern && regexec(comppattern, tmp_data)) ? 1 : 0);
++
++ kfree(tmp_data);
++ tmp_data = NULL;
++ spin_unlock_bh(&l7_lock);
++
++ return (pattern_result ^ info->invert);
++ }
++
+ /* On the first packet of a connection, allocate space for app data */
+ if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] &&
+ !master_conntrack->layer7.app_data){
--- /dev/null
+--- a/include/linux/netfilter_ipv4/ip_tables.h
++++ b/include/linux/netfilter_ipv4/ip_tables.h
+@@ -62,6 +62,7 @@ struct ipt_ip {
+ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */
+ #define IPT_F_GOTO 0x02 /* Set if jump is a goto */
+ #define IPT_F_MASK 0x03 /* All possible flag bits mask. */
++#define IPT_F_NO_DEF_MATCH 0x80 /* Internal: no default match rules present */
+
+ /* Values for "inv" field in struct ipt_ip. */
+ #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */
+--- a/net/ipv4/netfilter/ip_tables.c
++++ b/net/ipv4/netfilter/ip_tables.c
+@@ -87,6 +87,9 @@ ip_packet_match(const struct iphdr *ip,
+
+ #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
+
++ if (ipinfo->flags & IPT_F_NO_DEF_MATCH)
++ return true;
++
+ if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
+ IPT_INV_SRCIP)
+ || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+@@ -137,13 +140,35 @@ ip_packet_match(const struct iphdr *ip,
+ return false;
+ }
+
++#undef FWINV
+ return true;
+ }
+
+ static bool
+-ip_checkentry(const struct ipt_ip *ip)
++ip_checkentry(struct ipt_ip *ip)
+ {
+- if (ip->flags & ~IPT_F_MASK) {
++#define FWINV(bool, invflg) ((bool) || (ip->invflags & (invflg)))
++
++ if (FWINV(ip->smsk.s_addr, IPT_INV_SRCIP) ||
++ FWINV(ip->dmsk.s_addr, IPT_INV_DSTIP))
++ goto has_match_rules;
++
++ if (FWINV(!!((const unsigned long *)ip->iniface_mask)[0],
++ IPT_INV_VIA_IN) ||
++ FWINV(!!((const unsigned long *)ip->outiface_mask)[0],
++ IPT_INV_VIA_OUT))
++ goto has_match_rules;
++
++ if (FWINV(ip->proto, IPT_INV_PROTO))
++ goto has_match_rules;
++
++ if (FWINV(ip->flags&IPT_F_FRAG, IPT_INV_FRAG))
++ goto has_match_rules;
++
++ ip->flags |= IPT_F_NO_DEF_MATCH;
++
++has_match_rules:
++ if (ip->flags & ~(IPT_F_MASK|IPT_F_NO_DEF_MATCH)) {
+ duprintf("Unknown flag bits set: %08X\n",
+ ip->flags & ~IPT_F_MASK);
+ return false;
+@@ -153,6 +178,8 @@ ip_checkentry(const struct ipt_ip *ip)
+ ip->invflags & ~IPT_INV_MASK);
+ return false;
+ }
++
++#undef FWINV
+ return true;
+ }
+
+@@ -200,7 +227,6 @@ unconditional(const struct ipt_ip *ip)
+ return 0;
+
+ return 1;
+-#undef FWINV
+ }
+
+ #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
+@@ -326,8 +352,28 @@ ipt_do_table(struct sk_buff *skb,
+ struct xt_match_param mtpar;
+ struct xt_target_param tgpar;
+
+- /* Initialization */
+ ip = ip_hdr(skb);
++
++ IP_NF_ASSERT(table->valid_hooks & (1 << hook));
++ xt_info_rdlock_bh();
++ private = table->private;
++ table_base = private->entries[smp_processor_id()];
++ e = get_entry(table_base, private->hook_entry[hook]);
++
++ if (e->target_offset <= sizeof(struct ipt_entry) &&
++ (e->ip.flags & IPT_F_NO_DEF_MATCH)) {
++ struct ipt_entry_target *t = ipt_get_target(e);
++ if (!t->u.kernel.target->target) {
++ int v = ((struct ipt_standard_target *)t)->verdict;
++ if ((v < 0) && (v != IPT_RETURN)) {
++ ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
++ xt_info_rdunlock_bh();
++ return (unsigned)(-v) - 1;
++ }
++ }
++ }
++
++ /* Initialization */
+ datalen = skb->len - ip->ihl * 4;
+ indev = in ? in->name : nulldevname;
+ outdev = out ? out->name : nulldevname;
+@@ -345,13 +391,6 @@ ipt_do_table(struct sk_buff *skb,
+ mtpar.family = tgpar.family = NFPROTO_IPV4;
+ mtpar.hooknum = tgpar.hooknum = hook;
+
+- IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+- xt_info_rdlock_bh();
+- private = table->private;
+- table_base = private->entries[smp_processor_id()];
+-
+- e = get_entry(table_base, private->hook_entry[hook]);
+-
+ /* For return from builtin chain */
+ back = get_entry(table_base, private->underflow[hook]);
+
--- /dev/null
+--- a/include/linux/netfilter_ipv4/Kbuild
++++ b/include/linux/netfilter_ipv4/Kbuild
+@@ -45,3 +45,14 @@ header-y += ipt_ttl.h
+
+ unifdef-y += ip_queue.h
+ unifdef-y += ip_tables.h
++
++unifdef-y += ip_set.h
++header-y += ip_set_iphash.h
++header-y += ip_set_ipmap.h
++header-y += ip_set_ipporthash.h
++unifdef-y += ip_set_iptree.h
++unifdef-y += ip_set_iptreemap.h
++header-y += ip_set_jhash.h
++header-y += ip_set_macipmap.h
++unifdef-y += ip_set_nethash.h
++header-y += ip_set_portmap.h
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set.h
+@@ -0,0 +1,498 @@
++#ifndef _IP_SET_H
++#define _IP_SET_H
++
++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
++ * Patrick Schaaf <bof@bof.de>
++ * Martin Josefsson <gandalf@wlug.westbo.se>
++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#if 0
++#define IP_SET_DEBUG
++#endif
++
++/*
++ * A sockopt of such quality has hardly ever been seen before on the open
++ * market! This little beauty, hardly ever used: above 64, so it's
++ * traditionally used for firewalling, not touched (even once!) by the
++ * 2.0, 2.2 and 2.4 kernels!
++ *
++ * Comes with its own certificate of authenticity, valid anywhere in the
++ * Free world!
++ *
++ * Rusty, 19.4.2000
++ */
++#define SO_IP_SET 83
++
++/*
++ * Heavily modify by Joakim Axelsson 08.03.2002
++ * - Made it more modulebased
++ *
++ * Additional heavy modifications by Jozsef Kadlecsik 22.02.2004
++ * - bindings added
++ * - in order to "deal with" backward compatibility, renamed to ipset
++ */
++
++/*
++ * Used so that the kernel module and ipset-binary can match their versions
++ */
++#define IP_SET_PROTOCOL_VERSION 2
++
++#define IP_SET_MAXNAMELEN 32 /* set names and set typenames */
++
++/* Lets work with our own typedef for representing an IP address.
++ * We hope to make the code more portable, possibly to IPv6...
++ *
++ * The representation works in HOST byte order, because most set types
++ * will perform arithmetic operations and compare operations.
++ *
++ * For now the type is an uint32_t.
++ *
++ * Make sure to ONLY use the functions when translating and parsing
++ * in order to keep the host byte order and make it more portable:
++ * parse_ip()
++ * parse_mask()
++ * parse_ipandmask()
++ * ip_tostring()
++ * (Joakim: where are they???)
++ */
++
++typedef uint32_t ip_set_ip_t;
++
++/* Sets are identified by an id in kernel space. Tweak with ip_set_id_t
++ * and IP_SET_INVALID_ID if you want to increase the max number of sets.
++ */
++typedef uint16_t ip_set_id_t;
++
++#define IP_SET_INVALID_ID 65535
++
++/* How deep we follow bindings */
++#define IP_SET_MAX_BINDINGS 6
++
++/*
++ * Option flags for kernel operations (ipt_set_info)
++ */
++#define IPSET_SRC 0x01 /* Source match/add */
++#define IPSET_DST 0x02 /* Destination match/add */
++#define IPSET_MATCH_INV 0x04 /* Inverse matching */
++
++/*
++ * Set features
++ */
++#define IPSET_TYPE_IP 0x01 /* IP address type of set */
++#define IPSET_TYPE_PORT 0x02 /* Port type of set */
++#define IPSET_DATA_SINGLE 0x04 /* Single data storage */
++#define IPSET_DATA_DOUBLE 0x08 /* Double data storage */
++
++/* Reserved keywords */
++#define IPSET_TOKEN_DEFAULT ":default:"
++#define IPSET_TOKEN_ALL ":all:"
++
++/* SO_IP_SET operation constants, and their request struct types.
++ *
++ * Operation ids:
++ * 0-99: commands with version checking
++ * 100-199: add/del/test/bind/unbind
++ * 200-299: list, save, restore
++ */
++
++/* Single shot operations:
++ * version, create, destroy, flush, rename and swap
++ *
++ * Sets are identified by name.
++ */
++
++#define IP_SET_REQ_STD \
++ unsigned op; \
++ unsigned version; \
++ char name[IP_SET_MAXNAMELEN]
++
++#define IP_SET_OP_CREATE 0x00000001 /* Create a new (empty) set */
++struct ip_set_req_create {
++ IP_SET_REQ_STD;
++ char typename[IP_SET_MAXNAMELEN];
++};
++
++#define IP_SET_OP_DESTROY 0x00000002 /* Remove a (empty) set */
++struct ip_set_req_std {
++ IP_SET_REQ_STD;
++};
++
++#define IP_SET_OP_FLUSH 0x00000003 /* Remove all IPs in a set */
++/* Uses ip_set_req_std */
++
++#define IP_SET_OP_RENAME 0x00000004 /* Rename a set */
++/* Uses ip_set_req_create */
++
++#define IP_SET_OP_SWAP 0x00000005 /* Swap two sets */
++/* Uses ip_set_req_create */
++
++union ip_set_name_index {
++ char name[IP_SET_MAXNAMELEN];
++ ip_set_id_t index;
++};
++
++#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */
++struct ip_set_req_get_set {
++ unsigned op;
++ unsigned version;
++ union ip_set_name_index set;
++};
++
++#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */
++/* Uses ip_set_req_get_set */
++
++#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */
++struct ip_set_req_version {
++ unsigned op;
++ unsigned version;
++};
++
++/* Double shots operations:
++ * add, del, test, bind and unbind.
++ *
++ * First we query the kernel to get the index and type of the target set,
++ * then issue the command. Validity of IP is checked in kernel in order
++ * to minimalize sockopt operations.
++ */
++
++/* Get minimal set data for add/del/test/bind/unbind IP */
++#define IP_SET_OP_ADT_GET 0x00000010 /* Get set and type */
++struct ip_set_req_adt_get {
++ unsigned op;
++ unsigned version;
++ union ip_set_name_index set;
++ char typename[IP_SET_MAXNAMELEN];
++};
++
++#define IP_SET_REQ_BYINDEX \
++ unsigned op; \
++ ip_set_id_t index;
++
++struct ip_set_req_adt {
++ IP_SET_REQ_BYINDEX;
++};
++
++#define IP_SET_OP_ADD_IP 0x00000101 /* Add an IP to a set */
++/* Uses ip_set_req_adt, with type specific addage */
++
++#define IP_SET_OP_DEL_IP 0x00000102 /* Remove an IP from a set */
++/* Uses ip_set_req_adt, with type specific addage */
++
++#define IP_SET_OP_TEST_IP 0x00000103 /* Test an IP in a set */
++/* Uses ip_set_req_adt, with type specific addage */
++
++#define IP_SET_OP_BIND_SET 0x00000104 /* Bind an IP to a set */
++/* Uses ip_set_req_bind, with type specific addage */
++struct ip_set_req_bind {
++ IP_SET_REQ_BYINDEX;
++ char binding[IP_SET_MAXNAMELEN];
++};
++
++#define IP_SET_OP_UNBIND_SET 0x00000105 /* Unbind an IP from a set */
++/* Uses ip_set_req_bind, with type speficic addage
++ * index = 0 means unbinding for all sets */
++
++#define IP_SET_OP_TEST_BIND_SET 0x00000106 /* Test binding an IP to a set */
++/* Uses ip_set_req_bind, with type specific addage */
++
++/* Multiple shots operations: list, save, restore.
++ *
++ * - check kernel version and query the max number of sets
++ * - get the basic information on all sets
++ * and size required for the next step
++ * - get actual set data: header, data, bindings
++ */
++
++/* Get max_sets and the index of a queried set
++ */
++#define IP_SET_OP_MAX_SETS 0x00000020
++struct ip_set_req_max_sets {
++ unsigned op;
++ unsigned version;
++ ip_set_id_t max_sets; /* max_sets */
++ ip_set_id_t sets; /* real number of sets */
++ union ip_set_name_index set; /* index of set if name used */
++};
++
++/* Get the id and name of the sets plus size for next step */
++#define IP_SET_OP_LIST_SIZE 0x00000201
++#define IP_SET_OP_SAVE_SIZE 0x00000202
++struct ip_set_req_setnames {
++ unsigned op;
++ ip_set_id_t index; /* set to list/save */
++ size_t size; /* size to get setdata/bindings */
++ /* followed by sets number of struct ip_set_name_list */
++};
++
++struct ip_set_name_list {
++ char name[IP_SET_MAXNAMELEN];
++ char typename[IP_SET_MAXNAMELEN];
++ ip_set_id_t index;
++ ip_set_id_t id;
++};
++
++/* The actual list operation */
++#define IP_SET_OP_LIST 0x00000203
++struct ip_set_req_list {
++ IP_SET_REQ_BYINDEX;
++ /* sets number of struct ip_set_list in reply */
++};
++
++struct ip_set_list {
++ ip_set_id_t index;
++ ip_set_id_t binding;
++ u_int32_t ref;
++ size_t header_size; /* Set header data of header_size */
++ size_t members_size; /* Set members data of members_size */
++ size_t bindings_size; /* Set bindings data of bindings_size */
++};
++
++struct ip_set_hash_list {
++ ip_set_ip_t ip;
++ ip_set_id_t binding;
++};
++
++/* The save operation */
++#define IP_SET_OP_SAVE 0x00000204
++/* Uses ip_set_req_list, in the reply replaced by
++ * sets number of struct ip_set_save plus a marker
++ * ip_set_save followed by ip_set_hash_save structures.
++ */
++struct ip_set_save {
++ ip_set_id_t index;
++ ip_set_id_t binding;
++ size_t header_size; /* Set header data of header_size */
++ size_t members_size; /* Set members data of members_size */
++};
++
++/* At restoring, ip == 0 means default binding for the given set: */
++struct ip_set_hash_save {
++ ip_set_ip_t ip;
++ ip_set_id_t id;
++ ip_set_id_t binding;
++};
++
++/* The restore operation */
++#define IP_SET_OP_RESTORE 0x00000205
++/* Uses ip_set_req_setnames followed by ip_set_restore structures
++ * plus a marker ip_set_restore, followed by ip_set_hash_save
++ * structures.
++ */
++struct ip_set_restore {
++ char name[IP_SET_MAXNAMELEN];
++ char typename[IP_SET_MAXNAMELEN];
++ ip_set_id_t index;
++ size_t header_size; /* Create data of header_size */
++ size_t members_size; /* Set members data of members_size */
++};
++
++static inline int bitmap_bytes(ip_set_ip_t a, ip_set_ip_t b)
++{
++ return 4 * ((((b - a + 8) / 8) + 3) / 4);
++}
++
++#ifdef __KERNEL__
++
++#define ip_set_printk(format, args...) \
++ do { \
++ printk("%s: %s: ", __FILE__, __FUNCTION__); \
++ printk(format "\n" , ## args); \
++ } while (0)
++
++#if defined(IP_SET_DEBUG)
++#define DP(format, args...) \
++ do { \
++ printk("%s: %s (DBG): ", __FILE__, __FUNCTION__);\
++ printk(format "\n" , ## args); \
++ } while (0)
++#define IP_SET_ASSERT(x) \
++ do { \
++ if (!(x)) \
++ printk("IP_SET_ASSERT: %s:%i(%s)\n", \
++ __FILE__, __LINE__, __FUNCTION__); \
++ } while (0)
++#else
++#define DP(format, args...)
++#define IP_SET_ASSERT(x)
++#endif
++
++struct ip_set;
++
++/*
++ * The ip_set_type definition - one per set type, e.g. "ipmap".
++ *
++ * Each individual set has a pointer, set->type, going to one
++ * of these structures. Function pointers inside the structure implement
++ * the real behaviour of the sets.
++ *
++ * If not mentioned differently, the implementation behind the function
++ * pointers of a set_type, is expected to return 0 if ok, and a negative
++ * errno (e.g. -EINVAL) on error.
++ */
++struct ip_set_type {
++ struct list_head list; /* next in list of set types */
++
++ /* test for IP in set (kernel: iptables -m set src|dst)
++ * return 0 if not in set, 1 if in set.
++ */
++ int (*testip_kernel) (struct ip_set *set,
++ const struct sk_buff * skb,
++ ip_set_ip_t *ip,
++ const u_int32_t *flags,
++ unsigned char index);
++
++ /* test for IP in set (userspace: ipset -T set IP)
++ * return 0 if not in set, 1 if in set.
++ */
++ int (*testip) (struct ip_set *set,
++ const void *data, size_t size,
++ ip_set_ip_t *ip);
++
++ /*
++ * Size of the data structure passed by when
++ * adding/deletin/testing an entry.
++ */
++ size_t reqsize;
++
++ /* Add IP into set (userspace: ipset -A set IP)
++ * Return -EEXIST if the address is already in the set,
++ * and -ERANGE if the address lies outside the set bounds.
++ * If the address was not already in the set, 0 is returned.
++ */
++ int (*addip) (struct ip_set *set,
++ const void *data, size_t size,
++ ip_set_ip_t *ip);
++
++ /* Add IP into set (kernel: iptables ... -j SET set src|dst)
++ * Return -EEXIST if the address is already in the set,
++ * and -ERANGE if the address lies outside the set bounds.
++ * If the address was not already in the set, 0 is returned.
++ */
++ int (*addip_kernel) (struct ip_set *set,
++ const struct sk_buff * skb,
++ ip_set_ip_t *ip,
++ const u_int32_t *flags,
++ unsigned char index);
++
++ /* remove IP from set (userspace: ipset -D set --entry x)
++ * Return -EEXIST if the address is NOT in the set,
++ * and -ERANGE if the address lies outside the set bounds.
++ * If the address really was in the set, 0 is returned.
++ */
++ int (*delip) (struct ip_set *set,
++ const void *data, size_t size,
++ ip_set_ip_t *ip);
++
++ /* remove IP from set (kernel: iptables ... -j SET --entry x)
++ * Return -EEXIST if the address is NOT in the set,
++ * and -ERANGE if the address lies outside the set bounds.
++ * If the address really was in the set, 0 is returned.
++ */
++ int (*delip_kernel) (struct ip_set *set,
++ const struct sk_buff * skb,
++ ip_set_ip_t *ip,
++ const u_int32_t *flags,
++ unsigned char index);
++
++ /* new set creation - allocated type specific items
++ */
++ int (*create) (struct ip_set *set,
++ const void *data, size_t size);
++
++ /* retry the operation after successfully tweaking the set
++ */
++ int (*retry) (struct ip_set *set);
++
++ /* set destruction - free type specific items
++ * There is no return value.
++ * Can be called only when child sets are destroyed.
++ */
++ void (*destroy) (struct ip_set *set);
++
++ /* set flushing - reset all bits in the set, or something similar.
++ * There is no return value.
++ */
++ void (*flush) (struct ip_set *set);
++
++ /* Listing: size needed for header
++ */
++ size_t header_size;
++
++ /* Listing: Get the header
++ *
++ * Fill in the information in "data".
++ * This function is always run after list_header_size() under a
++ * writelock on the set. Therefor is the length of "data" always
++ * correct.
++ */
++ void (*list_header) (const struct ip_set *set,
++ void *data);
++
++ /* Listing: Get the size for the set members
++ */
++ int (*list_members_size) (const struct ip_set *set);
++
++ /* Listing: Get the set members
++ *
++ * Fill in the information in "data".
++ * This function is always run after list_member_size() under a
++ * writelock on the set. Therefor is the length of "data" always
++ * correct.
++ */
++ void (*list_members) (const struct ip_set *set,
++ void *data);
++
++ char typename[IP_SET_MAXNAMELEN];
++ unsigned char features;
++ int protocol_version;
++
++ /* Set this to THIS_MODULE if you are a module, otherwise NULL */
++ struct module *me;
++};
++
++extern int ip_set_register_set_type(struct ip_set_type *set_type);
++extern void ip_set_unregister_set_type(struct ip_set_type *set_type);
++
++/* A generic ipset */
++struct ip_set {
++ char name[IP_SET_MAXNAMELEN]; /* the name of the set */
++ rwlock_t lock; /* lock for concurrency control */
++ ip_set_id_t id; /* set id for swapping */
++ ip_set_id_t binding; /* default binding for the set */
++ atomic_t ref; /* in kernel and in hash references */
++ struct ip_set_type *type; /* the set types */
++ void *data; /* pooltype specific data */
++};
++
++/* Structure to bind set elements to sets */
++struct ip_set_hash {
++ struct list_head list; /* list of clashing entries in hash */
++ ip_set_ip_t ip; /* ip from set */
++ ip_set_id_t id; /* set id */
++ ip_set_id_t binding; /* set we bind the element to */
++};
++
++/* register and unregister set references */
++extern ip_set_id_t ip_set_get_byname(const char name[IP_SET_MAXNAMELEN]);
++extern ip_set_id_t ip_set_get_byindex(ip_set_id_t id);
++extern void ip_set_put(ip_set_id_t id);
++
++/* API for iptables set match, and SET target */
++extern void ip_set_addip_kernel(ip_set_id_t id,
++ const struct sk_buff *skb,
++ const u_int32_t *flags);
++extern void ip_set_delip_kernel(ip_set_id_t id,
++ const struct sk_buff *skb,
++ const u_int32_t *flags);
++extern int ip_set_testip_kernel(ip_set_id_t id,
++ const struct sk_buff *skb,
++ const u_int32_t *flags);
++
++#endif /* __KERNEL__ */
++
++#endif /*_IP_SET_H*/
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_iphash.h
+@@ -0,0 +1,30 @@
++#ifndef __IP_SET_IPHASH_H
++#define __IP_SET_IPHASH_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "iphash"
++#define MAX_RANGE 0x0000FFFF
++
++struct ip_set_iphash {
++ ip_set_ip_t *members; /* the iphash proper */
++ uint32_t elements; /* number of elements */
++ uint32_t hashsize; /* hash size */
++ uint16_t probes; /* max number of probes */
++ uint16_t resize; /* resize factor in percent */
++ ip_set_ip_t netmask; /* netmask */
++ void *initval[0]; /* initvals for jhash_1word */
++};
++
++struct ip_set_req_iphash_create {
++ uint32_t hashsize;
++ uint16_t probes;
++ uint16_t resize;
++ ip_set_ip_t netmask;
++};
++
++struct ip_set_req_iphash {
++ ip_set_ip_t ip;
++};
++
++#endif /* __IP_SET_IPHASH_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_ipmap.h
+@@ -0,0 +1,56 @@
++#ifndef __IP_SET_IPMAP_H
++#define __IP_SET_IPMAP_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "ipmap"
++#define MAX_RANGE 0x0000FFFF
++
++struct ip_set_ipmap {
++ void *members; /* the ipmap proper */
++ ip_set_ip_t first_ip; /* host byte order, included in range */
++ ip_set_ip_t last_ip; /* host byte order, included in range */
++ ip_set_ip_t netmask; /* subnet netmask */
++ ip_set_ip_t sizeid; /* size of set in IPs */
++ ip_set_ip_t hosts; /* number of hosts in a subnet */
++};
++
++struct ip_set_req_ipmap_create {
++ ip_set_ip_t from;
++ ip_set_ip_t to;
++ ip_set_ip_t netmask;
++};
++
++struct ip_set_req_ipmap {
++ ip_set_ip_t ip;
++};
++
++unsigned int
++mask_to_bits(ip_set_ip_t mask)
++{
++ unsigned int bits = 32;
++ ip_set_ip_t maskaddr;
++
++ if (mask == 0xFFFFFFFF)
++ return bits;
++
++ maskaddr = 0xFFFFFFFE;
++ while (--bits >= 0 && maskaddr != mask)
++ maskaddr <<= 1;
++
++ return bits;
++}
++
++ip_set_ip_t
++range_to_mask(ip_set_ip_t from, ip_set_ip_t to, unsigned int *bits)
++{
++ ip_set_ip_t mask = 0xFFFFFFFE;
++
++ *bits = 32;
++ while (--(*bits) >= 0 && mask && (to & mask) != from)
++ mask <<= 1;
++
++ return mask;
++}
++
++#endif /* __IP_SET_IPMAP_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_ipporthash.h
+@@ -0,0 +1,34 @@
++#ifndef __IP_SET_IPPORTHASH_H
++#define __IP_SET_IPPORTHASH_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "ipporthash"
++#define MAX_RANGE 0x0000FFFF
++#define INVALID_PORT (MAX_RANGE + 1)
++
++struct ip_set_ipporthash {
++ ip_set_ip_t *members; /* the ipporthash proper */
++ uint32_t elements; /* number of elements */
++ uint32_t hashsize; /* hash size */
++ uint16_t probes; /* max number of probes */
++ uint16_t resize; /* resize factor in percent */
++ ip_set_ip_t first_ip; /* host byte order, included in range */
++ ip_set_ip_t last_ip; /* host byte order, included in range */
++ void *initval[0]; /* initvals for jhash_1word */
++};
++
++struct ip_set_req_ipporthash_create {
++ uint32_t hashsize;
++ uint16_t probes;
++ uint16_t resize;
++ ip_set_ip_t from;
++ ip_set_ip_t to;
++};
++
++struct ip_set_req_ipporthash {
++ ip_set_ip_t ip;
++ ip_set_ip_t port;
++};
++
++#endif /* __IP_SET_IPPORTHASH_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_iptree.h
+@@ -0,0 +1,40 @@
++#ifndef __IP_SET_IPTREE_H
++#define __IP_SET_IPTREE_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "iptree"
++#define MAX_RANGE 0x0000FFFF
++
++struct ip_set_iptreed {
++ unsigned long expires[256]; /* x.x.x.ADDR */
++};
++
++struct ip_set_iptreec {
++ struct ip_set_iptreed *tree[256]; /* x.x.ADDR.* */
++};
++
++struct ip_set_iptreeb {
++ struct ip_set_iptreec *tree[256]; /* x.ADDR.*.* */
++};
++
++struct ip_set_iptree {
++ unsigned int timeout;
++ unsigned int gc_interval;
++#ifdef __KERNEL__
++ uint32_t elements; /* number of elements */
++ struct timer_list gc;
++ struct ip_set_iptreeb *tree[256]; /* ADDR.*.*.* */
++#endif
++};
++
++struct ip_set_req_iptree_create {
++ unsigned int timeout;
++};
++
++struct ip_set_req_iptree {
++ ip_set_ip_t ip;
++ unsigned int timeout;
++};
++
++#endif /* __IP_SET_IPTREE_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_iptreemap.h
+@@ -0,0 +1,40 @@
++#ifndef __IP_SET_IPTREEMAP_H
++#define __IP_SET_IPTREEMAP_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "iptreemap"
++
++#ifdef __KERNEL__
++struct ip_set_iptreemap_d {
++ unsigned char bitmap[32]; /* x.x.x.y */
++};
++
++struct ip_set_iptreemap_c {
++ struct ip_set_iptreemap_d *tree[256]; /* x.x.y.x */
++};
++
++struct ip_set_iptreemap_b {
++ struct ip_set_iptreemap_c *tree[256]; /* x.y.x.x */
++ unsigned char dirty[32];
++};
++#endif
++
++struct ip_set_iptreemap {
++ unsigned int gc_interval;
++#ifdef __KERNEL__
++ struct timer_list gc;
++ struct ip_set_iptreemap_b *tree[256]; /* y.x.x.x */
++#endif
++};
++
++struct ip_set_req_iptreemap_create {
++ unsigned int gc_interval;
++};
++
++struct ip_set_req_iptreemap {
++ ip_set_ip_t start;
++ ip_set_ip_t end;
++};
++
++#endif /* __IP_SET_IPTREEMAP_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_jhash.h
+@@ -0,0 +1,148 @@
++#ifndef _LINUX_IPSET_JHASH_H
++#define _LINUX_IPSET_JHASH_H
++
++/* This is a copy of linux/jhash.h but the types u32/u8 are changed
++ * to __u32/__u8 so that the header file can be included into
++ * userspace code as well. Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
++ */
++
++/* jhash.h: Jenkins hash support.
++ *
++ * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net)
++ *
++ * http://burtleburtle.net/bob/hash/
++ *
++ * These are the credits from Bob's sources:
++ *
++ * lookup2.c, by Bob Jenkins, December 1996, Public Domain.
++ * hash(), hash2(), hash3, and mix() are externally useful functions.
++ * Routines to test the hash are included if SELF_TEST is defined.
++ * You can use this free for any purpose. It has no warranty.
++ *
++ * Copyright (C) 2003 David S. Miller (davem@redhat.com)
++ *
++ * I've modified Bob's hash to be useful in the Linux kernel, and
++ * any bugs present are surely my fault. -DaveM
++ */
++
++/* NOTE: Arguments are modified. */
++#define __jhash_mix(a, b, c) \
++{ \
++ a -= b; a -= c; a ^= (c>>13); \
++ b -= c; b -= a; b ^= (a<<8); \
++ c -= a; c -= b; c ^= (b>>13); \
++ a -= b; a -= c; a ^= (c>>12); \
++ b -= c; b -= a; b ^= (a<<16); \
++ c -= a; c -= b; c ^= (b>>5); \
++ a -= b; a -= c; a ^= (c>>3); \
++ b -= c; b -= a; b ^= (a<<10); \
++ c -= a; c -= b; c ^= (b>>15); \
++}
++
++/* The golden ration: an arbitrary value */
++#define JHASH_GOLDEN_RATIO 0x9e3779b9
++
++/* The most generic version, hashes an arbitrary sequence
++ * of bytes. No alignment or length assumptions are made about
++ * the input key.
++ */
++static inline __u32 jhash(void *key, __u32 length, __u32 initval)
++{
++ __u32 a, b, c, len;
++ __u8 *k = key;
++
++ len = length;
++ a = b = JHASH_GOLDEN_RATIO;
++ c = initval;
++
++ while (len >= 12) {
++ a += (k[0] +((__u32)k[1]<<8) +((__u32)k[2]<<16) +((__u32)k[3]<<24));
++ b += (k[4] +((__u32)k[5]<<8) +((__u32)k[6]<<16) +((__u32)k[7]<<24));
++ c += (k[8] +((__u32)k[9]<<8) +((__u32)k[10]<<16)+((__u32)k[11]<<24));
++
++ __jhash_mix(a,b,c);
++
++ k += 12;
++ len -= 12;
++ }
++
++ c += length;
++ switch (len) {
++ case 11: c += ((__u32)k[10]<<24);
++ case 10: c += ((__u32)k[9]<<16);
++ case 9 : c += ((__u32)k[8]<<8);
++ case 8 : b += ((__u32)k[7]<<24);
++ case 7 : b += ((__u32)k[6]<<16);
++ case 6 : b += ((__u32)k[5]<<8);
++ case 5 : b += k[4];
++ case 4 : a += ((__u32)k[3]<<24);
++ case 3 : a += ((__u32)k[2]<<16);
++ case 2 : a += ((__u32)k[1]<<8);
++ case 1 : a += k[0];
++ };
++
++ __jhash_mix(a,b,c);
++
++ return c;
++}
++
++/* A special optimized version that handles 1 or more of __u32s.
++ * The length parameter here is the number of __u32s in the key.
++ */
++static inline __u32 jhash2(__u32 *k, __u32 length, __u32 initval)
++{
++ __u32 a, b, c, len;
++
++ a = b = JHASH_GOLDEN_RATIO;
++ c = initval;
++ len = length;
++
++ while (len >= 3) {
++ a += k[0];
++ b += k[1];
++ c += k[2];
++ __jhash_mix(a, b, c);
++ k += 3; len -= 3;
++ }
++
++ c += length * 4;
++
++ switch (len) {
++ case 2 : b += k[1];
++ case 1 : a += k[0];
++ };
++
++ __jhash_mix(a,b,c);
++
++ return c;
++}
++
++
++/* A special ultra-optimized versions that knows they are hashing exactly
++ * 3, 2 or 1 word(s).
++ *
++ * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally
++ * done at the end is not done here.
++ */
++static inline __u32 jhash_3words(__u32 a, __u32 b, __u32 c, __u32 initval)
++{
++ a += JHASH_GOLDEN_RATIO;
++ b += JHASH_GOLDEN_RATIO;
++ c += initval;
++
++ __jhash_mix(a, b, c);
++
++ return c;
++}
++
++static inline __u32 jhash_2words(__u32 a, __u32 b, __u32 initval)
++{
++ return jhash_3words(a, b, 0, initval);
++}
++
++static inline __u32 jhash_1word(__u32 a, __u32 initval)
++{
++ return jhash_3words(a, 0, 0, initval);
++}
++
++#endif /* _LINUX_IPSET_JHASH_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_macipmap.h
+@@ -0,0 +1,38 @@
++#ifndef __IP_SET_MACIPMAP_H
++#define __IP_SET_MACIPMAP_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "macipmap"
++#define MAX_RANGE 0x0000FFFF
++
++/* general flags */
++#define IPSET_MACIP_MATCHUNSET 1
++
++/* per ip flags */
++#define IPSET_MACIP_ISSET 1
++
++struct ip_set_macipmap {
++ void *members; /* the macipmap proper */
++ ip_set_ip_t first_ip; /* host byte order, included in range */
++ ip_set_ip_t last_ip; /* host byte order, included in range */
++ u_int32_t flags;
++};
++
++struct ip_set_req_macipmap_create {
++ ip_set_ip_t from;
++ ip_set_ip_t to;
++ u_int32_t flags;
++};
++
++struct ip_set_req_macipmap {
++ ip_set_ip_t ip;
++ unsigned char ethernet[ETH_ALEN];
++};
++
++struct ip_set_macip {
++ unsigned short flags;
++ unsigned char ethernet[ETH_ALEN];
++};
++
++#endif /* __IP_SET_MACIPMAP_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_malloc.h
+@@ -0,0 +1,116 @@
++#ifndef _IP_SET_MALLOC_H
++#define _IP_SET_MALLOC_H
++
++#ifdef __KERNEL__
++
++/* Memory allocation and deallocation */
++static size_t max_malloc_size = 0;
++
++static inline void init_max_malloc_size(void)
++{
++#define CACHE(x) max_malloc_size = x;
++#include <linux/kmalloc_sizes.h>
++#undef CACHE
++}
++
++static inline void * ip_set_malloc(size_t bytes)
++{
++ if (bytes > max_malloc_size)
++ return vmalloc(bytes);
++ else
++ return kmalloc(bytes, GFP_KERNEL);
++}
++
++static inline void ip_set_free(void * data, size_t bytes)
++{
++ if (bytes > max_malloc_size)
++ vfree(data);
++ else
++ kfree(data);
++}
++
++struct harray {
++ size_t max_elements;
++ void *arrays[0];
++};
++
++static inline void *
++harray_malloc(size_t hashsize, size_t typesize, int flags)
++{
++ struct harray *harray;
++ size_t max_elements, size, i, j;
++
++ if (!max_malloc_size)
++ init_max_malloc_size();
++
++ if (typesize > max_malloc_size)
++ return NULL;
++
++ max_elements = max_malloc_size/typesize;
++ size = hashsize/max_elements;
++ if (hashsize % max_elements)
++ size++;
++
++ /* Last pointer signals end of arrays */
++ harray = kmalloc(sizeof(struct harray) + (size + 1) * sizeof(void *),
++ flags);
++
++ if (!harray)
++ return NULL;
++
++ for (i = 0; i < size - 1; i++) {
++ harray->arrays[i] = kmalloc(max_elements * typesize, flags);
++ if (!harray->arrays[i])
++ goto undo;
++ memset(harray->arrays[i], 0, max_elements * typesize);
++ }
++ harray->arrays[i] = kmalloc((hashsize - i * max_elements) * typesize,
++ flags);
++ if (!harray->arrays[i])
++ goto undo;
++ memset(harray->arrays[i], 0, (hashsize - i * max_elements) * typesize);
++
++ harray->max_elements = max_elements;
++ harray->arrays[size] = NULL;
++
++ return (void *)harray;
++
++ undo:
++ for (j = 0; j < i; j++) {
++ kfree(harray->arrays[j]);
++ }
++ kfree(harray);
++ return NULL;
++}
++
++static inline void harray_free(void *h)
++{
++ struct harray *harray = (struct harray *) h;
++ size_t i;
++
++ for (i = 0; harray->arrays[i] != NULL; i++)
++ kfree(harray->arrays[i]);
++ kfree(harray);
++}
++
++static inline void harray_flush(void *h, size_t hashsize, size_t typesize)
++{
++ struct harray *harray = (struct harray *) h;
++ size_t i;
++
++ for (i = 0; harray->arrays[i+1] != NULL; i++)
++ memset(harray->arrays[i], 0, harray->max_elements * typesize);
++ memset(harray->arrays[i], 0,
++ (hashsize - i * harray->max_elements) * typesize);
++}
++
++#define HARRAY_ELEM(h, type, which) \
++({ \
++ struct harray *__h = (struct harray *)(h); \
++ ((type)((__h)->arrays[(which)/(__h)->max_elements]) \
++ + (which)%(__h)->max_elements); \
++})
++
++#endif /* __KERNEL__ */
++
++#endif /*_IP_SET_MALLOC_H*/
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_nethash.h
+@@ -0,0 +1,55 @@
++#ifndef __IP_SET_NETHASH_H
++#define __IP_SET_NETHASH_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "nethash"
++#define MAX_RANGE 0x0000FFFF
++
++struct ip_set_nethash {
++ ip_set_ip_t *members; /* the nethash proper */
++ uint32_t elements; /* number of elements */
++ uint32_t hashsize; /* hash size */
++ uint16_t probes; /* max number of probes */
++ uint16_t resize; /* resize factor in percent */
++ unsigned char cidr[30]; /* CIDR sizes */
++ void *initval[0]; /* initvals for jhash_1word */
++};
++
++struct ip_set_req_nethash_create {
++ uint32_t hashsize;
++ uint16_t probes;
++ uint16_t resize;
++};
++
++struct ip_set_req_nethash {
++ ip_set_ip_t ip;
++ unsigned char cidr;
++};
++
++static unsigned char shifts[] = {255, 253, 249, 241, 225, 193, 129, 1};
++
++static inline ip_set_ip_t
++pack(ip_set_ip_t ip, unsigned char cidr)
++{
++ ip_set_ip_t addr, *paddr = &addr;
++ unsigned char n, t, *a;
++
++ addr = htonl(ip & (0xFFFFFFFF << (32 - (cidr))));
++#ifdef __KERNEL__
++ DP("ip:%u.%u.%u.%u/%u", NIPQUAD(addr), cidr);
++#endif
++ n = cidr / 8;
++ t = cidr % 8;
++ a = &((unsigned char *)paddr)[n];
++ *a = *a /(1 << (8 - t)) + shifts[t];
++#ifdef __KERNEL__
++ DP("n: %u, t: %u, a: %u", n, t, *a);
++ DP("ip:%u.%u.%u.%u/%u, %u.%u.%u.%u",
++ HIPQUAD(ip), cidr, NIPQUAD(addr));
++#endif
++
++ return ntohl(addr);
++}
++
++#endif /* __IP_SET_NETHASH_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ip_set_portmap.h
+@@ -0,0 +1,25 @@
++#ifndef __IP_SET_PORTMAP_H
++#define __IP_SET_PORTMAP_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++#define SETTYPE_NAME "portmap"
++#define MAX_RANGE 0x0000FFFF
++#define INVALID_PORT (MAX_RANGE + 1)
++
++struct ip_set_portmap {
++ void *members; /* the portmap proper */
++ ip_set_ip_t first_port; /* host byte order, included in range */
++ ip_set_ip_t last_port; /* host byte order, included in range */
++};
++
++struct ip_set_req_portmap_create {
++ ip_set_ip_t from;
++ ip_set_ip_t to;
++};
++
++struct ip_set_req_portmap {
++ ip_set_ip_t port;
++};
++
++#endif /* __IP_SET_PORTMAP_H */
+--- /dev/null
++++ b/include/linux/netfilter_ipv4/ipt_set.h
+@@ -0,0 +1,21 @@
++#ifndef _IPT_SET_H
++#define _IPT_SET_H
++
++#include <linux/netfilter_ipv4/ip_set.h>
++
++struct ipt_set_info {
++ ip_set_id_t index;
++ u_int32_t flags[IP_SET_MAX_BINDINGS + 1];
++};
++
++/* match info */
++struct ipt_set_info_match {
++ struct ipt_set_info match_set;
++};
++
++struct ipt_set_info_target {
++ struct ipt_set_info add_set;
++ struct ipt_set_info del_set;
++};
++
++#endif /*_IPT_SET_H*/
+--- /dev/null
++++ b/net/ipv4/netfilter/ip_set.c
+@@ -0,0 +1,2003 @@
++/* Copyright (C) 2000-2002 Joakim Axelsson <gozem@linux.nu>
++ * Patrick Schaaf <bof@bof.de>
++ * Copyright (C) 2003-2004 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++/* Kernel module for IP set management */
++
++#include <linux/version.h>
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
++#include <linux/config.h>
++#endif
++#include <linux/module.h>
++#include <linux/moduleparam.h>
++#include <linux/kmod.h>
++#include <linux/ip.h>
++#include <linux/skbuff.h>
++#include <linux/random.h>
++#include <linux/jhash.h>
++#include <linux/netfilter_ipv4/ip_tables.h>
++#include <linux/errno.h>
++#include <linux/semaphore.h>
++#include <asm/uaccess.h>
++#include <asm/bitops.h>
++#include <linux/spinlock.h>
++#include <linux/vmalloc.h>
++
++#define ASSERT_READ_LOCK(x)
++#define ASSERT_WRITE_LOCK(x)
++#include <linux/netfilter_ipv4/ip_set.h>
++
++static struct list_head set_type_list; /* all registered sets */
++static struct ip_set **ip_set_list; /* all individual sets */
++static DEFINE_RWLOCK(ip_set_lock); /* protects the lists and the hash */
++static DECLARE_MUTEX(ip_set_app_mutex); /* serializes user access */
++static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX;
++static ip_set_id_t ip_set_bindings_hash_size = CONFIG_IP_NF_SET_HASHSIZE;
++static struct list_head *ip_set_hash; /* hash of bindings */
++static unsigned int ip_set_hash_random; /* random seed */
++
++/*
++ * Sets are identified either by the index in ip_set_list or by id.
++ * The id never changes and is used to find a key in the hash.
++ * The index may change by swapping and used at all other places
++ * (set/SET netfilter modules, binding value, etc.)
++ *
++ * Userspace requests are serialized by ip_set_mutex and sets can
++ * be deleted only from userspace. Therefore ip_set_list locking
++ * must obey the following rules:
++ *
++ * - kernel requests: read and write locking mandatory
++ * - user requests: read locking optional, write locking mandatory
++ */
++
++static inline void
++__ip_set_get(ip_set_id_t index)
++{
++ atomic_inc(&ip_set_list[index]->ref);
++}
++
++static inline void
++__ip_set_put(ip_set_id_t index)
++{
++ atomic_dec(&ip_set_list[index]->ref);
++}
++
++/*
++ * Binding routines
++ */
++
++static inline struct ip_set_hash *
++__ip_set_find(u_int32_t key, ip_set_id_t id, ip_set_ip_t ip)
++{
++ struct ip_set_hash *set_hash;
++
++ list_for_each_entry(set_hash, &ip_set_hash[key], list)
++ if (set_hash->id == id && set_hash->ip == ip)
++ return set_hash;
++
++ return NULL;
++}
++
++static ip_set_id_t
++ip_set_find_in_hash(ip_set_id_t id, ip_set_ip_t ip)
++{
++ u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
++ % ip_set_bindings_hash_size;
++ struct ip_set_hash *set_hash;
++
++ ASSERT_READ_LOCK(&ip_set_lock);
++ IP_SET_ASSERT(ip_set_list[id]);
++ DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));
++
++ set_hash = __ip_set_find(key, id, ip);
++
++ DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name,
++ HIPQUAD(ip),
++ set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
++
++ return (set_hash != NULL ? set_hash->binding : IP_SET_INVALID_ID);
++}
++
++static inline void
++__set_hash_del(struct ip_set_hash *set_hash)
++{
++ ASSERT_WRITE_LOCK(&ip_set_lock);
++ IP_SET_ASSERT(ip_set_list[set_hash->binding]);
++
++ __ip_set_put(set_hash->binding);
++ list_del(&set_hash->list);
++ kfree(set_hash);
++}
++
++static int
++ip_set_hash_del(ip_set_id_t id, ip_set_ip_t ip)
++{
++ u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
++ % ip_set_bindings_hash_size;
++ struct ip_set_hash *set_hash;
++
++ IP_SET_ASSERT(ip_set_list[id]);
++ DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip));
++ write_lock_bh(&ip_set_lock);
++ set_hash = __ip_set_find(key, id, ip);
++ DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name,
++ HIPQUAD(ip),
++ set_hash != NULL ? ip_set_list[set_hash->binding]->name : "");
++
++ if (set_hash != NULL)
++ __set_hash_del(set_hash);
++ write_unlock_bh(&ip_set_lock);
++ return 0;
++}
++
++static int
++ip_set_hash_add(ip_set_id_t id, ip_set_ip_t ip, ip_set_id_t binding)
++{
++ u_int32_t key = jhash_2words(id, ip, ip_set_hash_random)
++ % ip_set_bindings_hash_size;
++ struct ip_set_hash *set_hash;
++ int ret = 0;
++
++ IP_SET_ASSERT(ip_set_list[id]);
++ IP_SET_ASSERT(ip_set_list[binding]);
++ DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name,
++ HIPQUAD(ip), ip_set_list[binding]->name);
++ write_lock_bh(&ip_set_lock);
++ set_hash = __ip_set_find(key, id, ip);
++ if (!set_hash) {
++ set_hash = kmalloc(sizeof(struct ip_set_hash), GFP_ATOMIC);
++ if (!set_hash) {
++ ret = -ENOMEM;
++ goto unlock;
++ }
++ INIT_LIST_HEAD(&set_hash->list);
++ set_hash->id = id;
++ set_hash->ip = ip;
++ list_add(&set_hash->list, &ip_set_hash[key]);
++ } else {
++ IP_SET_ASSERT(ip_set_list[set_hash->binding]);
++ DP("overwrite binding: %s",
++ ip_set_list[set_hash->binding]->name);
++ __ip_set_put(set_hash->binding);
++ }
++ set_hash->binding = binding;
++ __ip_set_get(set_hash->binding);
++ DP("stored: key %u, id %u (%s), ip %u.%u.%u.%u, binding %u (%s)",
++ key, id, ip_set_list[id]->name,
++ HIPQUAD(ip), binding, ip_set_list[binding]->name);
++ unlock:
++ write_unlock_bh(&ip_set_lock);
++ return ret;
++}
++
++#define FOREACH_HASH_DO(fn, args...) \
++({ \
++ ip_set_id_t __key; \
++ struct ip_set_hash *__set_hash; \
++ \
++ for (__key = 0; __key < ip_set_bindings_hash_size; __key++) { \
++ list_for_each_entry(__set_hash, &ip_set_hash[__key], list) \
++ fn(__set_hash , ## args); \
++ } \
++})
++
++#define FOREACH_HASH_RW_DO(fn, args...) \
++({ \
++ ip_set_id_t __key; \
++ struct ip_set_hash *__set_hash, *__n; \
++ \
++ ASSERT_WRITE_LOCK(&ip_set_lock); \
++ for (__key = 0; __key < ip_set_bindings_hash_size; __key++) { \
++ list_for_each_entry_safe(__set_hash, __n, &ip_set_hash[__key], list)\
++ fn(__set_hash , ## args); \
++ } \
++})
++
++/* Add, del and test set entries from kernel */
++
++#define follow_bindings(index, set, ip) \
++((index = ip_set_find_in_hash((set)->id, ip)) != IP_SET_INVALID_ID \
++ || (index = (set)->binding) != IP_SET_INVALID_ID)
++
++int
++ip_set_testip_kernel(ip_set_id_t index,
++ const struct sk_buff *skb,
++ const u_int32_t *flags)
++{
++ struct ip_set *set;
++ ip_set_ip_t ip;
++ int res;
++ unsigned char i = 0;
++
++ IP_SET_ASSERT(flags[i]);
++ read_lock_bh(&ip_set_lock);
++ do {
++ set = ip_set_list[index];
++ IP_SET_ASSERT(set);
++ DP("set %s, index %u", set->name, index);
++ read_lock_bh(&set->lock);
++ res = set->type->testip_kernel(set, skb, &ip, flags, i++);
++ read_unlock_bh(&set->lock);
++ i += !!(set->type->features & IPSET_DATA_DOUBLE);
++ } while (res > 0
++ && flags[i]
++ && follow_bindings(index, set, ip));
++ read_unlock_bh(&ip_set_lock);
++
++ return res;
++}
++
++void
++ip_set_addip_kernel(ip_set_id_t index,
++ const struct sk_buff *skb,
++ const u_int32_t *flags)
++{
++ struct ip_set *set;
++ ip_set_ip_t ip;
++ int res;
++ unsigned char i = 0;
++
++ IP_SET_ASSERT(flags[i]);
++ retry:
++ read_lock_bh(&ip_set_lock);
++ do {
++ set = ip_set_list[index];
++ IP_SET_ASSERT(set);
++ DP("set %s, index %u", set->name, index);
++ write_lock_bh(&set->lock);
++ res = set->type->addip_kernel(set, skb, &ip, flags, i++);
++ write_unlock_bh(&set->lock);
++ i += !!(set->type->features & IPSET_DATA_DOUBLE);
++ } while ((res == 0 || res == -EEXIST)
++ && flags[i]
++ && follow_bindings(index, set, ip));
++ read_unlock_bh(&ip_set_lock);
++
++ if (res == -EAGAIN
++ && set->type->retry
++ && (res = set->type->retry(set)) == 0)
++ goto retry;
++}
++
++void
++ip_set_delip_kernel(ip_set_id_t index,
++ const struct sk_buff *skb,
++ const u_int32_t *flags)
++{
++ struct ip_set *set;
++ ip_set_ip_t ip;
++ int res;
++ unsigned char i = 0;
++
++ IP_SET_ASSERT(flags[i]);
++ read_lock_bh(&ip_set_lock);
++ do {
++ set = ip_set_list[index];
++ IP_SET_ASSERT(set);
++ DP("set %s, index %u", set->name, index);
++ write_lock_bh(&set->lock);
++ res = set->type->delip_kernel(set, skb, &ip, flags, i++);
++ write_unlock_bh(&set->lock);
++ i += !!(set->type->features & IPSET_DATA_DOUBLE);
++ } while ((res == 0 || res == -EEXIST)
++ && flags[i]
++ && follow_bindings(index, set, ip));
++ read_unlock_bh(&ip_set_lock);
++}
++
++/* Register and deregister settype */
++
++static inline struct ip_set_type *
++find_set_type(const char *name)
++{
++ struct ip_set_type *set_type;
++
++ list_for_each_entry(set_type, &set_type_list, list)
++ if (!strncmp(set_type->typename, name, IP_SET_MAXNAMELEN - 1))
++ return set_type;
++ return NULL;
++}
++
++int
++ip_set_register_set_type(struct ip_set_type *set_type)
++{
++ int ret = 0;
++
++ if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) {
++ ip_set_printk("'%s' uses wrong protocol version %u (want %u)",
++ set_type->typename,
++ set_type->protocol_version,
++ IP_SET_PROTOCOL_VERSION);
++ return -EINVAL;
++ }
++
++ write_lock_bh(&ip_set_lock);
++ if (find_set_type(set_type->typename)) {
++ /* Duplicate! */
++ ip_set_printk("'%s' already registered!",
++ set_type->typename);
++ ret = -EINVAL;
++ goto unlock;
++ }
++ if (!try_module_get(THIS_MODULE)) {
++ ret = -EFAULT;
++ goto unlock;
++ }
++ list_add(&set_type->list, &set_type_list);
++ DP("'%s' registered.", set_type->typename);
++ unlock:
++ write_unlock_bh(&ip_set_lock);
++ return ret;
++}
++
++void
++ip_set_unregister_set_type(struct ip_set_type *set_type)
++{
++ write_lock_bh(&ip_set_lock);
++ if (!find_set_type(set_type->typename)) {
++ ip_set_printk("'%s' not registered?",
++ set_type->typename);
++ goto unlock;
++ }
++ list_del(&set_type->list);
++ module_put(THIS_MODULE);
++ DP("'%s' unregistered.", set_type->typename);
++ unlock:
++ write_unlock_bh(&ip_set_lock);
++
++}
++
++/*
++ * Userspace routines
++ */
++
++/*
++ * Find set by name, reference it once. The reference makes sure the
++ * thing pointed to, does not go away under our feet. Drop the reference
++ * later, using ip_set_put().
++ */
++ip_set_id_t
++ip_set_get_byname(const char *name)
++{
++ ip_set_id_t i, index = IP_SET_INVALID_ID;
++
++ down(&ip_set_app_mutex);
++ for (i = 0; i < ip_set_max; i++) {
++ if (ip_set_list[i] != NULL
++ && strcmp(ip_set_list[i]->name, name) == 0) {
++ __ip_set_get(i);
++ index = i;
++ break;
++ }
++ }
++ up(&ip_set_app_mutex);
++ return index;
++}
++
++/*
++ * Find set by index, reference it once. The reference makes sure the
++ * thing pointed to, does not go away under our feet. Drop the reference
++ * later, using ip_set_put().
++ */
++ip_set_id_t
++ip_set_get_byindex(ip_set_id_t index)
++{
++ down(&ip_set_app_mutex);
++
++ if (index >= ip_set_max)
++ return IP_SET_INVALID_ID;
++
++ if (ip_set_list[index])
++ __ip_set_get(index);
++ else
++ index = IP_SET_INVALID_ID;
++
++ up(&ip_set_app_mutex);
++ return index;
++}
++
++/*
++ * If the given set pointer points to a valid set, decrement
++ * reference count by 1. The caller shall not assume the index
++ * to be valid, after calling this function.
++ */
++void ip_set_put(ip_set_id_t index)
++{
++ down(&ip_set_app_mutex);
++ if (ip_set_list[index])
++ __ip_set_put(index);
++ up(&ip_set_app_mutex);
++}
++
++/* Find a set by name or index */
++static ip_set_id_t
++ip_set_find_byname(const char *name)
++{
++ ip_set_id_t i, index = IP_SET_INVALID_ID;
++
++ for (i = 0; i < ip_set_max; i++) {
++ if (ip_set_list[i] != NULL
++ && strcmp(ip_set_list[i]->name, name) == 0) {
++ index = i;
++ break;
++ }
++ }
++ return index;
++}
++
++static ip_set_id_t
++ip_set_find_byindex(ip_set_id_t index)
++{
++ if (index >= ip_set_max || ip_set_list[index] == NULL)
++ index = IP_SET_INVALID_ID;
++
++ return index;
++}
++
++/*
++ * Add, del, test, bind and unbind
++ */
++
++static inline int
++__ip_set_testip(struct ip_set *set,
++ const void *data,
++ size_t size,
++ ip_set_ip_t *ip)
++{
++ int res;
++
++ read_lock_bh(&set->lock);
++ res = set->type->testip(set, data, size, ip);
++ read_unlock_bh(&set->lock);
++
++ return res;
++}
++
++static int
++__ip_set_addip(ip_set_id_t index,
++ const void *data,
++ size_t size)
++{
++ struct ip_set *set = ip_set_list[index];
++ ip_set_ip_t ip;
++ int res;
++
++ IP_SET_ASSERT(set);
++ do {
++ write_lock_bh(&set->lock);
++ res = set->type->addip(set, data, size, &ip);
++ write_unlock_bh(&set->lock);
++ } while (res == -EAGAIN
++ && set->type->retry
++ && (res = set->type->retry(set)) == 0);
++
++ return res;
++}
++
++static int
++ip_set_addip(ip_set_id_t index,
++ const void *data,
++ size_t size)
++{
++
++ return __ip_set_addip(index,
++ data + sizeof(struct ip_set_req_adt),
++ size - sizeof(struct ip_set_req_adt));
++}
++
++static int
++ip_set_delip(ip_set_id_t index,
++ const void *data,
++ size_t size)
++{
++ struct ip_set *set = ip_set_list[index];
++ ip_set_ip_t ip;
++ int res;
++
++ IP_SET_ASSERT(set);
++ write_lock_bh(&set->lock);
++ res = set->type->delip(set,
++ data + sizeof(struct ip_set_req_adt),
++ size - sizeof(struct ip_set_req_adt),
++ &ip);
++ write_unlock_bh(&set->lock);
++
++ return res;
++}
++
++static int
++ip_set_testip(ip_set_id_t index,
++ const void *data,
++ size_t size)
++{
++ struct ip_set *set = ip_set_list[index];
++ ip_set_ip_t ip;
++ int res;
++
++ IP_SET_ASSERT(set);
++ res = __ip_set_testip(set,
++ data + sizeof(struct ip_set_req_adt),
++ size - sizeof(struct ip_set_req_adt),
++ &ip);
++
++ return (res > 0 ? -EEXIST : res);
++}
++
++static int
++ip_set_bindip(ip_set_id_t index,
++ const void *data,
++ size_t size)
++{
++ struct ip_set *set = ip_set_list[index];
++ struct ip_set_req_bind *req_bind;
++ ip_set_id_t binding;
++ ip_set_ip_t ip;
++ int res;
++
++ IP_SET_ASSERT(set);
++ if (size < sizeof(struct ip_set_req_bind))
++ return -EINVAL;
++
++ req_bind = (struct ip_set_req_bind *) data;
++ req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
++
++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
++ /* Default binding of a set */
++ char *binding_name;
++
++ if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
++ return -EINVAL;
++
++ binding_name = (char *)(data + sizeof(struct ip_set_req_bind));
++ binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
++
++ binding = ip_set_find_byname(binding_name);
++ if (binding == IP_SET_INVALID_ID)
++ return -ENOENT;
++
++ write_lock_bh(&ip_set_lock);
++ /* Sets as binding values are referenced */
++ if (set->binding != IP_SET_INVALID_ID)
++ __ip_set_put(set->binding);
++ set->binding = binding;
++ __ip_set_get(set->binding);
++ write_unlock_bh(&ip_set_lock);
++
++ return 0;
++ }
++ binding = ip_set_find_byname(req_bind->binding);
++ if (binding == IP_SET_INVALID_ID)
++ return -ENOENT;
++
++ res = __ip_set_testip(set,
++ data + sizeof(struct ip_set_req_bind),
++ size - sizeof(struct ip_set_req_bind),
++ &ip);
++ DP("set %s, ip: %u.%u.%u.%u, binding %s",
++ set->name, HIPQUAD(ip), ip_set_list[binding]->name);
++
++ if (res >= 0)
++ res = ip_set_hash_add(set->id, ip, binding);
++
++ return res;
++}
++
++#define FOREACH_SET_DO(fn, args...) \
++({ \
++ ip_set_id_t __i; \
++ struct ip_set *__set; \
++ \
++ for (__i = 0; __i < ip_set_max; __i++) { \
++ __set = ip_set_list[__i]; \
++ if (__set != NULL) \
++ fn(__set , ##args); \
++ } \
++})
++
++static inline void
++__set_hash_del_byid(struct ip_set_hash *set_hash, ip_set_id_t id)
++{
++ if (set_hash->id == id)
++ __set_hash_del(set_hash);
++}
++
++static inline void
++__unbind_default(struct ip_set *set)
++{
++ if (set->binding != IP_SET_INVALID_ID) {
++ /* Sets as binding values are referenced */
++ __ip_set_put(set->binding);
++ set->binding = IP_SET_INVALID_ID;
++ }
++}
++
++static int
++ip_set_unbindip(ip_set_id_t index,
++ const void *data,
++ size_t size)
++{
++ struct ip_set *set;
++ struct ip_set_req_bind *req_bind;
++ ip_set_ip_t ip;
++ int res;
++
++ DP("");
++ if (size < sizeof(struct ip_set_req_bind))
++ return -EINVAL;
++
++ req_bind = (struct ip_set_req_bind *) data;
++ req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
++
++ DP("%u %s", index, req_bind->binding);
++ if (index == IP_SET_INVALID_ID) {
++ /* unbind :all: */
++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
++ /* Default binding of sets */
++ write_lock_bh(&ip_set_lock);
++ FOREACH_SET_DO(__unbind_default);
++ write_unlock_bh(&ip_set_lock);
++ return 0;
++ } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
++ /* Flush all bindings of all sets*/
++ write_lock_bh(&ip_set_lock);
++ FOREACH_HASH_RW_DO(__set_hash_del);
++ write_unlock_bh(&ip_set_lock);
++ return 0;
++ }
++ DP("unreachable reached!");
++ return -EINVAL;
++ }
++
++ set = ip_set_list[index];
++ IP_SET_ASSERT(set);
++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
++ /* Default binding of set */
++ ip_set_id_t binding = ip_set_find_byindex(set->binding);
++
++ if (binding == IP_SET_INVALID_ID)
++ return -ENOENT;
++
++ write_lock_bh(&ip_set_lock);
++ /* Sets in hash values are referenced */
++ __ip_set_put(set->binding);
++ set->binding = IP_SET_INVALID_ID;
++ write_unlock_bh(&ip_set_lock);
++
++ return 0;
++ } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) {
++ /* Flush all bindings */
++
++ write_lock_bh(&ip_set_lock);
++ FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
++ write_unlock_bh(&ip_set_lock);
++ return 0;
++ }
++
++ res = __ip_set_testip(set,
++ data + sizeof(struct ip_set_req_bind),
++ size - sizeof(struct ip_set_req_bind),
++ &ip);
++
++ DP("set %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip));
++ if (res >= 0)
++ res = ip_set_hash_del(set->id, ip);
++
++ return res;
++}
++
++static int
++ip_set_testbind(ip_set_id_t index,
++ const void *data,
++ size_t size)
++{
++ struct ip_set *set = ip_set_list[index];
++ struct ip_set_req_bind *req_bind;
++ ip_set_id_t binding;
++ ip_set_ip_t ip;
++ int res;
++
++ IP_SET_ASSERT(set);
++ if (size < sizeof(struct ip_set_req_bind))
++ return -EINVAL;
++
++ req_bind = (struct ip_set_req_bind *) data;
++ req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0';
++
++ if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) {
++ /* Default binding of set */
++ char *binding_name;
++
++ if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN)
++ return -EINVAL;
++
++ binding_name = (char *)(data + sizeof(struct ip_set_req_bind));
++ binding_name[IP_SET_MAXNAMELEN - 1] = '\0';
++
++ binding = ip_set_find_byname(binding_name);
++ if (binding == IP_SET_INVALID_ID)
++ return -ENOENT;
++
++ res = (set->binding == binding) ? -EEXIST : 0;
++
++ return res;
++ }
++ binding = ip_set_find_byname(req_bind->binding);
++ if (binding == IP_SET_INVALID_ID)
++ return -ENOENT;
++
++
++ res = __ip_set_testip(set,
++ data + sizeof(struct ip_set_req_bind),
++ size - sizeof(struct ip_set_req_bind),
++ &ip);
++ DP("set %s, ip: %u.%u.%u.%u, binding %s",
++ set->name, HIPQUAD(ip), ip_set_list[binding]->name);
++
++ if (res >= 0)
++ res = (ip_set_find_in_hash(set->id, ip) == binding)
++ ? -EEXIST : 0;
++
++ return res;
++}
++
++static struct ip_set_type *
++find_set_type_rlock(const char *typename)
++{
++ struct ip_set_type *type;
++
++ read_lock_bh(&ip_set_lock);
++ type = find_set_type(typename);
++ if (type == NULL)
++ read_unlock_bh(&ip_set_lock);
++
++ return type;
++}
++
++static int
++find_free_id(const char *name,
++ ip_set_id_t *index,
++ ip_set_id_t *id)
++{
++ ip_set_id_t i;
++
++ *id = IP_SET_INVALID_ID;
++ for (i = 0; i < ip_set_max; i++) {
++ if (ip_set_list[i] == NULL) {
++ if (*id == IP_SET_INVALID_ID)
++ *id = *index = i;
++ } else if (strcmp(name, ip_set_list[i]->name) == 0)
++ /* Name clash */
++ return -EEXIST;
++ }
++ if (*id == IP_SET_INVALID_ID)
++ /* No free slot remained */
++ return -ERANGE;
++ /* Check that index is usable as id (swapping) */
++ check:
++ for (i = 0; i < ip_set_max; i++) {
++ if (ip_set_list[i] != NULL
++ && ip_set_list[i]->id == *id) {
++ *id = i;
++ goto check;
++ }
++ }
++ return 0;
++}
++
++/*
++ * Create a set
++ */
++static int
++ip_set_create(const char *name,
++ const char *typename,
++ ip_set_id_t restore,
++ const void *data,
++ size_t size)
++{
++ struct ip_set *set;
++ ip_set_id_t index = 0, id;
++ int res = 0;
++
++ DP("setname: %s, typename: %s, id: %u", name, typename, restore);
++ /*
++ * First, and without any locks, allocate and initialize
++ * a normal base set structure.
++ */
++ set = kmalloc(sizeof(struct ip_set), GFP_KERNEL);
++ if (!set)
++ return -ENOMEM;
++ set->lock = RW_LOCK_UNLOCKED;
++ strncpy(set->name, name, IP_SET_MAXNAMELEN);
++ set->binding = IP_SET_INVALID_ID;
++ atomic_set(&set->ref, 0);
++
++ /*
++ * Next, take the &ip_set_lock, check that we know the type,
++ * and take a reference on the type, to make sure it
++ * stays available while constructing our new set.
++ *
++ * After referencing the type, we drop the &ip_set_lock,
++ * and let the new set construction run without locks.
++ */
++ set->type = find_set_type_rlock(typename);
++ if (set->type == NULL) {
++ /* Try loading the module */
++ char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1];
++ strcpy(modulename, "ip_set_");
++ strcat(modulename, typename);
++ DP("try to load %s", modulename);
++ request_module(modulename);
++ set->type = find_set_type_rlock(typename);
++ }
++ if (set->type == NULL) {
++ ip_set_printk("no set type '%s', set '%s' not created",
++ typename, name);
++ res = -ENOENT;
++ goto out;
++ }
++ if (!try_module_get(set->type->me)) {
++ read_unlock_bh(&ip_set_lock);
++ res = -EFAULT;
++ goto out;
++ }
++ read_unlock_bh(&ip_set_lock);
++
++ /*
++ * Without holding any locks, create private part.
++ */
++ res = set->type->create(set, data, size);
++ if (res != 0)
++ goto put_out;
++
++ /* BTW, res==0 here. */
++
++ /*
++ * Here, we have a valid, constructed set. &ip_set_lock again,
++ * find free id/index and check that it is not already in
++ * ip_set_list.
++ */
++ write_lock_bh(&ip_set_lock);
++ if ((res = find_free_id(set->name, &index, &id)) != 0) {
++ DP("no free id!");
++ goto cleanup;
++ }
++
++ /* Make sure restore gets the same index */
++ if (restore != IP_SET_INVALID_ID && index != restore) {
++ DP("Can't restore, sets are screwed up");
++ res = -ERANGE;
++ goto cleanup;
++ }
++
++ /*
++ * Finally! Add our shiny new set to the list, and be done.
++ */
++ DP("create: '%s' created with index %u, id %u!", set->name, index, id);
++ set->id = id;
++ ip_set_list[index] = set;
++ write_unlock_bh(&ip_set_lock);
++ return res;
++
++ cleanup:
++ write_unlock_bh(&ip_set_lock);
++ set->type->destroy(set);
++ put_out:
++ module_put(set->type->me);
++ out:
++ kfree(set);
++ return res;
++}
++
++/*
++ * Destroy a given existing set
++ */
++static void
++ip_set_destroy_set(ip_set_id_t index)
++{
++ struct ip_set *set = ip_set_list[index];
++
++ IP_SET_ASSERT(set);
++ DP("set: %s", set->name);
++ write_lock_bh(&ip_set_lock);
++ FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id);
++ if (set->binding != IP_SET_INVALID_ID)
++ __ip_set_put(set->binding);
++ ip_set_list[index] = NULL;
++ write_unlock_bh(&ip_set_lock);
++
++ /* Must call it without holding any lock */
++ set->type->destroy(set);
++ module_put(set->type->me);
++ kfree(set);
++}
++
++/*
++ * Destroy a set - or all sets
++ * Sets must not be referenced/used.
++ */
++static int
++ip_set_destroy(ip_set_id_t index)
++{
++ ip_set_id_t i;
++
++ /* ref modification always protected by the mutex */
++ if (index != IP_SET_INVALID_ID) {
++ if (atomic_read(&ip_set_list[index]->ref))
++ return -EBUSY;
++ ip_set_destroy_set(index);
++ } else {
++ for (i = 0; i < ip_set_max; i++) {
++ if (ip_set_list[i] != NULL
++ && (atomic_read(&ip_set_list[i]->ref)))
++ return -EBUSY;
++ }
++
++ for (i = 0; i < ip_set_max; i++) {
++ if (ip_set_list[i] != NULL)
++ ip_set_destroy_set(i);
++ }
++ }
++ return 0;
++}
++
++static void
++ip_set_flush_set(struct ip_set *set)
++{
++ DP("set: %s %u", set->name, set->id);
++
++ write_lock_bh(&set->lock);
++ set->type->flush(set);
++ write_unlock_bh(&set->lock);
++}
++
++/*
++ * Flush data in a set - or in all sets
++ */
++static int
++ip_set_flush(ip_set_id_t index)
++{
++ if (index != IP_SET_INVALID_ID) {
++ IP_SET_ASSERT(ip_set_list[index]);
++ ip_set_flush_set(ip_set_list[index]);
++ } else
++ FOREACH_SET_DO(ip_set_flush_set);
++
++ return 0;
++}
++
++/* Rename a set */
++static int
++ip_set_rename(ip_set_id_t index, const char *name)
++{
++ struct ip_set *set = ip_set_list[index];
++ ip_set_id_t i;
++ int res = 0;
++
++ DP("set: %s to %s", set->name, name);
++ write_lock_bh(&ip_set_lock);
++ for (i = 0; i < ip_set_max; i++) {
++ if (ip_set_list[i] != NULL
++ && strncmp(ip_set_list[i]->name,
++ name,
++ IP_SET_MAXNAMELEN - 1) == 0) {
++ res = -EEXIST;
++ goto unlock;
++ }
++ }
++ strncpy(set->name, name, IP_SET_MAXNAMELEN);
++ unlock:
++ write_unlock_bh(&ip_set_lock);
++ return res;
++}
++
++/*
++ * Swap two sets so that name/index points to the other.
++ * References are also swapped.
++ */
++static int
++ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index)
++{
++ struct ip_set *from = ip_set_list[from_index];
++ struct ip_set *to = ip_set_list[to_index];
++ char from_name[IP_SET_MAXNAMELEN];
++ u_int32_t from_ref;
++
++ DP("set: %s to %s", from->name, to->name);
++ /* Features must not change. Artifical restriction. */
++ if (from->type->features != to->type->features)
++ return -ENOEXEC;
++
++ /* No magic here: ref munging protected by the mutex */
++ write_lock_bh(&ip_set_lock);
++ strncpy(from_name, from->name, IP_SET_MAXNAMELEN);
++ from_ref = atomic_read(&from->ref);
++
++ strncpy(from->name, to->name, IP_SET_MAXNAMELEN);
++ atomic_set(&from->ref, atomic_read(&to->ref));
++ strncpy(to->name, from_name, IP_SET_MAXNAMELEN);
++ atomic_set(&to->ref, from_ref);
++
++ ip_set_list[from_index] = to;
++ ip_set_list[to_index] = from;
++
++ write_unlock_bh(&ip_set_lock);
++ return 0;
++}
++
++/*
++ * List set data
++ */
++
++static inline void
++__set_hash_bindings_size_list(struct ip_set_hash *set_hash,
++ ip_set_id_t id, size_t *size)
++{
++ if (set_hash->id == id)
++ *size += sizeof(struct ip_set_hash_list);
++}
++
++static inline void
++__set_hash_bindings_size_save(struct ip_set_hash *set_hash,
++ ip_set_id_t id, size_t *size)
++{
++ if (set_hash->id == id)
++ *size += sizeof(struct ip_set_hash_save);
++}
++
++static inline void
++__set_hash_bindings(struct ip_set_hash *set_hash,
++ ip_set_id_t id, void *data, int *used)
++{
++ if (set_hash->id == id) {
++ struct ip_set_hash_list *hash_list =
++ (struct ip_set_hash_list *)(data + *used);
++
++ hash_list->ip = set_hash->ip;
++ hash_list->binding = set_hash->binding;
++ *used += sizeof(struct ip_set_hash_list);
++ }
++}
++
++static int ip_set_list_set(ip_set_id_t index,
++ void *data,
++ int *used,
++ int len)
++{
++ struct ip_set *set = ip_set_list[index];
++ struct ip_set_list *set_list;
++
++ /* Pointer to our header */
++ set_list = (struct ip_set_list *) (data + *used);
++
++ DP("set: %s, used: %d %p %p", set->name, *used, data, data + *used);
++
++ /* Get and ensure header size */
++ if (*used + sizeof(struct ip_set_list) > len)
++ goto not_enough_mem;
++ *used += sizeof(struct ip_set_list);
++
++ read_lock_bh(&set->lock);
++ /* Get and ensure set specific header size */
++ set_list->header_size = set->type->header_size;
++ if (*used + set_list->header_size > len)
++ goto unlock_set;
++
++ /* Fill in the header */
++ set_list->index = index;
++ set_list->binding = set->binding;
++ set_list->ref = atomic_read(&set->ref);
++
++ /* Fill in set spefific header data */
++ set->type->list_header(set, data + *used);
++ *used += set_list->header_size;
++
++ /* Get and ensure set specific members size */
++ set_list->members_size = set->type->list_members_size(set);
++ if (*used + set_list->members_size > len)
++ goto unlock_set;
++
++ /* Fill in set spefific members data */
++ set->type->list_members(set, data + *used);
++ *used += set_list->members_size;
++ read_unlock_bh(&set->lock);
++
++ /* Bindings */
++
++ /* Get and ensure set specific bindings size */
++ set_list->bindings_size = 0;
++ FOREACH_HASH_DO(__set_hash_bindings_size_list,
++ set->id, &set_list->bindings_size);
++ if (*used + set_list->bindings_size > len)
++ goto not_enough_mem;
++
++ /* Fill in set spefific bindings data */
++ FOREACH_HASH_DO(__set_hash_bindings, set->id, data, used);
++
++ return 0;
++
++ unlock_set:
++ read_unlock_bh(&set->lock);
++ not_enough_mem:
++ DP("not enough mem, try again");
++ return -EAGAIN;
++}
++
++/*
++ * Save sets
++ */
++static int ip_set_save_set(ip_set_id_t index,
++ void *data,
++ int *used,
++ int len)
++{
++ struct ip_set *set;
++ struct ip_set_save *set_save;
++
++ /* Pointer to our header */
++ set_save = (struct ip_set_save *) (data + *used);
++
++ /* Get and ensure header size */
++ if (*used + sizeof(struct ip_set_save) > len)
++ goto not_enough_mem;
++ *used += sizeof(struct ip_set_save);
++
++ set = ip_set_list[index];
++ DP("set: %s, used: %u(%u) %p %p", set->name, *used, len,
++ data, data + *used);
++
++ read_lock_bh(&set->lock);
++ /* Get and ensure set specific header size */
++ set_save->header_size = set->type->header_size;
++ if (*used + set_save->header_size > len)
++ goto unlock_set;
++
++ /* Fill in the header */
++ set_save->index = index;
++ set_save->binding = set->binding;
++
++ /* Fill in set spefific header data */
++ set->type->list_header(set, data + *used);
++ *used += set_save->header_size;
++
++ DP("set header filled: %s, used: %u(%u) %p %p", set->name, *used,
++ set_save->header_size, data, data + *used);
++ /* Get and ensure set specific members size */
++ set_save->members_size = set->type->list_members_size(set);
++ if (*used + set_save->members_size > len)
++ goto unlock_set;
++
++ /* Fill in set spefific members data */
++ set->type->list_members(set, data + *used);
++ *used += set_save->members_size;
++ read_unlock_bh(&set->lock);
++ DP("set members filled: %s, used: %u(%u) %p %p", set->name, *used,
++ set_save->members_size, data, data + *used);
++ return 0;
++
++ unlock_set:
++ read_unlock_bh(&set->lock);
++ not_enough_mem:
++ DP("not enough mem, try again");
++ return -EAGAIN;
++}
++
++static inline void
++__set_hash_save_bindings(struct ip_set_hash *set_hash,
++ ip_set_id_t id,
++ void *data,
++ int *used,
++ int len,
++ int *res)
++{
++ if (*res == 0
++ && (id == IP_SET_INVALID_ID || set_hash->id == id)) {
++ struct ip_set_hash_save *hash_save =
++ (struct ip_set_hash_save *)(data + *used);
++ /* Ensure bindings size */
++ if (*used + sizeof(struct ip_set_hash_save) > len) {
++ *res = -ENOMEM;
++ return;
++ }
++ hash_save->id = set_hash->id;
++ hash_save->ip = set_hash->ip;
++ hash_save->binding = set_hash->binding;
++ *used += sizeof(struct ip_set_hash_save);
++ }
++}
++
++static int ip_set_save_bindings(ip_set_id_t index,
++ void *data,
++ int *used,
++ int len)
++{
++ int res = 0;
++ struct ip_set_save *set_save;
++
++ DP("used %u, len %u", *used, len);
++ /* Get and ensure header size */
++ if (*used + sizeof(struct ip_set_save) > len)
++ return -ENOMEM;
++
++ /* Marker */
++ set_save = (struct ip_set_save *) (data + *used);
++ set_save->index = IP_SET_INVALID_ID;
++ set_save->header_size = 0;
++ set_save->members_size = 0;
++ *used += sizeof(struct ip_set_save);
++
++ DP("marker added used %u, len %u", *used, len);
++ /* Fill in bindings data */
++ if (index != IP_SET_INVALID_ID)
++ /* Sets are identified by id in hash */
++ index = ip_set_list[index]->id;
++ FOREACH_HASH_DO(__set_hash_save_bindings, index, data, used, len, &res);
++
++ return res;
++}
++
++/*
++ * Restore sets
++ */
++static int ip_set_restore(void *data,
++ int len)
++{
++ int res = 0;
++ int line = 0, used = 0, members_size;
++ struct ip_set *set;
++ struct ip_set_hash_save *hash_save;
++ struct ip_set_restore *set_restore;
++ ip_set_id_t index;
++
++ /* Loop to restore sets */
++ while (1) {
++ line++;
++
++ DP("%u %u %u", used, sizeof(struct ip_set_restore), len);
++ /* Get and ensure header size */
++ if (used + sizeof(struct ip_set_restore) > len)
++ return line;
++ set_restore = (struct ip_set_restore *) (data + used);
++ used += sizeof(struct ip_set_restore);
++
++ /* Ensure data size */
++ if (used
++ + set_restore->header_size
++ + set_restore->members_size > len)
++ return line;
++
++ /* Check marker */
++ if (set_restore->index == IP_SET_INVALID_ID) {
++ line--;
++ goto bindings;
++ }
++
++ /* Try to create the set */
++ DP("restore %s %s", set_restore->name, set_restore->typename);
++ res = ip_set_create(set_restore->name,
++ set_restore->typename,
++ set_restore->index,
++ data + used,
++ set_restore->header_size);
++
++ if (res != 0)
++ return line;
++ used += set_restore->header_size;
++
++ index = ip_set_find_byindex(set_restore->index);
++ DP("index %u, restore_index %u", index, set_restore->index);
++ if (index != set_restore->index)
++ return line;
++ /* Try to restore members data */
++ set = ip_set_list[index];
++ members_size = 0;
++ DP("members_size %u reqsize %u",
++ set_restore->members_size, set->type->reqsize);
++ while (members_size + set->type->reqsize <=
++ set_restore->members_size) {
++ line++;
++ DP("members: %u, line %u", members_size, line);
++ res = __ip_set_addip(index,
++ data + used + members_size,
++ set->type->reqsize);
++ if (!(res == 0 || res == -EEXIST))
++ return line;
++ members_size += set->type->reqsize;
++ }
++
++ DP("members_size %u %u",
++ set_restore->members_size, members_size);
++ if (members_size != set_restore->members_size)
++ return line++;
++ used += set_restore->members_size;
++ }
++
++ bindings:
++ /* Loop to restore bindings */
++ while (used < len) {
++ line++;
++
++ DP("restore binding, line %u", line);
++ /* Get and ensure size */
++ if (used + sizeof(struct ip_set_hash_save) > len)
++ return line;
++ hash_save = (struct ip_set_hash_save *) (data + used);
++ used += sizeof(struct ip_set_hash_save);
++
++ /* hash_save->id is used to store the index */
++ index = ip_set_find_byindex(hash_save->id);
++ DP("restore binding index %u, id %u, %u -> %u",
++ index, hash_save->id, hash_save->ip, hash_save->binding);
++ if (index != hash_save->id)
++ return line;
++ if (ip_set_find_byindex(hash_save->binding) == IP_SET_INVALID_ID) {
++ DP("corrupt binding set index %u", hash_save->binding);
++ return line;
++ }
++ set = ip_set_list[hash_save->id];
++ /* Null valued IP means default binding */
++ if (hash_save->ip)
++ res = ip_set_hash_add(set->id,
++ hash_save->ip,
++ hash_save->binding);
++ else {
++ IP_SET_ASSERT(set->binding == IP_SET_INVALID_ID);
++ write_lock_bh(&ip_set_lock);
++ set->binding = hash_save->binding;
++ __ip_set_get(set->binding);
++ write_unlock_bh(&ip_set_lock);
++ DP("default binding: %u", set->binding);
++ }
++ if (res != 0)
++ return line;
++ }
++ if (used != len)
++ return line;
++
++ return 0;
++}
++
++static int
++ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len)
++{
++ void *data;
++ int res = 0; /* Assume OK */
++ unsigned *op;
++ struct ip_set_req_adt *req_adt;
++ ip_set_id_t index = IP_SET_INVALID_ID;
++ int (*adtfn)(ip_set_id_t index,
++ const void *data, size_t size);
++ struct fn_table {
++ int (*fn)(ip_set_id_t index,
++ const void *data, size_t size);
++ } adtfn_table[] =
++ { { ip_set_addip }, { ip_set_delip }, { ip_set_testip},
++ { ip_set_bindip}, { ip_set_unbindip }, { ip_set_testbind },
++ };
++
++ DP("optval=%d, user=%p, len=%d", optval, user, len);
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++ if (optval != SO_IP_SET)
++ return -EBADF;
++ if (len <= sizeof(unsigned)) {
++ ip_set_printk("short userdata (want >%zu, got %u)",
++ sizeof(unsigned), len);
++ return -EINVAL;
++ }
++ data = vmalloc(len);
++ if (!data) {
++ DP("out of mem for %u bytes", len);
++ return -ENOMEM;
++ }
++ if (copy_from_user(data, user, len) != 0) {
++ res = -EFAULT;
++ goto done;
++ }
++ if (down_interruptible(&ip_set_app_mutex)) {
++ res = -EINTR;
++ goto done;
++ }
++
++ op = (unsigned *)data;
++ DP("op=%x", *op);
++
++ if (*op < IP_SET_OP_VERSION) {
++ /* Check the version at the beginning of operations */
++ struct ip_set_req_version *req_version =
++ (struct ip_set_req_version *) data;
++ if (req_version->version != IP_SET_PROTOCOL_VERSION) {
++ res = -EPROTO;
++ goto done;
++ }
++ }
++
++ switch (*op) {
++ case IP_SET_OP_CREATE:{
++ struct ip_set_req_create *req_create
++ = (struct ip_set_req_create *) data;
++
++ if (len < sizeof(struct ip_set_req_create)) {
++ ip_set_printk("short CREATE data (want >=%zu, got %u)",
++ sizeof(struct ip_set_req_create), len);
++ res = -EINVAL;
++ goto done;
++ }
++ req_create->name[IP_SET_MAXNAMELEN - 1] = '\0';
++ req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0';
++ res = ip_set_create(req_create->name,
++ req_create->typename,
++ IP_SET_INVALID_ID,
++ data + sizeof(struct ip_set_req_create),
++ len - sizeof(struct ip_set_req_create));
++ goto done;
++ }
++ case IP_SET_OP_DESTROY:{
++ struct ip_set_req_std *req_destroy
++ = (struct ip_set_req_std *) data;
++
++ if (len != sizeof(struct ip_set_req_std)) {
++ ip_set_printk("invalid DESTROY data (want %zu, got %u)",
++ sizeof(struct ip_set_req_std), len);
++ res = -EINVAL;
++ goto done;
++ }
++ if (strcmp(req_destroy->name, IPSET_TOKEN_ALL) == 0) {
++ /* Destroy all sets */
++ index = IP_SET_INVALID_ID;
++ } else {
++ req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0';
++ index = ip_set_find_byname(req_destroy->name);
++
++ if (index == IP_SET_INVALID_ID) {
++ res = -ENOENT;
++ goto done;
++ }
++ }
++
++ res = ip_set_destroy(index);
++ goto done;
++ }
++ case IP_SET_OP_FLUSH:{
++ struct ip_set_req_std *req_flush =
++ (struct ip_set_req_std *) data;
++
++ if (len != sizeof(struct ip_set_req_std)) {
++ ip_set_printk("invalid FLUSH data (want %zu, got %u)",
++ sizeof(struct ip_set_req_std), len);
++ res = -EINVAL;
++ goto done;