restructure; cleanup
[openwrt/svn-archive/archive.git] / obsolete-buildroot / sources / openwrt / kernel / patches / 130-nfsswap.patch
1 diff -Nurb src/linux/linux.orig/Documentation/netswap.txt src/linux/linux/Documentation/netswap.txt
2 --- src/linux/linux.orig/Documentation/netswap.txt 1969-12-31 19:00:00.000000000 -0500
3 +++ src/linux/linux/Documentation/netswap.txt 2004-05-31 02:18:03.000000000 -0400
4 @@ -0,0 +1,51 @@
5 + Swapping over network
6 +
7 +Support for this is enabled via the CONFIG_NETSWAP option, which is
8 +automatically enabled when enabling swap files located on NFS volumes
9 +(CONFIG_SWAP_VIA_NFS).
10 +
11 +When swapping to files located on a network file system like NFS or
12 +CODA or others or to nbd (network block device, see `nbd.txt')
13 +partitions there is the problem that this requires additional memory,
14 +besides the page which is currently swapped in or out, probably at
15 +least two more pages for each page in question.
16 +
17 +This means that not only must there be free space left in the swap
18 +file or the swap partition, but in addition there must be enough free
19 +memory left in the system to perform the swap out of pages.
20 +
21 +This is particularly painful as receiving data over the network itself
22 +consumes memory, and this memory is allocated from an interrupt
23 +context (i.e. in the interrupt handler of the network card). That
24 +means that on a congested network there are chances that the machine
25 +runs out of memory, simply because the network device's interrupt
26 +routines allocate memory faster than it is freed by swapping via
27 +network.
28 +
29 +To cope with this problem, there is a new socket option `SO_SWAPPING'
30 +which has to be set on the `SOL_SOCKET' level with setsockopt() (see
31 +setsockopt(2)). When this option is set on any network socket, then
32 +the system will start to drop network packets it receives on any other
33 +socket when the number of free pages falls below a certain threshold.
34 +
35 +This threshold initially is 4 pages less than `freepages.min' (see
36 +`Documentation/sysctl/vm.txt') but can be tuned using the sysctl
37 +interface by writing to the file `/proc/sys/net/swapping/threshold'.
38 +
39 +There are two other files:
40 +
41 +`/proc/sys/net/swapping/dropped':
42 + how many network packets have been dropped so far. This file is
43 + writable, writing to it simply sets the counter to the given value
44 + (useful for resetting the counter).
45 +
46 +`/proc/sys/net/swapping/sock_count':
47 + How many network sockets have the `SO_SWAPPING' option set (read
48 + only, of course).
49 +
50 +When using swap-files on NFS volumes, then the `SO_SWAPPING' option is
51 +set or cleared by swapon/swapoff system calls, so the user need not
52 +care about it.
53 +
54 +Swapping over the network is insecure unless the data is
55 +encrypted, which is not the case with NFS. It is also very slow.
56 diff -Nurb src/linux/linux.orig/Documentation/nfsswap.txt src/linux/linux/Documentation/nfsswap.txt
57 --- src/linux/linux.orig/Documentation/nfsswap.txt 1969-12-31 19:00:00.000000000 -0500
58 +++ src/linux/linux/Documentation/nfsswap.txt 2004-05-31 02:18:03.000000000 -0400
59 @@ -0,0 +1,41 @@
60 + Swapping to files on NFS volumes
61 +
62 +To do this you have to say `Y' or `M' to the CONFIG_SWAP_VIA_NFS
63 +configuration option. When compiling support for this as a module you
64 +should read `Documentation/modules.txt'. For auto-loading of the
65 +module during the `swapon' system call you have to place a line like
66 +
67 +alias swapfile-mod nfsswap
68 +
69 +in `/etc/modules.conf' (or `/etc/conf.modules', depending on your
70 +setup). NFS volumes holding swapfiles should be mounted with `rsize'
71 +and `wsize' set to something less than the size of a page, otherwise
72 +deadlocks caused by memory fragmentation can happen, i.e. mount the
73 +volume which is to hold the swapfiles with
74 +
75 +mount -t nfs -o rsize=2048,wsize=2048 NFS_SERVER_IP:/server_volume /mount_point
76 +
77 +or set the option in `/etc/fstab'. Read `Documentation/nfsroot.txt' to
78 +learn how to set mount options for the root file system, if your swap
79 +files are to be located on the root file system.
80 +
81 +Setting the `rsize' and `wsize' to anything less than PAGE_SIZE is a
82 +performance hit, so you probably want to have at least two volumes
83 +mounted, one for the swapfiles, one for the rest.
84 +
85 +You may want to read `Documentation/netswap.txt' as well.
86 +
87 +Swapfiles on NFS volumes can be treated like any other swapfile,
88 +i.e.
89 +
90 +dd if=/dev/zero of=/swapfiles/SWAPFILE bs=1k count=20480
91 +mkswap /swapfiles/SWAPFILE
92 +swapon /swapfiles/SWAPFILE
93 +
94 +will create a 20M swapfile and tell the system to use it. Actually,
95 +one could use lseek(2) to create an empty swapfile. This is different
96 +from swapfiles located on local harddisk.
97 +
98 +Swapping over the network is insecure unless the data is
99 +encrypted, which is not the case with NFS. It is also very slow.
100 +
101 diff -Nurb src/linux/linux.orig/drivers/block/blkpg.c src/linux/linux/drivers/block/blkpg.c
102 --- src/linux/linux.orig/drivers/block/blkpg.c 2003-07-04 04:11:31.000000000 -0400
103 +++ src/linux/linux/drivers/block/blkpg.c 2004-05-31 02:18:03.000000000 -0400
104 @@ -34,7 +34,7 @@
105 #include <linux/blk.h> /* for set_device_ro() */
106 #include <linux/blkpg.h>
107 #include <linux/genhd.h>
108 -#include <linux/swap.h> /* for is_swap_partition() */
109 +#include <linux/swap.h> /* for swap_run_test() */
110 #include <linux/module.h> /* for EXPORT_SYMBOL */
111
112 #include <asm/uaccess.h>
113 @@ -114,6 +114,29 @@
114 return 0;
115 }
116
117 +/* swap_run_test() applies this hook to all swapfiles until it returns
118 + * "1". If it never returns "1", the result of swap_run_test() is "0",
119 + * otherwise "1".
120 + */
121 +static int is_swap_partition_hook(unsigned int flags, struct file *swap_file,
122 + void *testdata)
123 +{
124 + kdev_t swap_dev = S_ISBLK(swap_file->f_dentry->d_inode->i_mode)
125 + ? swap_file->f_dentry->d_inode->i_rdev : 0;
126 + kdev_t dev = *((kdev_t *)testdata);
127 +
128 + if (flags & SWP_USED && dev == swap_dev) {
129 + return 1;
130 + } else {
131 + return 0;
132 + }
133 +}
134 +
135 +static inline int is_swap_partition(kdev_t dev)
136 +{
137 + return swap_run_test(is_swap_partition_hook, &dev);
138 +}
139 +
140 /*
141 * Delete a partition given by partition number
142 *
143 diff -Nurb src/linux/linux.orig/fs/Config.in src/linux/linux/fs/Config.in
144 --- src/linux/linux.orig/fs/Config.in 2004-05-31 02:02:43.000000000 -0400
145 +++ src/linux/linux/fs/Config.in 2004-05-31 02:18:03.000000000 -0400
146 @@ -4,6 +4,12 @@
147 mainmenu_option next_comment
148 comment 'File systems'
149
150 +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
151 + tristate 'Swapping to block devices' CONFIG_BLKDEV_SWAP
152 +else
153 + define_bool CONFIG_BLKDEV_SWAP y
154 +fi
155 +
156 bool 'Quota support' CONFIG_QUOTA
157 tristate 'Kernel automounter support' CONFIG_AUTOFS_FS
158 tristate 'Kernel automounter version 4 support (also supports v3)' CONFIG_AUTOFS4_FS
159 @@ -110,6 +116,12 @@
160 dep_tristate 'NFS file system support' CONFIG_NFS_FS $CONFIG_INET
161 dep_mbool ' Provide NFSv3 client support' CONFIG_NFS_V3 $CONFIG_NFS_FS
162 dep_bool ' Root file system on NFS' CONFIG_ROOT_NFS $CONFIG_NFS_FS $CONFIG_IP_PNP
163 + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
164 + dep_tristate ' Swapping via NFS (EXPERIMENTAL)' CONFIG_SWAP_VIA_NFS $CONFIG_NFS_FS
165 + if [ "$CONFIG_SWAP_VIA_NFS" = "y" -o "$CONFIG_SWAP_VIA_NFS" = "m" ]; then
166 + define_bool CONFIG_NETSWAP y
167 + fi
168 + fi
169
170 dep_tristate 'NFS server support' CONFIG_NFSD $CONFIG_INET
171 dep_mbool ' Provide NFSv3 server support' CONFIG_NFSD_V3 $CONFIG_NFSD
172 diff -Nurb src/linux/linux.orig/fs/Makefile src/linux/linux/fs/Makefile
173 --- src/linux/linux.orig/fs/Makefile 2004-05-31 02:02:42.000000000 -0400
174 +++ src/linux/linux/fs/Makefile 2004-05-31 02:18:03.000000000 -0400
175 @@ -8,7 +8,7 @@
176 O_TARGET := fs.o
177
178 export-objs := filesystems.o open.o dcache.o buffer.o
179 -mod-subdirs := nls
180 +mod-subdirs := nls nfs
181
182 obj-y := open.o read_write.o devices.o file_table.o buffer.o \
183 super.o block_dev.o char_dev.o stat.o exec.o pipe.o namei.o \
184 @@ -70,6 +70,7 @@
185 subdir-$(CONFIG_JFS_FS) += jfs
186 subdir-$(CONFIG_SQUASHFS) += squashfs
187
188 +obj-$(CONFIG_BLKDEV_SWAP) += blkdev_swap.o
189
190 obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
191 obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
192 diff -Nurb src/linux/linux.orig/fs/blkdev_swap.c src/linux/linux/fs/blkdev_swap.c
193 --- src/linux/linux.orig/fs/blkdev_swap.c 1969-12-31 19:00:00.000000000 -0500
194 +++ src/linux/linux/fs/blkdev_swap.c 2004-05-31 02:18:03.000000000 -0400
195 @@ -0,0 +1,309 @@
196 +/*
197 + * Swapping to partitions or files located on partitions.
198 + */
199 +
200 +#include <linux/config.h>
201 +#include <linux/module.h>
202 +#include <linux/init.h>
203 +#include <linux/slab.h>
204 +#include <linux/locks.h>
205 +#include <linux/blkdev.h>
206 +#include <linux/pagemap.h>
207 +#include <linux/swap.h>
208 +#include <linux/fs.h>
209 +
210 +#ifdef DEBUG_BLKDEV_SWAP
211 +# define dprintk(fmt...) printk(fmt) /* no "##": nothing valid to paste onto "(" */
212 +#else
213 +# define dprintk(fmt...) do { /* debugging off: expands to nothing */ } while (0)
214 +#endif
215 +
216 +#define BLKDEV_SWAP_ID "blkdev"
217 +#define BLKDEV_FILE_SWAP_ID "blkdev file"
218 +
219 +/*
220 + * Helper function, copied here from buffer.c
221 + */
222 +
223 +/*
224 + * Start I/O on a page.
225 + * This function expects the page to be locked and may return
226 + * before I/O is complete. You then have to check page->locked
227 + * and page->uptodate.
228 + *
229 + * brw_page() is SMP-safe, although it's being called with the
230 + * kernel lock held - but the code is ready.
231 + *
232 + * FIXME: we need a swapper_inode->get_block function to remove
233 + * some of the bmap kludges and interface ugliness here.
234 + */
235 +int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
236 +{
237 + struct buffer_head *head, *bh;
238 +
239 + if (!PageLocked(page))
240 + panic("brw_page: page not locked for I/O");
241 +
242 + if (!page->buffers)
243 + create_empty_buffers(page, dev, size);
244 + head = bh = page->buffers;
245 +
246 + /* Stage 1: lock all the buffers */
247 + do {
248 + lock_buffer(bh);
249 + bh->b_blocknr = *(b++);
250 + set_bit(BH_Mapped, &bh->b_state);
251 + set_buffer_async_io(bh);
252 + bh = bh->b_this_page;
253 + } while (bh != head);
254 +
255 + /* Stage 2: start the IO */
256 + do {
257 + struct buffer_head *next = bh->b_this_page;
258 + submit_bh(rw, bh);
259 + bh = next;
260 + } while (bh != head);
261 + return 0;
262 +}
263 +
264 +/*
265 + * We implement two methods: swapping to partitions, and swapping to files
266 + * located on partitions.
267 + */
268 +
269 +struct blkdev_swap_data {
270 + kdev_t dev;
271 +};
272 +
273 +struct test_data {
274 + struct file * filp;
275 + kdev_t dev;
276 +};
277 +
278 +static int is_blkdev_swapping(unsigned int flags,
279 + struct file * swapf,
280 + void *data)
281 +{
282 + struct test_data *testdata = (struct test_data *) data;
283 + struct file * filp = testdata->filp;
284 + kdev_t dev = testdata->dev;
285 +
286 + /* Only check filp's that don't match the one already opened
287 + * for us by sys_swapon(). Otherwise, we will always flag a
288 + * busy swap file.
289 + */
290 +
291 + if (swapf != filp) {
292 + if (dev == swapf->f_dentry->d_inode->i_rdev)
293 + return 1;
294 + }
295 + return 0;
296 +}
297 +
298 +static int blkdev_swap_open(struct file * filp, void **dptr)
299 +{
300 + int swapfilesize;
301 + kdev_t dev;
302 + struct blkdev_swap_data *data;
303 + int error;
304 + struct test_data testdata;
305 +
306 + MOD_INC_USE_COUNT;
307 +
308 + if (!S_ISBLK(filp->f_dentry->d_inode->i_mode)) {
309 + dprintk("blkdev_swap_open: can't handle this swap file: %s\n",
310 + filp->f_dentry->d_name.name);
311 + error = 0; /* not a block device: not for us */
312 + goto bad_swap;
313 + }
314 +
315 + dev = filp->f_dentry->d_inode->i_rdev;
316 + set_blocksize(dev, PAGE_SIZE);
317 + error = -ENODEV;
318 + if (!dev ||
319 + (blk_size[MAJOR(dev)] && !blk_size[MAJOR(dev)][MINOR(dev)])) {
320 + printk("blkdev_swap_open: blkdev weirdness for %s\n",
321 + filp->f_dentry->d_name.name);
322 + goto bad_swap;
323 + }
324 +
325 + /* Check to make sure that we aren't already swapping. */
326 + error = -EBUSY;
327 + testdata.filp = filp;
328 + testdata.dev = dev;
329 + if (swap_run_test(is_blkdev_swapping, &testdata)) {
330 + printk("blkdev_swap_open: already swapping to %s\n",
331 + filp->f_dentry->d_name.name);
332 + goto bad_swap;
333 + }
334 +
335 + swapfilesize = 0;
336 + if (blk_size[MAJOR(dev)])
337 + swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
338 + >> (PAGE_SHIFT - 10);
339 +
340 + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
341 + printk("blkdev_swap_open: can't allocate data for %s\n",
342 + filp->f_dentry->d_name.name);
343 + error = -ENOMEM;
344 + goto bad_swap;
345 + }
346 + data->dev = dev;
347 + *dptr = data;
348 +
349 + dprintk("blkdev_swap_open: returning %d\n", swapfilesize);
350 + return swapfilesize;
351 +
352 + bad_swap:
353 + MOD_DEC_USE_COUNT;
354 + return error; /* 0 (not for us) or a negative errno set above */
355 +}
356 +
357 +static int blkdev_swap_release(struct file * filp, void *data)
358 +{
359 + dprintk("blkdev_swap_release: releasing swap device %s\n",
360 + filp->f_dentry->d_name.name);
361 + kfree(data);
362 + MOD_DEC_USE_COUNT;
363 + return 0;
364 +}
365 +
366 +static int blkdev_rw_page(int rw, struct page *page, unsigned long offset,
367 + void *ptr)
368 +{
369 + int block = offset; /* brw_page() reads int block numbers, not long */
370 + brw_page(rw, page, ((struct blkdev_swap_data *)ptr)->dev, &block, PAGE_SIZE);
371 + return 1; /* always 1; brw_page()'s own result is not checked */
372 +}
373 +
374 +static struct swap_ops blkdev_swap_ops = {
375 + blkdev_swap_open,
376 + blkdev_swap_release,
377 + blkdev_rw_page
378 +};
379 +
380 +struct blkdevfile_swap_data {
381 + struct inode *swapf;
382 +};
383 +
384 +static int is_blkdevfile_swapping(unsigned int flags,
385 + struct file * swapf,
386 + void * data)
387 +{
388 + struct file * filp = (struct file *) data;
389 +
390 + /* Only check filp's that don't match the one already opened
391 + * for us by sys_swapon(). Otherwise, we will always flag a
392 + * busy swap file.
393 + */
394 +
395 + if (swapf != filp) {
396 + if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
397 + return 1;
398 + }
399 + return 0;
400 +}
401 +
402 +static int blkdevfile_swap_open(struct file *swapf, void **dptr)
403 +{
404 + int error = 0;
405 + int swapfilesize;
406 + struct blkdevfile_swap_data *data;
407 +
408 + MOD_INC_USE_COUNT;
409 +
410 + /* first check whether this is a regular file located on a local
411 + * hard disk
412 + */
413 + if (!S_ISREG(swapf->f_dentry->d_inode->i_mode)) {
414 + dprintk("blkdevfile_swap_open: "
415 + "can't handle this swap file: %s\n",
416 + swapf->f_dentry->d_name.name);
417 + error = 0; /* not a regular file: not for us */
418 + goto bad_swap;
419 + }
420 + if (!swapf->f_dentry->d_inode->i_mapping->a_ops->bmap) {
421 + dprintk("blkdevfile_swap_open: no bmap for file: %s\n",
422 + swapf->f_dentry->d_name.name);
423 + error = 0; /* no bmap operation: not for us */
424 + goto bad_swap;
425 + }
426 +
427 + if (swap_run_test(is_blkdevfile_swapping, swapf)) {
428 + dprintk("blkdevfile_swap_open: already swapping to %s\n",
429 + swapf->f_dentry->d_name.name);
430 + error = -EBUSY;
431 + goto bad_swap;
432 + }
433 + swapfilesize = swapf->f_dentry->d_inode->i_size >> PAGE_SHIFT;
434 + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
435 + error = -ENOMEM;
436 + goto bad_swap;
437 + }
438 + data->swapf = swapf->f_dentry->d_inode;
439 + *dptr = data;
440 + return swapfilesize;
441 +
442 + bad_swap:
443 + MOD_DEC_USE_COUNT;
444 + return error;
445 +}
446 +
447 +static int blkdevfile_swap_release(struct file *swapf, void *data)
448 +{
449 + kfree(data);
450 + MOD_DEC_USE_COUNT;
451 + return 0;
452 +}
453 +
454 +static int blkdevfile_rw_page(int rw, struct page *page, unsigned long offset,
455 + void *ptr)
456 +{
457 + struct blkdevfile_swap_data *data = (struct blkdevfile_swap_data *)ptr;
458 + struct inode * swapf = data->swapf;
459 + int i, j;
460 + unsigned int block = offset
461 + << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
462 + kdev_t dev = swapf->i_dev;
463 + int block_size;
464 + int zones[PAGE_SIZE/512];
465 + int zones_used;
466 +
467 + block_size = swapf->i_sb->s_blocksize;
468 + for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
469 + if (!(zones[i] = bmap(swapf,block++))) {
470 + printk("blkdevfile_rw_page: bad swap file\n");
471 + return 0;
472 + }
473 + zones_used = i;
474 +
475 + /* block_size == PAGE_SIZE/zones_used */
476 + brw_page(rw, page, dev, zones, block_size);
477 + return 1;
478 +}
479 +
480 +static struct swap_ops blkdevfile_swap_ops = {
481 + blkdevfile_swap_open,
482 + blkdevfile_swap_release,
483 + blkdevfile_rw_page
484 + };
485 +
486 +int __init blkdev_swap_init(void)
487 +{
488 + (void)register_swap_method(BLKDEV_SWAP_ID, &blkdev_swap_ops);
489 + (void)register_swap_method(BLKDEV_FILE_SWAP_ID, &blkdevfile_swap_ops);
490 + return 0;
491 +}
492 +
493 +void __exit blkdev_swap_exit(void)
494 +{
495 + unregister_swap_method(BLKDEV_SWAP_ID);
496 + unregister_swap_method(BLKDEV_FILE_SWAP_ID);
497 +}
498 +
499 +module_init(blkdev_swap_init)
500 +module_exit(blkdev_swap_exit)
501 +
502 +MODULE_LICENSE("GPL");
503 +MODULE_AUTHOR("Many. Stuffed into a module by cH (Claus-Justus Heine)");
504 +MODULE_DESCRIPTION("Swapping to partitions and files on local hard-disks");
505 diff -Nurb src/linux/linux.orig/fs/buffer.c src/linux/linux/fs/buffer.c
506 --- src/linux/linux.orig/fs/buffer.c 2003-07-04 04:12:05.000000000 -0400
507 +++ src/linux/linux/fs/buffer.c 2004-05-31 02:21:05.000000000 -0400
508 @@ -743,7 +743,7 @@
509 bh->b_private = private;
510 }
511
512 -static void end_buffer_io_async(struct buffer_head * bh, int uptodate)
513 +void end_buffer_io_async(struct buffer_head * bh, int uptodate)
514 {
515 static spinlock_t page_uptodate_lock = SPIN_LOCK_UNLOCKED;
516 unsigned long flags;
517 @@ -2344,35 +2344,6 @@
518 return err;
519 }
520
521 -int brw_page(int rw, struct page *page, kdev_t dev, int b[], int size)
522 -{
523 - struct buffer_head *head, *bh;
524 -
525 - if (!PageLocked(page))
526 - panic("brw_page: page not locked for I/O");
527 -
528 - if (!page->buffers)
529 - create_empty_buffers(page, dev, size);
530 - head = bh = page->buffers;
531 -
532 - /* Stage 1: lock all the buffers */
533 - do {
534 - lock_buffer(bh);
535 - bh->b_blocknr = *(b++);
536 - set_bit(BH_Mapped, &bh->b_state);
537 - set_buffer_async_io(bh);
538 - bh = bh->b_this_page;
539 - } while (bh != head);
540 -
541 - /* Stage 2: start the IO */
542 - do {
543 - struct buffer_head *next = bh->b_this_page;
544 - submit_bh(rw, bh);
545 - bh = next;
546 - } while (bh != head);
547 - return 0;
548 -}
549 -
550 int block_symlink(struct inode *inode, const char *symname, int len)
551 {
552 struct address_space *mapping = inode->i_mapping;
553 diff -Nurb src/linux/linux.orig/fs/nfs/Makefile src/linux/linux/fs/nfs/Makefile
554 --- src/linux/linux.orig/fs/nfs/Makefile 2003-07-04 04:12:07.000000000 -0400
555 +++ src/linux/linux/fs/nfs/Makefile 2004-05-31 02:18:03.000000000 -0400
556 @@ -15,6 +15,14 @@
557 obj-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o
558 obj-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
559
560 -obj-m := $(O_TARGET)
561 +obj-$(CONFIG_SWAP_VIA_NFS) += nfsswap.o
562 +ifeq ($(CONFIG_SWAP_VIA_NFS),m)
563 +export-objs := nfs_syms.o
564 +obj-y += nfs_syms.o
565 +endif
566 +
567 +ifeq ($(CONFIG_NFS_FS),m)
568 +obj-m += $(O_TARGET)
569 +endif
570
571 include $(TOPDIR)/Rules.make
572 diff -Nurb src/linux/linux.orig/fs/nfs/file.c src/linux/linux/fs/nfs/file.c
573 --- src/linux/linux.orig/fs/nfs/file.c 2003-07-04 04:12:07.000000000 -0400
574 +++ src/linux/linux/fs/nfs/file.c 2004-05-31 02:18:03.000000000 -0400
575 @@ -58,11 +58,6 @@
576 setattr: nfs_notify_change,
577 };
578
579 -/* Hack for future NFS swap support */
580 -#ifndef IS_SWAPFILE
581 -# define IS_SWAPFILE(inode) (0)
582 -#endif
583 -
584 /*
585 * Flush all dirty pages, and check for write errors.
586 *
587 @@ -217,8 +212,6 @@
588 inode->i_ino, (unsigned long) count, (unsigned long) *ppos);
589
590 result = -EBUSY;
591 - if (IS_SWAPFILE(inode))
592 - goto out_swapfile;
593 result = nfs_revalidate_inode(NFS_SERVER(inode), inode);
594 if (result)
595 goto out;
596 @@ -230,10 +223,6 @@
597 result = generic_file_write(file, buf, count, ppos);
598 out:
599 return result;
600 -
601 -out_swapfile:
602 - printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
603 - goto out;
604 }
605
606 /*
607 diff -Nurb src/linux/linux.orig/fs/nfs/nfs_syms.c src/linux/linux/fs/nfs/nfs_syms.c
608 --- src/linux/linux.orig/fs/nfs/nfs_syms.c 1969-12-31 19:00:00.000000000 -0500
609 +++ src/linux/linux/fs/nfs/nfs_syms.c 2004-05-31 02:18:03.000000000 -0400
610 @@ -0,0 +1,10 @@
611 +#include <linux/config.h>
612 +#define __NO_VERSION__
613 +#include <linux/module.h>
614 +#include <linux/types.h>
615 +#include <linux/sunrpc/clnt.h>
616 +#include <linux/nfs_fs.h>
617 +
618 +EXPORT_SYMBOL(__nfs_refresh_inode);
619 +EXPORT_SYMBOL(nfs_write_attributes);
620 +
621 diff -Nurb src/linux/linux.orig/fs/nfs/nfsswap.c src/linux/linux/fs/nfs/nfsswap.c
622 --- src/linux/linux.orig/fs/nfs/nfsswap.c 1969-12-31 19:00:00.000000000 -0500
623 +++ src/linux/linux/fs/nfs/nfsswap.c 2004-05-31 02:18:03.000000000 -0400
624 @@ -0,0 +1,350 @@
625 +/*
626 + * Swapping to files located on NFS mounted volumes
627 + * Copyright (c) 2000 Claus-Justus Heine
628 + *
629 + */
630 +
631 +#include <linux/config.h>
632 +#include <linux/module.h>
633 +#include <linux/init.h>
634 +#include <linux/types.h>
635 +#include <linux/slab.h>
636 +#include <linux/swap.h>
637 +#include <linux/pagemap.h>
638 +#include <linux/file.h>
639 +#include <linux/fs.h>
640 +#include <linux/socket.h>
641 +#include <linux/smp_lock.h>
642 +#include <net/netswapping.h>
643 +#include <net/sock.h>
644 +
645 +#include <linux/sunrpc/clnt.h>
646 +#include <linux/nfs_fs.h>
647 +#include <linux/nfs_fs_sb.h>
648 +#include <asm/uaccess.h>
649 +
650 +#define NFSDBG_FACILITY NFSDBG_SWAP
651 +
652 +#define NFS_SWAP_ID "nfs file"
653 +
654 +/* we cache some values here. In principle, we only need the file.
655 + */
656 +struct nfs_swap_data {
657 + struct file *file;
658 + struct inode *inode;
659 + struct nfs_server *server;
660 + struct socket *socket;
661 +};
662 +
663 +/* Nearly a clone of nfs_readpage_sync() in read.c, but "struct page" does not
664 + * contain information about the file offset when swapping. So.
665 + */
666 +static int nfs_read_swap_page(struct page *page,
667 + struct nfs_server *server,
668 + struct inode *inode,
669 + struct file *file)
670 +{
671 + unsigned int rsize = server->rsize;
672 + unsigned int count = PAGE_SIZE;
673 + unsigned int offset = 0; /* always at start of page */
674 + int result, eof;
675 + struct rpc_cred *cred;
676 + struct nfs_fattr fattr;
677 +
678 + cred = nfs_file_cred(file);
679 +
680 + do {
681 + if (count < rsize)
682 + rsize = count;
683 +
684 + lock_kernel();
685 + result = NFS_PROTO(inode)->read(inode, cred,
686 + &fattr,
687 + NFS_RPC_SWAPFLAGS,
688 + offset, rsize, page, &eof);
689 + nfs_refresh_inode(inode, &fattr);
690 + unlock_kernel();
691 +
692 + /*
693 + * Even if we had a partial success we can't mark the page
694 + * cache valid.
695 + */
696 + if (result < 0) {
697 + if (result == -EISDIR)
698 + result = -EINVAL;
699 + goto io_error;
700 + }
701 + count -= result;
702 + offset += result;
703 + if (result < rsize) /* NFSv2ism */
704 + break;
705 + } while (count);
706 +
707 + if (count) {
708 + char *kaddr = kmap(page);
709 + memset(kaddr + offset, 0, count);
710 + kunmap(page);
711 + }
712 + flush_dcache_page(page);
713 + result = 0;
714 +
715 +io_error:
716 + return result;
717 +}
718 +
719 +/* Like nfs_writepage_sync(), but when swapping page->index does not encode
720 + * the offset in the swap file alone.
721 + *
722 + */
723 +static int nfs_write_swap_page(struct page *page,
724 + struct nfs_server *server,
725 + struct inode *inode,
726 + struct file *file)
727 +{
728 + struct rpc_cred *cred;
729 + unsigned int wsize = server->wsize;
730 + unsigned int count = PAGE_SIZE;
731 + unsigned int offset = 0;
732 + int result;
733 + struct nfs_writeverf verf;
734 + struct nfs_fattr fattr;
735 +
736 + cred = nfs_file_cred(file);
737 +
738 + do {
739 + if (count < wsize)
740 + wsize = count;
741 +
742 + lock_kernel();
743 + result = NFS_PROTO(inode)->write(inode, cred, &fattr,
744 + NFS_RW_SWAP|NFS_RW_SYNC,
745 + offset, wsize, page, &verf);
746 + nfs_write_attributes(inode, &fattr);
747 + unlock_kernel();
748 +
749 + if (result < 0) {
750 + goto io_error;
751 + }
752 + if (result != wsize)
753 + printk("NFS: short write, wsize=%u, result=%d\n",
754 + wsize, result);
755 + offset += wsize;
756 + count -= wsize;
757 + /*
758 + * If we've extended the file, update the inode
759 + * now so we don't invalidate the cache.
760 + */
761 + if (offset > inode->i_size)
762 + inode->i_size = offset;
763 + } while (count);
764 +
765 + result = 0;
766 +
767 +io_error:
768 +
769 + return result;
770 +}
771 +
772 +/* Unluckily (for us), from 2.4.19 -> 2.4.20 the nfs-procs were
773 + * changed and now expect a proper file-mapping page, where index
774 + * encodes the offset alone.
775 + *
776 + * What we do: we save the original value of page->index, initialize
777 + * page->index to what the NFS/sun-rpc subsystem expects and restore
778 + * the index later.
779 + */
780 +static int nfs_rw_swap_page(int rw, struct page *page,
781 + unsigned long offset, void *dptr)
782 +{
783 + int error;
784 + struct nfs_swap_data *data = dptr;
785 + unsigned long alloc_flag = current->flags & PF_MEMALLOC;
786 + unsigned long page_index;
787 +
788 + if (!PageLocked(page))
789 + panic("nfs_rw_swap_page: page not locked for I/O");
790 +
791 + /* prevent memory deadlocks */
792 + if (!(current->flags & PF_MEMALLOC)) {
793 + dprintk("nfs_rw_swap_page: Setting PF_MEMALLOC\n");
794 + }
795 + current->flags |= PF_MEMALLOC;
796 +
797 + /* now tweak the page->index field ... */
798 + page_index = page->index;
799 + page->index = ((loff_t)offset*(loff_t)PAGE_SIZE) >> PAGE_CACHE_SHIFT;
800 +
801 + if (rw == WRITE) {
802 + error = nfs_write_swap_page(page,
803 + data->server,
804 + data->inode,
805 + data->file);
806 + } else {
807 + error = nfs_read_swap_page(page,
808 + data->server,
809 + data->inode,
810 + data->file);
811 + }
812 +
813 + if (!alloc_flag) {
814 + current->flags &= ~PF_MEMALLOC;
815 + }
816 +
817 + /* now restore the page->index field ... */
818 + page->index = page_index;
819 +
820 + if (error) {
821 + /* Must mark the page invalid after I/O error */
822 + SetPageError(page);
823 + ClearPageUptodate(page);
824 + } else {
825 + ClearPageError(page);
826 + SetPageUptodate(page);
827 + }
828 +
829 + if (!error) { /* in case of an error rw_swap_page() likes to unlock
830 + * itself.
831 + */
832 + UnlockPage(page);
833 + }
834 +
835 + return error < 0 ? 0 : 1;
836 +}
837 +
838 +static int is_nfsfile_swapping(unsigned int flags,
839 + struct file * swapf,
840 + void * data)
841 +{
842 + struct file * filp = (struct file *) data;
843 +
844 + /* Only check filp's that don't match the one already opened
845 + * for us by sys_swapon(). Otherwise, we will always flag a
846 + * busy swap file.
847 + */
848 +
849 + if (swapf != filp) {
850 + if (filp->f_dentry->d_inode == swapf->f_dentry->d_inode)
851 + return 1;
852 + }
853 + return 0;
854 +}
855 +
856 +static int nfs_swap_open(struct file *swapf, void **dptr)
857 +{
858 + int error = 0;
859 + int swapfilesize;
860 + struct nfs_swap_data *data;
861 + int on = 1;
862 + mm_segment_t fs;
863 + struct inode *inode = swapf->f_dentry->d_inode;
864 +
865 + MOD_INC_USE_COUNT;
866 +
867 + if (!S_ISREG(inode->i_mode)) {
868 + dprintk("nfs_swap_open: can't handle this swap file: %s\n",
869 + swapf->f_dentry->d_name.name);
870 + error = 0; /* not for us */
871 + goto bad_swap;
872 + }
873 + /* determine whether this file really is located on an NFS mounted
874 + * volume
875 + */
876 + if (!inode->i_sb || inode->i_sb->s_magic != NFS_SUPER_MAGIC) {
877 + dprintk("nfs_swap_open: %s is not an NFS file.\n",
878 + swapf->f_dentry->d_name.name);
879 + error = 0; /* not for us */
880 + goto bad_swap;
881 + }
882 +
883 + if (swap_run_test(is_nfsfile_swapping, swapf)) {
884 + dprintk("nfs_swap_open: already swapping to %s\n",
885 + swapf->f_dentry->d_name.name);
886 + error = -EBUSY;
887 + goto bad_swap;
888 + }
889 + swapfilesize = inode->i_size >> PAGE_SHIFT;
890 + if ((data = kmalloc(sizeof(*data), GFP_KERNEL)) == NULL) {
891 + error = -ENOMEM;
892 + goto bad_swap;
893 + }
894 + data->file = swapf;
895 + data->inode = inode;
896 + data->server = NFS_SERVER(inode);
897 + data->socket = data->server->client->cl_xprt->sock;
898 +
899 + /* set socket option SO_SWAPPING */
900 + fs = get_fs();
901 + set_fs(KERNEL_DS);
902 + error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
903 + (char *)&on, sizeof(on));
904 + set_fs(fs);
905 + if (error) {
906 + dprintk("nfs_swap_open: error setting SO_SWAPPING\n");
907 + goto bad_swap_2;
908 + }
909 +
910 + *dptr = data;
911 + return swapfilesize;
912 +
913 + bad_swap_2:
914 + kfree(data);
915 + bad_swap:
916 + MOD_DEC_USE_COUNT;
917 + return error;
918 +}
919 +
920 +static int nfs_swap_release(struct file *swapf, void *dptr)
921 +{
922 + struct nfs_swap_data *data = (struct nfs_swap_data *)dptr;
923 + int off = 0;
924 + mm_segment_t fs;
925 + int error;
926 +
927 +#if 1
928 + if (swapf != data->file ||
929 + swapf->f_dentry->d_inode != data->inode ||
930 + !swapf->f_dentry->d_inode->i_sb ||
931 + swapf->f_dentry->d_inode->i_sb->s_magic != NFS_SUPER_MAGIC ||
932 + NFS_SERVER(swapf->f_dentry->d_inode) != data->server ||
933 + data->socket != data->server->client->cl_xprt->sock) {
934 + panic("nfs_swap_release: nfs swap data messed up");
935 + }
936 +#endif
937 +
938 + /* remove socket option SO_SWAPPING */
939 + fs = get_fs();
940 + set_fs(KERNEL_DS);
941 + error = sock_setsockopt(data->socket, SOL_SOCKET, SO_SWAPPING,
942 + (char *)&off, sizeof(off));
943 + set_fs(fs);
944 + if (error) {
945 + dprintk("nfs_swap_release: error clearing SO_SWAPPING\n");
946 + }
947 + kfree(data);
948 + MOD_DEC_USE_COUNT;
949 + return error;
950 +}
951 +
952 +static struct swap_ops nfs_swap_ops = {
953 + open: nfs_swap_open,
954 + release: nfs_swap_release,
955 + rw_page: nfs_rw_swap_page
956 +};
957 +
958 +int __init nfs_swap_init(void)
959 +{
960 + (void)register_swap_method(NFS_SWAP_ID, &nfs_swap_ops);
961 + return 0;
962 +}
963 +
964 +void __exit nfs_swap_exit(void)
965 +{
966 + unregister_swap_method(NFS_SWAP_ID);
967 +}
968 +
969 +module_init(nfs_swap_init)
970 +module_exit(nfs_swap_exit)
971 +
972 +MODULE_LICENSE("GPL");
973 +MODULE_AUTHOR("(c) 1996-2002 cH (Claus-Justus Heine)");
974 +MODULE_DESCRIPTION("Swapping to files located on volumes mounted via NFS");
975 diff -Nurb src/linux/linux.orig/fs/nfs/read.c src/linux/linux/fs/nfs/read.c
976 --- src/linux/linux.orig/fs/nfs/read.c 2003-07-04 04:12:08.000000000 -0400
977 +++ src/linux/linux/fs/nfs/read.c 2004-05-31 02:18:03.000000000 -0400
978 @@ -50,11 +50,6 @@
979 */
980 static void nfs_readpage_result(struct rpc_task *task);
981
982 -/* Hack for future NFS swap support */
983 -#ifndef IS_SWAPFILE
984 -# define IS_SWAPFILE(inode) (0)
985 -#endif
986 -
987 static kmem_cache_t *nfs_rdata_cachep;
988
989 static __inline__ struct nfs_read_data *nfs_readdata_alloc(void)
990 @@ -92,7 +87,6 @@
991 int rsize = NFS_SERVER(inode)->rsize;
992 int result;
993 int count = PAGE_CACHE_SIZE;
994 - int flags = IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0;
995 int eof;
996
997 dprintk("NFS: nfs_readpage_sync(%p)\n", page);
998 @@ -114,7 +108,7 @@
999 offset, rsize, page);
1000
1001 lock_kernel();
1002 - result = NFS_PROTO(inode)->read(inode, cred, &fattr, flags,
1003 + result = NFS_PROTO(inode)->read(inode, cred, &fattr, 0,
1004 offset, rsize, page, &eof);
1005 nfs_refresh_inode(inode, &fattr);
1006 unlock_kernel();
1007 @@ -246,7 +240,7 @@
1008 task = &data->task;
1009
1010 /* N.B. Do we need to test? Never called for swapfile inode */
1011 - flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
1012 + flags = RPC_TASK_ASYNC;
1013
1014 nfs_read_rpcsetup(head, data);
1015
1016 @@ -476,8 +470,6 @@
1017 }
1018
1019 error = nfs_readpage_sync(file, inode, page);
1020 - if (error < 0 && IS_SWAPFILE(inode))
1021 - printk("Aiee.. nfs swap-in of page failed!\n");
1022 out:
1023 return error;
1024
1025 diff -Nurb src/linux/linux.orig/fs/nfs/write.c src/linux/linux/fs/nfs/write.c
1026 --- src/linux/linux.orig/fs/nfs/write.c 2003-07-04 04:12:08.000000000 -0400
1027 +++ src/linux/linux/fs/nfs/write.c 2004-05-31 02:20:47.000000000 -0400
1028 @@ -3,7 +3,6 @@
1029 #include <linux/config.h>
1030 #include <linux/types.h>
1031 #include <linux/slab.h>
1032 -#include <linux/swap.h>
1033 #include <linux/pagemap.h>
1034 #include <linux/file.h>
1035
1036 @@ -46,11 +45,6 @@
1037 static void nfs_commit_done(struct rpc_task *);
1038 #endif
1039
1040 -/* Hack for future NFS swap support */
1041 -#ifndef IS_SWAPFILE
1042 -# define IS_SWAPFILE(inode) (0)
1043 -#endif
1044 -
1045 static kmem_cache_t *nfs_wdata_cachep;
1046
1047 static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
1048 @@ -82,7 +76,7 @@
1049 * For the moment, we just call nfs_refresh_inode().
1050 */
1051 static __inline__ int
1052 -nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
1053 +__nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
1054 {
1055 if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) {
1056 fattr->pre_size = NFS_CACHE_ISIZE(inode);
1057 @@ -93,6 +87,11 @@
1058 return nfs_refresh_inode(inode, fattr);
1059 }
1060
1061 +int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
1062 +{
1063 + return __nfs_write_attributes(inode, fattr);
1064 +}
1065 +
1066 /*
1067 * Write a page synchronously.
1068 * Offset is the data offset within the page.
1069 @@ -104,8 +103,7 @@
1070 struct rpc_cred *cred = NULL;
1071 loff_t base;
1072 unsigned int wsize = NFS_SERVER(inode)->wsize;
1073 - int result, refresh = 0, written = 0, flags;
1074 - u8 *buffer;
1075 + int result, refresh = 0, written = 0;
1076 struct nfs_fattr fattr;
1077 struct nfs_writeverf verf;
1078
1079 @@ -121,15 +119,14 @@
1080
1081 base = page_offset(page) + offset;
1082
1083 - flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
1084 -
1085 do {
1086 - if (count < wsize && !IS_SWAPFILE(inode))
1087 + if (count < wsize)
1088 wsize = count;
1089
1090 - result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
1091 + result = NFS_PROTO(inode)->write(inode, cred, &fattr,
1092 + NFS_RW_SYNC,
1093 offset, wsize, page, &verf);
1094 - nfs_write_attributes(inode, &fattr);
1095 + __nfs_write_attributes(inode, &fattr);
1096
1097 if (result < 0) {
1098 /* Must mark the page invalid after I/O error */
1099 @@ -140,7 +137,6 @@
1100 printk("NFS: short write, wsize=%u, result=%d\n",
1101 wsize, result);
1102 refresh = 1;
1103 - buffer += wsize;
1104 base += wsize;
1105 offset += wsize;
1106 written += wsize;
1107 @@ -979,7 +975,7 @@
1108 }
1109 #endif
1110
1111 - nfs_write_attributes(inode, resp->fattr);
1112 + __nfs_write_attributes(inode, resp->fattr);
1113 while (!list_empty(&data->pages)) {
1114 req = nfs_list_entry(data->pages.next);
1115 nfs_list_remove_request(req);
1116 @@ -1133,7 +1129,7 @@
1117 if (nfs_async_handle_jukebox(task))
1118 return;
1119
1120 - nfs_write_attributes(inode, resp->fattr);
1121 + __nfs_write_attributes(inode, resp->fattr);
1122 while (!list_empty(&data->pages)) {
1123 req = nfs_list_entry(data->pages.next);
1124 nfs_list_remove_request(req);
1125 diff -Nurb src/linux/linux.orig/include/linux/fs.h src/linux/linux/include/linux/fs.h
1126 --- src/linux/linux.orig/include/linux/fs.h 2004-05-31 02:06:19.000000000 -0400
1127 +++ src/linux/linux/include/linux/fs.h 2004-05-31 02:18:03.000000000 -0400
1128 @@ -1500,6 +1500,10 @@
1129 extern int inode_change_ok(struct inode *, struct iattr *);
1130 extern int inode_setattr(struct inode *, struct iattr *);
1131
1132 +/* for swapping to block devices */
1133 +void create_empty_buffers(struct page *page, kdev_t dev, unsigned long blocksize);
1134 +void end_buffer_io_async(struct buffer_head * bh, int uptodate);
1135 +
1136 /*
1137 * Common dentry functions for inclusion in the VFS
1138 * or in other stackable file systems. Some of these
1139 diff -Nurb src/linux/linux.orig/include/linux/nfs_fs.h src/linux/linux/include/linux/nfs_fs.h
1140 --- src/linux/linux.orig/include/linux/nfs_fs.h 2004-05-31 02:06:28.000000000 -0400
1141 +++ src/linux/linux/include/linux/nfs_fs.h 2004-05-31 02:18:03.000000000 -0400
1142 @@ -40,8 +40,8 @@
1143 */
1144 #define NFS_MAX_DIRCACHE 16
1145
1146 -#define NFS_MAX_FILE_IO_BUFFER_SIZE 32768
1147 -#define NFS_DEF_FILE_IO_BUFFER_SIZE 4096
1148 +#define NFS_MAX_FILE_IO_BUFFER_SIZE (8*PAGE_SIZE)
1149 +#define NFS_DEF_FILE_IO_BUFFER_SIZE PAGE_SIZE
1150
1151 /*
1152 * The upper limit on timeouts for the exponential backoff algorithm.
1153 @@ -205,6 +205,8 @@
1154 extern int nfs_writepage(struct page *);
1155 extern int nfs_flush_incompatible(struct file *file, struct page *page);
1156 extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
1157 +extern int nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr);
1158 +
1159 /*
1160 * Try to write back everything synchronously (but check the
1161 * return value!)
1162 @@ -375,6 +377,7 @@
1163 #define NFSDBG_XDR 0x0020
1164 #define NFSDBG_FILE 0x0040
1165 #define NFSDBG_ROOT 0x0080
1166 +#define NFSDBG_SWAP 0x0100
1167 #define NFSDBG_ALL 0xFFFF
1168
1169 #ifdef __KERNEL__
1170 diff -Nurb src/linux/linux.orig/include/linux/slab.h src/linux/linux/include/linux/slab.h
1171 --- src/linux/linux.orig/include/linux/slab.h 2004-05-31 02:06:19.000000000 -0400
1172 +++ src/linux/linux/include/linux/slab.h 2004-05-31 02:18:03.000000000 -0400
1173 @@ -39,6 +39,7 @@
1174 #define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */
1175 #define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
1176 #define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */
1177 +#define SLAB_LOW_GFP_ORDER 0x00010000UL /* use as low a gfp order as possible */
1178
1179 /* flags passed to a constructor func */
1180 #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* if not set, then deconstructor */
1181 diff -Nurb src/linux/linux.orig/include/linux/swap.h src/linux/linux/include/linux/swap.h
1182 --- src/linux/linux.orig/include/linux/swap.h 2004-05-31 02:06:19.000000000 -0400
1183 +++ src/linux/linux/include/linux/swap.h 2004-05-31 02:18:03.000000000 -0400
1184 @@ -58,15 +58,29 @@
1185 #define SWAP_MAP_MAX 0x7fff
1186 #define SWAP_MAP_BAD 0x8000
1187
1188 +struct swap_ops {
1189 + int (*open)(struct file *swapf, void **data);
1190 + int (*release)(struct file *swapf, void *data);
1191 + int (*rw_page)(int rw,
1192 + struct page *page, unsigned long offset, void *data);
1193 +};
1194 +
1195 +struct swap_method {
1196 + struct swap_method *next;
1197 + char * name;
1198 + struct swap_ops *ops;
1199 + int use_count;
1200 +};
1201 +
1202 /*
1203 * The in-memory structure used to track swap areas.
1204 */
1205 struct swap_info_struct {
1206 unsigned int flags;
1207 - kdev_t swap_device;
1208 + struct file *swap_file;
1209 + struct swap_method *method;
1210 + void *data;
1211 spinlock_t sdev_lock;
1212 - struct dentry * swap_file;
1213 - struct vfsmount *swap_vfsmnt;
1214 unsigned short * swap_map;
1215 unsigned int lowest_bit;
1216 unsigned int highest_bit;
1217 @@ -141,11 +155,15 @@
1218 extern int total_swap_pages;
1219 extern unsigned int nr_swapfiles;
1220 extern struct swap_info_struct swap_info[];
1221 -extern int is_swap_partition(kdev_t);
1222 +extern int register_swap_method(char *name, struct swap_ops *ops);
1223 +extern int unregister_swap_method(char *name);
1224 +extern int swap_run_test(int (*test_fct)(unsigned int flags,
1225 + struct file *swap_file,
1226 + void *testdata), void *testdata);
1227 extern void si_swapinfo(struct sysinfo *);
1228 extern swp_entry_t get_swap_page(void);
1229 -extern void get_swaphandle_info(swp_entry_t, unsigned long *, kdev_t *,
1230 - struct inode **);
1231 +struct swap_method *get_swaphandle_info(swp_entry_t entry,
1232 + unsigned long *offset, void **data);
1233 extern int swap_duplicate(swp_entry_t);
1234 extern int swap_count(struct page *);
1235 extern int valid_swaphandles(swp_entry_t, unsigned long *);
1236 diff -Nurb src/linux/linux.orig/include/net/netswapping.h src/linux/linux/include/net/netswapping.h
1237 --- src/linux/linux.orig/include/net/netswapping.h 1969-12-31 19:00:00.000000000 -0500
1238 +++ src/linux/linux/include/net/netswapping.h 2004-05-31 02:18:03.000000000 -0400
1239 @@ -0,0 +1,47 @@
1240 +#ifndef _LINUX_NETSWAPPING_H
1241 +#define _LINUX_NETSWAPPING_H
1242 +
1243 +#include <linux/swap.h>
1244 +#include <linux/init.h>
1245 +
1246 +/* It is a mess. Socket options are defined in asm-ARCH/socket.h */
1247 +
1248 +#define SO_SWAPPING 0x00100000 /* hopefully not used by anybody else */
1249 +
1250 +#ifdef __KERNEL__
1251 +
1252 +#define CTL_NETSWAP 0x00100000
1253 +
1254 +enum {
1255 + NET_SWAP_DROPPED = 1,
1256 + NET_SWAP_DROP_THRESHOLD = 2,
1257 + NET_SWAP_SOCK_COUNT = 3
1258 +};
1259 +
1260 +extern unsigned int netswap_free_pages_min;
1261 +extern int netswap_sock_count;
1262 +extern unsigned int netswap_dropped;
1263 +
1264 +/* this is "#defined" and not inline because sock.h includes us, but we need
1265 + * the "struct sock" definition.
1266 + */
1267 +#define netswap_low_memory(sk, skb) \
1268 +({ \
1269 + int _ret = 0; \
1270 + \
1271 + if (netswap_sock_count > 0 && /* anybody swapping via network? */ \
1272 + !(sk)->swapping && /* but we are not needed for swapping */ \
1273 + nr_free_pages() < netswap_free_pages_min) { /* so drop us */ \
1274 + printk("netswap_low_memory: " \
1275 + "dropping skb 0x%p@0x%p\n", skb, sk); \
1276 + netswap_dropped ++; \
1277 + _ret = 1; \
1278 + } \
1279 + _ret; \
1280 +})
1281 +
1282 +extern int __init netswap_init(void);
1283 +
1284 +#endif
1285 +
1286 +#endif
1287 diff -Nurb src/linux/linux.orig/include/net/sock.h src/linux/linux/include/net/sock.h
1288 --- src/linux/linux.orig/include/net/sock.h 2004-05-31 02:07:17.000000000 -0400
1289 +++ src/linux/linux/include/net/sock.h 2004-05-31 02:18:03.000000000 -0400
1290 @@ -103,6 +103,10 @@
1291 #include <linux/filter.h>
1292 #endif
1293
1294 +#ifdef CONFIG_NETSWAP
1295 +#include <net/netswapping.h>
1296 +#endif
1297 +
1298 #include <asm/atomic.h>
1299 #include <net/dst.h>
1300
1301 @@ -536,6 +540,12 @@
1302 no_check,
1303 broadcast,
1304 bsdism;
1305 +#ifdef CONFIG_NETSWAP
1306 + /* Increased by SO_SWAPPING with arg != 0, decreased by
1307 + * SO_SWAPPING with arg 0
1308 + */
1309 + int swapping;
1310 +#endif
1311 unsigned char debug;
1312 unsigned char rcvtstamp;
1313 unsigned char use_write_queue;
1314 @@ -1165,6 +1175,11 @@
1315 return err; /* Toss packet */
1316 }
1317 #endif /* CONFIG_FILTER */
1318 +#ifdef CONFIG_NETSWAP
1319 + /* netswap_low_memory() is a macro defined in net/netswapping.h */
1320 + if (netswap_low_memory(sk, skb))
1321 + return -ENOMEM;
1322 +#endif /* CONFIG_NETSWAP */
1323
1324 skb->dev = NULL;
1325 skb_set_owner_r(skb, sk);
1326 diff -Nurb src/linux/linux.orig/kernel/ksyms.c src/linux/linux/kernel/ksyms.c
1327 --- src/linux/linux.orig/kernel/ksyms.c 2004-05-31 02:02:43.000000000 -0400
1328 +++ src/linux/linux/kernel/ksyms.c 2004-05-31 02:18:03.000000000 -0400
1329 @@ -41,6 +41,7 @@
1330 #include <linux/mm.h>
1331 #include <linux/capability.h>
1332 #include <linux/highuid.h>
1333 +#include <linux/swapctl.h>
1334 #include <linux/brlock.h>
1335 #include <linux/fs.h>
1336 #include <linux/tty.h>
1337 @@ -127,6 +128,11 @@
1338 EXPORT_SYMBOL(kmap_prot);
1339 EXPORT_SYMBOL(kmap_pte);
1340 #endif
1341 +EXPORT_SYMBOL(nr_free_pages);
1342 +/* EXPORT_SYMBOL(freepages); */
1343 +EXPORT_SYMBOL(register_swap_method);
1344 +EXPORT_SYMBOL(unregister_swap_method);
1345 +EXPORT_SYMBOL(swap_run_test);
1346
1347 /* filesystem internal functions */
1348 EXPORT_SYMBOL(def_blk_fops);
1349 @@ -531,7 +537,7 @@
1350 EXPORT_SYMBOL(make_bad_inode);
1351 EXPORT_SYMBOL(is_bad_inode);
1352 EXPORT_SYMBOL(event);
1353 -EXPORT_SYMBOL(brw_page);
1354 +EXPORT_SYMBOL(end_buffer_io_async);
1355 EXPORT_SYMBOL(__inode_dir_notify);
1356
1357 #ifdef CONFIG_UID16
1358 diff -Nurb src/linux/linux.orig/mm/page_io.c src/linux/linux/mm/page_io.c
1359 --- src/linux/linux.orig/mm/page_io.c 2003-07-04 04:12:29.000000000 -0400
1360 +++ src/linux/linux/mm/page_io.c 2004-05-31 02:18:03.000000000 -0400
1361 @@ -36,11 +36,8 @@
1362 static int rw_swap_page_base(int rw, swp_entry_t entry, struct page *page)
1363 {
1364 unsigned long offset;
1365 - int zones[PAGE_SIZE/512];
1366 - int zones_used;
1367 - kdev_t dev = 0;
1368 - int block_size;
1369 - struct inode *swapf = 0;
1370 + struct swap_method *method;
1371 + void *data;
1372
1373 if (rw == READ) {
1374 ClearPageUptodate(page);
1375 @@ -48,30 +45,11 @@
1376 } else
1377 kstat.pswpout++;
1378
1379 - get_swaphandle_info(entry, &offset, &dev, &swapf);
1380 - if (dev) {
1381 - zones[0] = offset;
1382 - zones_used = 1;
1383 - block_size = PAGE_SIZE;
1384 - } else if (swapf) {
1385 - int i, j;
1386 - unsigned int block = offset
1387 - << (PAGE_SHIFT - swapf->i_sb->s_blocksize_bits);
1388 -
1389 - block_size = swapf->i_sb->s_blocksize;
1390 - for (i=0, j=0; j< PAGE_SIZE ; i++, j += block_size)
1391 - if (!(zones[i] = bmap(swapf,block++))) {
1392 - printk("rw_swap_page: bad swap file\n");
1393 - return 0;
1394 - }
1395 - zones_used = i;
1396 - dev = swapf->i_dev;
1397 - } else {
1398 + method = get_swaphandle_info(entry, &offset, &data);
1399 + if (!method || !method->ops->rw_page(rw, page, offset, data)) {
1400 return 0;
1401 }
1402
1403 - /* block_size == PAGE_SIZE/zones_used */
1404 - brw_page(rw, page, dev, zones, block_size);
1405 return 1;
1406 }
1407
1408 diff -Nurb src/linux/linux.orig/mm/slab.c src/linux/linux/mm/slab.c
1409 --- src/linux/linux.orig/mm/slab.c 2003-07-04 04:12:29.000000000 -0400
1410 +++ src/linux/linux/mm/slab.c 2004-05-31 02:18:03.000000000 -0400
1411 @@ -111,10 +111,12 @@
1412 # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
1413 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
1414 SLAB_NO_REAP | SLAB_CACHE_DMA | \
1415 - SLAB_MUST_HWCACHE_ALIGN)
1416 + SLAB_MUST_HWCACHE_ALIGN | \
1417 + SLAB_LOW_GFP_ORDER)
1418 #else
1419 # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
1420 - SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN)
1421 + SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
1422 + SLAB_LOW_GFP_ORDER)
1423 #endif
1424
1425 /*
1426 @@ -247,8 +249,13 @@
1427 };
1428
1429 /* internal c_flags */
1430 -#define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
1431 -#define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
1432 +#define CFLGS_OFF_SLAB 0x020000UL /* slab management in own cache */
1433 +#define CFLGS_OPTIMIZE 0x040000UL /* optimized slab lookup */
1434 +#define CFLGS_MASK (CFLGS_OFF_SLAB | CFLGS_OPTIMIZE)
1435 +
1436 +#if (CFLGS_MASK & CREATE_MASK)
1437 +# error BUG: internal and external SLAB flags overlap
1438 +#endif
1439
1440 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
1441 #define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
1442 @@ -452,7 +459,12 @@
1443 snprintf(name, sizeof(name), "size-%Zd",sizes->cs_size);
1444 if (!(sizes->cs_cachep =
1445 kmem_cache_create(name, sizes->cs_size,
1446 - 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
1447 + 0,
1448 +#if CONFIG_NETSWAP
1449 + SLAB_LOW_GFP_ORDER| /* sorry */
1450 +#endif
1451 + SLAB_HWCACHE_ALIGN,
1452 + NULL, NULL))) {
1453 BUG();
1454 }
1455
1456 @@ -731,6 +743,8 @@
1457 break;
1458 if (!cachep->num)
1459 goto next;
1460 + if (cachep->gfporder == 0 && (flags & SLAB_LOW_GFP_ORDER))
1461 + break;
1462 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
1463 /* Oops, this num of objs will cause problems. */
1464 cachep->gfporder--;
1465 diff -Nurb src/linux/linux.orig/mm/swapfile.c src/linux/linux/mm/swapfile.c
1466 --- src/linux/linux.orig/mm/swapfile.c 2003-07-04 04:12:29.000000000 -0400
1467 +++ src/linux/linux/mm/swapfile.c 2004-05-31 02:18:03.000000000 -0400
1468 @@ -11,12 +11,17 @@
1469 #include <linux/swap.h>
1470 #include <linux/swapctl.h>
1471 #include <linux/blkdev.h> /* for blk_size */
1472 +#include <linux/file.h>
1473 #include <linux/vmalloc.h>
1474 #include <linux/pagemap.h>
1475 #include <linux/shm.h>
1476
1477 #include <asm/pgtable.h>
1478
1479 +#ifdef CONFIG_KMOD
1480 +#include <linux/kmod.h>
1481 +#endif
1482 +
1483 spinlock_t swaplock = SPIN_LOCK_UNLOCKED;
1484 unsigned int nr_swapfiles;
1485 int total_swap_pages;
1486 @@ -31,8 +36,78 @@
1487
1488 struct swap_info_struct swap_info[MAX_SWAPFILES];
1489
1490 +static struct swap_method *swap_methods = NULL;
1491 +
1492 #define SWAPFILE_CLUSTER 256
1493
1494 +int register_swap_method(char *name, struct swap_ops *ops)
1495 +{
1496 +	struct swap_method *entry;
1497 +	int result;
1498 +
1499 +	lock_kernel();
1500 +	/* refuse a second registration under an already known name */
1501 +	for (entry = swap_methods; entry != NULL; entry = entry->next) {
1502 +		if (strcmp(entry->name, name) != 0)
1503 +			continue;
1504 +		printk(KERN_ERR "register_swap_method: "
1505 +		       "method %s already registered\n", name);
1506 +		result = -EBUSY;
1507 +		goto unlock;
1508 +	}
1509 +
1510 +	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
1511 +	if (entry == NULL) {
1512 +		printk(KERN_ERR "register_swap_method: "
1513 +		       "no memory for new method \"%s\"\n", name);
1514 +		result = -ENOMEM;
1515 +		goto unlock;
1516 +	}
1517 +
1518 +	printk("register_swap_method: method %s\n", name);
1519 +	/* name is stored by reference, not copied; link in at the list head */
1520 +	entry->name = name;
1521 +	entry->ops = ops;
1522 +	entry->use_count = 0;
1523 +	entry->next = swap_methods;
1524 +	swap_methods = entry;
1525 +	result = 0;
1526 + unlock:
1527 +	unlock_kernel();
1528 +	return result;
1529 +}
1530 +
1531 +int unregister_swap_method(char *name)
1532 +{
1533 +	struct swap_method **link = &swap_methods;
1534 +	struct swap_method *victim;
1535 +	int result = -ENOENT;
1536 +
1537 +	lock_kernel();
1538 +
1539 +	/* advance along the link pointers so a match can be unhooked in place */
1540 +	while (*link != NULL && strcmp((*link)->name, name) != 0)
1541 +		link = &(*link)->next;
1542 +
1543 +	victim = *link;
1544 +	if (victim == NULL) {
1545 +		printk("unregister_swap_method: no such method %s\n", name);
1546 +	} else if (victim->use_count > 0) {
1547 +		/* use_count > 0: refuse to drop the method */
1548 +		printk(KERN_ERR "unregister_swap_method: "
1549 +		       "method \"%s\" is in use\n", name);
1550 +		result = -EBUSY;
1551 +	} else {
1552 +		*link = victim->next;
1553 +		kfree(victim);
1554 +		printk("unregister_swap_method: method %s\n", name);
1555 +		result = 0;
1556 +	}
1557 +
1558 +	unlock_kernel();
1559 +	return result;
1560 +}
1561 +
1562 static inline int scan_swap_map(struct swap_info_struct *si)
1563 {
1564 unsigned long offset;
1565 @@ -711,13 +786,14 @@
1566 struct nameidata nd;
1567 int i, type, prev;
1568 int err;
1569 + struct file *swap_file;
1570
1571 if (!capable(CAP_SYS_ADMIN))
1572 return -EPERM;
1573
1574 err = user_path_walk(specialfile, &nd);
1575 if (err)
1576 - goto out;
1577 + return err;
1578
1579 lock_kernel();
1580 prev = -1;
1581 @@ -725,15 +801,20 @@
1582 for (type = swap_list.head; type >= 0; type = swap_info[type].next) {
1583 p = swap_info + type;
1584 if ((p->flags & SWP_WRITEOK) == SWP_WRITEOK) {
1585 - if (p->swap_file == nd.dentry)
1586 + if (p->swap_file &&
1587 + p->swap_file->f_dentry == nd.dentry)
1588 break;
1589 }
1590 prev = type;
1591 }
1592 err = -EINVAL;
1593 + /* p->swap_file contains all needed info, no need to keep nd, so
1594 + * release it now.
1595 + */
1596 + path_release(&nd);
1597 if (type < 0) {
1598 swap_list_unlock();
1599 - goto out_dput;
1600 + goto out;
1601 }
1602
1603 if (prev < 0) {
1604 @@ -767,32 +848,30 @@
1605 total_swap_pages += p->pages;
1606 p->flags = SWP_WRITEOK;
1607 swap_list_unlock();
1608 - goto out_dput;
1609 + goto out;
1610 }
1611 - if (p->swap_device)
1612 - blkdev_put(p->swap_file->d_inode->i_bdev, BDEV_SWAP);
1613 - path_release(&nd);
1614
1615 + if (p->method->ops->release)
1616 + p->method->ops->release(p->swap_file, p->data);
1617 swap_list_lock();
1618 swap_device_lock(p);
1619 - nd.mnt = p->swap_vfsmnt;
1620 - nd.dentry = p->swap_file;
1621 - p->swap_vfsmnt = NULL;
1622 + p->method->use_count --;
1623 + p->method = NULL;
1624 + p->data = NULL;
1625 + swap_file = p->swap_file;
1626 p->swap_file = NULL;
1627 - p->swap_device = 0;
1628 p->max = 0;
1629 swap_map = p->swap_map;
1630 p->swap_map = NULL;
1631 p->flags = 0;
1632 swap_device_unlock(p);
1633 swap_list_unlock();
1634 + filp_close(swap_file, NULL);
1635 vfree(swap_map);
1636 err = 0;
1637
1638 -out_dput:
1639 - unlock_kernel();
1640 - path_release(&nd);
1641 out:
1642 + unlock_kernel();
1643 return err;
1644 }
1645
1646 @@ -805,18 +884,17 @@
1647 if (!page)
1648 return -ENOMEM;
1649
1650 - len += sprintf(buf, "Filename\t\t\tType\t\tSize\tUsed\tPriority\n");
1651 + len += sprintf(buf, "%-32s%-16s%-8s%-8sPriority\n",
1652 + "Filename", "Type", "Size", "Used");
1653 for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
1654 if ((ptr->flags & SWP_USED) && ptr->swap_map) {
1655 - char * path = d_path(ptr->swap_file, ptr->swap_vfsmnt,
1656 + char * path = d_path(ptr->swap_file->f_dentry,
1657 + ptr->swap_file->f_vfsmnt,
1658 page, PAGE_SIZE);
1659
1660 len += sprintf(buf + len, "%-31s ", path);
1661
1662 - if (!ptr->swap_device)
1663 - len += sprintf(buf + len, "file\t\t");
1664 - else
1665 - len += sprintf(buf + len, "partition\t");
1666 + len += sprintf(buf + len, "%-15s ", ptr->method->name);
1667
1668 usedswap = 0;
1669 for (j = 0; j < ptr->max; ++j)
1670 @@ -827,7 +905,7 @@
1671 default:
1672 usedswap++;
1673 }
1674 - len += sprintf(buf + len, "%d\t%d\t%d\n", ptr->pages << (PAGE_SHIFT - 10),
1675 + len += sprintf(buf + len, "%-8d%-8d%d\n", ptr->pages << (PAGE_SHIFT - 10),
1676 usedswap << (PAGE_SHIFT - 10), ptr->prio);
1677 }
1678 }
1679 @@ -835,18 +913,55 @@
1680 return len;
1681 }
1682
1683 -int is_swap_partition(kdev_t dev) {
1684 +/* apply a test function to all active swap objects. E.g. for checking
1685 + * whether a partition is used for swapping
1686 + */
1687 +int swap_run_test(int (*test_fct)(unsigned int flags,
1688 + struct file * swap_file,
1689 + void *testdata), void *testdata)
1690 +{
1691 struct swap_info_struct *ptr = swap_info;
1692 int i;
1693
1694 for (i = 0 ; i < nr_swapfiles ; i++, ptr++) {
1695 - if (ptr->flags & SWP_USED)
1696 - if (ptr->swap_device == dev)
1697 + if (ptr->swap_file &&
1698 + test_fct(ptr->flags, ptr->swap_file, testdata))
1699 return 1;
1700 }
1701 return 0;
1702 }
1703
1704 +/* Offer swap_file to each registered swap method in list order and
1705 + * bind p to the first one whose open() returns a positive size.
1706 + * A negative return from open() ends the search immediately.
1707 + *
1708 + * return value: < 0: error, 0: not found, > 0: swapfilesize
1709 + */
1710 +int find_swap_method(struct file *swap_file,
1711 +		     struct swap_info_struct *p)
1712 +{
1713 +	struct swap_method *cand;
1714 +	int size = 0;
1715 +
1716 +	p->method = NULL;
1717 +	for (cand = swap_methods; cand != NULL; cand = cand->next) {
1718 +		size = cand->ops->open(swap_file, &p->data);
1719 +		if (size < 0)
1720 +			return size;	/* error: stop probing */
1721 +		if (size == 0)
1722 +			continue;	/* not claimed by this method */
1723 +
1724 +		/* claimed: record the binding and bump the method's use count */
1725 +		cand->use_count++;
1726 +		p->method = cand;
1727 +		p->swap_file = swap_file;
1728 +		return size;
1729 +	}
1730 +
1731 +	/* nothing claimed the file (size is still 0 here) */
1732 +	return size;
1733 +}
1734 +
1735 /*
1736 * Written 01/25/92 by Simmule Turner, heavily changed by Linus.
1737 *
1738 @@ -855,8 +970,6 @@
1739 asmlinkage long sys_swapon(const char * specialfile, int swap_flags)
1740 {
1741 struct swap_info_struct * p;
1742 - struct nameidata nd;
1743 - struct inode * swap_inode;
1744 unsigned int type;
1745 int i, j, prev;
1746 int error;
1747 @@ -866,8 +979,9 @@
1748 int nr_good_pages = 0;
1749 unsigned long maxpages = 1;
1750 int swapfilesize;
1751 - struct block_device *bdev = NULL;
1752 unsigned short *swap_map;
1753 + char * tmp_specialfile;
1754 + struct file *swap_file;
1755
1756 if (!capable(CAP_SYS_ADMIN))
1757 return -EPERM;
1758 @@ -886,8 +1000,7 @@
1759 nr_swapfiles = type+1;
1760 p->flags = SWP_USED;
1761 p->swap_file = NULL;
1762 - p->swap_vfsmnt = NULL;
1763 - p->swap_device = 0;
1764 + p->method = NULL;
1765 p->swap_map = NULL;
1766 p->lowest_bit = 0;
1767 p->highest_bit = 0;
1768 @@ -901,53 +1014,56 @@
1769 p->prio = --least_priority;
1770 }
1771 swap_list_unlock();
1772 - error = user_path_walk(specialfile, &nd);
1773 - if (error)
1774 +
1775 +	/* Open the swap using filp_open. Bail out on any errors. */
1776 +	tmp_specialfile = getname(specialfile);
1777 +	error = PTR_ERR(tmp_specialfile);
1778 +	if (IS_ERR(tmp_specialfile))
1779  		goto bad_swap_2;
1780 +	p->swap_file = filp_open(tmp_specialfile, O_RDWR, 0600);
1781 +	putname(tmp_specialfile);
1782 +	if (IS_ERR(p->swap_file)) {
1783 +		error = PTR_ERR(p->swap_file);
1784 +		p->swap_file = NULL;	/* bad_swap_1 would filp_close() the ERR_PTR */
1785 +		goto bad_swap_2;
1786 +	}
1787
1788 - p->swap_file = nd.dentry;
1789 - p->swap_vfsmnt = nd.mnt;
1790 - swap_inode = nd.dentry->d_inode;
1791 error = -EINVAL;
1792
1793 - if (S_ISBLK(swap_inode->i_mode)) {
1794 - kdev_t dev = swap_inode->i_rdev;
1795 - struct block_device_operations *bdops;
1796 - devfs_handle_t de;
1797 -
1798 - p->swap_device = dev;
1799 - set_blocksize(dev, PAGE_SIZE);
1800 -
1801 - bd_acquire(swap_inode);
1802 - bdev = swap_inode->i_bdev;
1803 - de = devfs_get_handle_from_inode(swap_inode);
1804 - bdops = devfs_get_ops(de); /* Increments module use count */
1805 - if (bdops) bdev->bd_op = bdops;
1806 -
1807 - error = blkdev_get(bdev, FMODE_READ|FMODE_WRITE, 0, BDEV_SWAP);
1808 - devfs_put_ops(de);/*Decrement module use count now we're safe*/
1809 - if (error)
1810 - goto bad_swap_2;
1811 - set_blocksize(dev, PAGE_SIZE);
1812 - error = -ENODEV;
1813 - if (!dev || (blk_size[MAJOR(dev)] &&
1814 - !blk_size[MAJOR(dev)][MINOR(dev)]))
1815 - goto bad_swap;
1816 - swapfilesize = 0;
1817 - if (blk_size[MAJOR(dev)])
1818 - swapfilesize = blk_size[MAJOR(dev)][MINOR(dev)]
1819 - >> (PAGE_SHIFT - 10);
1820 - } else if (S_ISREG(swap_inode->i_mode))
1821 - swapfilesize = swap_inode->i_size >> PAGE_SHIFT;
1822 - else
1823 - goto bad_swap;
1824 + swapfilesize = find_swap_method(p->swap_file, p);
1825 + if (swapfilesize < 0) {
1826 + error = swapfilesize;
1827 + goto bad_swap_1;
1828 + }
1829 +#ifdef CONFIG_KMOD
1830 + if (swapfilesize == 0) {
1831 + (void)request_module("swapfile-mod");
1832 +
1833 + swapfilesize = find_swap_method(p->swap_file, p);
1834 + if (swapfilesize < 0) {
1835 + error = swapfilesize;
1836 + goto bad_swap_1;
1837 + }
1838 + }
1839 +#endif
1840 + if (swapfilesize == 0) {
1841 + printk("Don't know how to swap to this kind of file\n");
1842 + goto bad_swap_1; /* free swap map */
1843 + }
1844 +
1845 + /* After this point, the swap-file has been opened by the swap
1846 + * method. We must make sure to use the bad_swap label for any
1847 + * errors.
1848 + */
1849
1850 error = -EBUSY;
1851 for (i = 0 ; i < nr_swapfiles ; i++) {
1852 struct swap_info_struct *q = &swap_info[i];
1853 if (i == type || !q->swap_file)
1854 continue;
1855 - if (swap_inode->i_mapping == q->swap_file->d_inode->i_mapping)
1856 + if (p->swap_file->f_dentry->d_inode->i_mapping
1857 + ==
1858 + q->swap_file->f_dentry->d_inode->i_mapping)
1859 goto bad_swap;
1860 }
1861
1862 @@ -1083,17 +1199,27 @@
1863 swap_list_unlock();
1864 error = 0;
1865 goto out;
1866 +
1867 bad_swap:
1868 - if (bdev)
1869 - blkdev_put(bdev, BDEV_SWAP);
1870 + if (p->method->ops->release)
1871 + p->method->ops->release(p->swap_file, p->data);
1872 + swap_list_lock();
1873 + p->method->use_count --;
1874 + p->method = NULL;
1875 + p->data = NULL;
1876 + swap_list_unlock();
1877 +
1878 +bad_swap_1:
1879 + swap_list_lock();
1880 + swap_file = p->swap_file;
1881 + p->swap_file = NULL;
1882 + swap_list_unlock();
1883 + filp_close(swap_file, NULL);
1884 +
1885 bad_swap_2:
1886 +
1887 swap_list_lock();
1888 swap_map = p->swap_map;
1889 - nd.mnt = p->swap_vfsmnt;
1890 - nd.dentry = p->swap_file;
1891 - p->swap_device = 0;
1892 - p->swap_file = NULL;
1893 - p->swap_vfsmnt = NULL;
1894 p->swap_map = NULL;
1895 p->flags = 0;
1896 if (!(swap_flags & SWAP_FLAG_PREFER))
1897 @@ -1101,7 +1227,7 @@
1898 swap_list_unlock();
1899 if (swap_map)
1900 vfree(swap_map);
1901 - path_release(&nd);
1902 +
1903 out:
1904 if (swap_header)
1905 free_page((long) swap_header);
1906 @@ -1217,8 +1343,8 @@
1907 /*
1908 * Prior swap_duplicate protects against swap device deletion.
1909 */
1910 -void get_swaphandle_info(swp_entry_t entry, unsigned long *offset,
1911 - kdev_t *dev, struct inode **swapf)
1912 +struct swap_method *get_swaphandle_info(swp_entry_t entry,
1913 + unsigned long *offset, void **data)
1914 {
1915 unsigned long type;
1916 struct swap_info_struct *p;
1917 @@ -1226,32 +1352,26 @@
1918 type = SWP_TYPE(entry);
1919 if (type >= nr_swapfiles) {
1920 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_file, entry.val);
1921 - return;
1922 + return NULL;
1923 }
1924
1925 p = &swap_info[type];
1926 *offset = SWP_OFFSET(entry);
1927 if (*offset >= p->max && *offset != 0) {
1928 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Bad_offset, entry.val);
1929 - return;
1930 + return NULL;
1931 }
1932 if (p->swap_map && !p->swap_map[*offset]) {
1933 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_offset, entry.val);
1934 - return;
1935 + return NULL;
1936 }
1937 if (!(p->flags & SWP_USED)) {
1938 printk(KERN_ERR "rw_swap_page: %s%08lx\n", Unused_file, entry.val);
1939 - return;
1940 + return NULL;
1941 }
1942
1943 - if (p->swap_device) {
1944 - *dev = p->swap_device;
1945 - } else if (p->swap_file) {
1946 - *swapf = p->swap_file->d_inode;
1947 - } else {
1948 - printk(KERN_ERR "rw_swap_page: no swap file or device\n");
1949 - }
1950 - return;
1951 + *data = p->data;
1952 + return p->method;
1953 }
1954
1955 /*
1956 diff -Nurb src/linux/linux.orig/net/Config.in src/linux/linux/net/Config.in
1957 --- src/linux/linux.orig/net/Config.in 2003-07-04 04:12:29.000000000 -0400
1958 +++ src/linux/linux/net/Config.in 2004-05-31 02:18:03.000000000 -0400
1959 @@ -16,6 +16,9 @@
1960 fi
1961 bool 'Socket Filtering' CONFIG_FILTER
1962 tristate 'Unix domain sockets' CONFIG_UNIX
1963 +if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
1964 + bool 'Swapping via network sockets (EXPERIMENTAL)' CONFIG_NETSWAP
1965 +fi
1966 bool 'TCP/IP networking' CONFIG_INET
1967 if [ "$CONFIG_INET" = "y" ]; then
1968 source net/ipv4/Config.in
1969 diff -Nurb src/linux/linux.orig/net/Makefile src/linux/linux/net/Makefile
1970 --- src/linux/linux.orig/net/Makefile 2003-07-04 04:12:29.000000000 -0400
1971 +++ src/linux/linux/net/Makefile 2004-05-31 02:18:03.000000000 -0400
1972 @@ -51,6 +51,7 @@
1973 ifeq ($(CONFIG_NET),y)
1974 obj-$(CONFIG_MODULES) += netsyms.o
1975 obj-$(CONFIG_SYSCTL) += sysctl_net.o
1976 +obj-$(CONFIG_NETSWAP) += netswapping.o
1977 endif
1978
1979 include $(TOPDIR)/Rules.make
1980 diff -Nurb src/linux/linux.orig/net/core/sock.c src/linux/linux/net/core/sock.c
1981 --- src/linux/linux.orig/net/core/sock.c 2003-10-14 04:09:32.000000000 -0400
1982 +++ src/linux/linux/net/core/sock.c 2004-05-31 02:18:03.000000000 -0400
1983 @@ -402,6 +402,21 @@
1984 ret = -ENONET;
1985 break;
1986 #endif
1987 +#ifdef CONFIG_NETSWAP
1988 + case SO_SWAPPING:
1989 + if (valbool) {
1990 + if (!sk->swapping) {
1991 + netswap_sock_count ++;
1992 + }
1993 + sk->swapping ++;
1994 + } else if (sk->swapping > 0) {
1995 + sk->swapping --;
1996 + if (!sk->swapping) {
1997 + netswap_sock_count --;
1998 + }
1999 + }
2000 + break;
2001 +#endif
2002 /* We implement the SO_SNDLOWAT etc to
2003 not be settable (1003.1g 5.3) */
2004 default:
2005 @@ -552,6 +567,12 @@
2006 goto lenout;
2007 }
2008
2009 +#ifdef CONFIG_NETSWAP
2010 + case SO_SWAPPING:
2011 + v.val = sk->swapping;
2012 + break;
2013 +#endif
2014 +
2015 /* Dubious BSD thing... Probably nobody even uses it, but
2016 * the UNIX standard wants it for whatever reason... -DaveM
2017 */
2018 diff -Nurb src/linux/linux.orig/net/ipv4/tcp_ipv4.c src/linux/linux/net/ipv4/tcp_ipv4.c
2019 --- src/linux/linux.orig/net/ipv4/tcp_ipv4.c 2003-10-14 04:09:33.000000000 -0400
2020 +++ src/linux/linux/net/ipv4/tcp_ipv4.c 2004-05-31 02:18:03.000000000 -0400
2021 @@ -1657,6 +1657,12 @@
2022 if (filter && sk_filter(skb, filter))
2023 goto discard;
2024 #endif /* CONFIG_FILTER */
2025 +#ifdef CONFIG_NETSWAP
2026 + /* tcp doesn't use sock_queue_rcv_skb() ... */
2027 + /* an inline function defined in net/netswapping.h */
2028 + if (netswap_low_memory(sk, skb))
2029 + goto discard;
2030 +#endif /* CONFIG_NETSWAP */
2031
2032 IP_INC_STATS_BH(IpInDelivers);
2033
2034 diff -Nurb src/linux/linux.orig/net/ipv6/tcp_ipv6.c src/linux/linux/net/ipv6/tcp_ipv6.c
2035 --- src/linux/linux.orig/net/ipv6/tcp_ipv6.c 2003-10-14 04:09:34.000000000 -0400
2036 +++ src/linux/linux/net/ipv6/tcp_ipv6.c 2004-05-31 02:18:03.000000000 -0400
2037 @@ -1424,6 +1424,12 @@
2038 if (filter && sk_filter(skb, filter))
2039 goto discard;
2040 #endif /* CONFIG_FILTER */
2041 +#ifdef CONFIG_NETSWAP
2042 + /* tcp doesn't use sock_queue_rcv_skb() ... */
2043 + /* an inline function defined in net/netswapping.h */
2044 + if (netswap_low_memory(sk, skb))
2045 + goto discard;
2046 +#endif /* CONFIG_NETSWAP */
2047
2048 /*
2049 * socket locking is here for SMP purposes as backlog rcv
2050 diff -Nurb src/linux/linux.orig/net/netswapping.c src/linux/linux/net/netswapping.c
2051 --- src/linux/linux.orig/net/netswapping.c 1969-12-31 19:00:00.000000000 -0500
2052 +++ src/linux/linux/net/netswapping.c 2004-05-31 02:18:03.000000000 -0400
2053 @@ -0,0 +1,76 @@
2054 +/*
2055 + * linux/net/netswapping.c
2056 + *
2057 + * Support paging over network connections (inet only)
2058 + *
2059 + * (c) 2000 Claus-Justus Heine <heine@instmath.rwth-aachen.de>
2060 + */
2061 +
2062 +#include <linux/slab.h>
2063 +#include <linux/swap.h>
2064 +#include <linux/swapctl.h>
2065 +#include <linux/skbuff.h>
2066 +#include <linux/module.h>
2067 +#include <linux/sysctl.h>
2068 +#include <linux/init.h>
2069 +#include <net/netswapping.h>
2070 +#include <net/sock.h>
2071 +#include <asm/uaccess.h>
2072 +
2073 +unsigned int netswap_dropped; /* statistics */
2074 +unsigned int netswap_free_pages_min;
2075 +int netswap_sock_count; /* how many sockets have swapping option set */
2076 +
2077 +#ifdef CONFIG_SYSCTL
2078 +
2079 +static ctl_table netswap_table[] = {
2080 + {NET_SWAP_DROPPED, "dropped",
2081 + &netswap_dropped, sizeof(int), 0644, NULL, &proc_dointvec },
2082 + {NET_SWAP_DROP_THRESHOLD, "threshold",
2083 + &netswap_free_pages_min, sizeof(int), 0644, NULL, &proc_dointvec },
2084 + {NET_SWAP_SOCK_COUNT, "sock_count",
2085 + &netswap_sock_count, sizeof(int), 0444, NULL, &proc_dointvec },
2086 + {0},
2087 +};
2088 +
2089 +static struct ctl_table_header *netswap_sysctl_header;
2090 +
2091 +static ctl_table netswap_net_table[] = {
2092 + {CTL_NETSWAP, "swapping", NULL, 0, 0555, netswap_table},
2093 + {0}
2094 +};
2095 +
2096 +static ctl_table netswap_root_table[] = {
2097 + {CTL_NET, "net", NULL, 0, 0555, netswap_net_table},
2098 + {0}
2099 +};
2100 +
2101 +#endif
2102 +
2103 +int __init netswap_init(void)
2104 +{
2105 + /* drop packets when below this threshold */
2106 + netswap_free_pages_min = 32 /* freepages.min */;
2107 +#ifdef CONFIG_SYSCTL
2108 + netswap_sysctl_header = register_sysctl_table(netswap_root_table, 0);
2109 +#endif
2110 + return 0;
2111 +}
2112 +
2113 +void __exit netswap_exit(void)
2114 +{
2115 +#ifdef CONFIG_SYSCTL
2116 + unregister_sysctl_table(netswap_sysctl_header);
2117 +#endif
2118 +}
2119 +
2120 +/* linux/init.h -- VERY nice :-)
2121 + *
2122 + * On the other hand, we have no control over the order the initcalls
2123 + * are performed ...
2124 + *
2125 + * Actually, we are not compiled as module ...
2126 + */
2127 +
2128 +module_init(netswap_init)
2129 +module_exit(netswap_exit)
2130 diff -Nurb src/linux/linux.orig/net/netsyms.c src/linux/linux/net/netsyms.c
2131 --- src/linux/linux.orig/net/netsyms.c 2004-05-31 02:02:49.000000000 -0400
2132 +++ src/linux/linux/net/netsyms.c 2004-05-31 02:18:03.000000000 -0400
2133 @@ -601,4 +601,10 @@
2134 EXPORT_SYMBOL(wireless_send_event);
2135 #endif /* CONFIG_NET_RADIO || CONFIG_NET_PCMCIA_RADIO */
2136
2137 +#ifdef CONFIG_NETSWAP
2138 +EXPORT_SYMBOL(netswap_sock_count);
2139 +EXPORT_SYMBOL(netswap_free_pages_min);
2140 +EXPORT_SYMBOL(netswap_dropped);
2141 +#endif
2142 +
2143 #endif /* CONFIG_NET */
2144 diff -Nurb src/linux/linux.orig/net/packet/af_packet.c src/linux/linux/net/packet/af_packet.c
2145 --- src/linux/linux.orig/net/packet/af_packet.c 2003-10-14 04:09:35.000000000 -0400
2146 +++ src/linux/linux/net/packet/af_packet.c 2004-05-31 02:18:03.000000000 -0400
2147 @@ -449,6 +449,12 @@
2148 snaplen = res;
2149 }
2150 #endif /* CONFIG_FILTER */
2151 +#ifdef CONFIG_NETSWAP
2152 + /* packet doesn't use sock_queue_rcv_skb() ... */
2153 + /* an inline function defined in net/netswapping.h */
2154 + if (netswap_low_memory(sk, skb))
2155 + goto drop_n_restore;
2156 +#endif /* CONFIG_NETSWAP */
2157
2158 if (atomic_read(&sk->rmem_alloc) + skb->truesize >= (unsigned)sk->rcvbuf)
2159 goto drop_n_acct;
2160 @@ -496,7 +502,7 @@
2161 po->stats.tp_drops++;
2162 spin_unlock(&sk->receive_queue.lock);
2163
2164 -#ifdef CONFIG_FILTER
2165 +#if defined(CONFIG_FILTER) || defined(CONFIG_NETSWAP)
2166 drop_n_restore:
2167 #endif
2168 if (skb_head != skb->data && skb_shared(skb)) {
2169 @@ -557,6 +563,12 @@
2170 snaplen = res;
2171 }
2172 #endif
2173 +#ifdef CONFIG_NETSWAP
2174 + /* packet doesn't use sock_queue_rcv_skb() ... */
2175 + /* an inline function defined in net/netswapping.h */
2176 + if (netswap_low_memory(sk, skb))
2177 + goto drop_n_restore;
2178 +#endif /* CONFIG_NETSWAP */
2179
2180 if (sk->type == SOCK_DGRAM) {
2181 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
2182 diff -Nurb src/linux/linux.orig/net/sunrpc/sched.c src/linux/linux/net/sunrpc/sched.c
2183 --- src/linux/linux.orig/net/sunrpc/sched.c 2003-07-04 04:12:33.000000000 -0400
2184 +++ src/linux/linux/net/sunrpc/sched.c 2004-05-31 02:18:03.000000000 -0400
2185 @@ -79,10 +79,11 @@
2186 */
2187 static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED;
2188
2189 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2190 /*
2191 * This is the last-ditch buffer for NFS swap requests
2192 */
2193 -static u32 swap_buffer[PAGE_SIZE >> 2];
2194 +static u32 swap_buffer[2*PAGE_SIZE >> 2];
2195 static long swap_buffer_used;
2196
2197 /*
2198 @@ -96,6 +97,7 @@
2199 {
2200 clear_bit(1, &swap_buffer_used);
2201 }
2202 +#endif
2203
2204 /*
2205 * Disable the timer for a given RPC task. Should be called with
2206 @@ -501,6 +503,7 @@
2207 __rpc_execute(struct rpc_task *task)
2208 {
2209 int status = 0;
2210 + unsigned long alloc_flag = current->flags & PF_MEMALLOC;
2211
2212 dprintk("RPC: %4d rpc_execute flgs %x\n",
2213 task->tk_pid, task->tk_flags);
2214 @@ -510,6 +513,13 @@
2215 return 0;
2216 }
2217
2218 + if (task->tk_flags & RPC_TASK_SWAPPER) {
2219 + if (!(current->flags & PF_MEMALLOC)) {
2220 + dprintk("__rpc_execute: Setting PF_MEMALLOC\n");
2221 + }
2222 + current->flags |= PF_MEMALLOC; /* cleared again at out: unless it was already set on entry */
2223 + }
2224 +
2225 restarted:
2226 while (1) {
2227 /*
2228 @@ -554,7 +564,8 @@
2229 rpc_set_sleeping(task);
2230 if (RPC_IS_ASYNC(task)) {
2231 spin_unlock_bh(&rpc_queue_lock);
2232 - return 0;
2233 + status = 0;
2234 + goto out;
2235 }
2236 }
2237 spin_unlock_bh(&rpc_queue_lock);
2238 @@ -563,7 +574,12 @@
2239 /* sync task: sleep here */
2240 dprintk("RPC: %4d sync task going to sleep\n",
2241 task->tk_pid);
2242 - if (current->pid == rpciod_pid)
2243 + /* it's ok to wait for rpciod when swapping,
2244 + * because this means it needed memory and is
2245 + * doing the swap-out itself.
2246 + */
2247 + if (current->pid == rpciod_pid &&
2248 + !(task->tk_flags & RPC_TASK_SWAPPER))
2249 printk(KERN_ERR "RPC: rpciod waiting on sync task!\n");
2250
2251 __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task));
2252 @@ -608,6 +624,10 @@
2253 /* Release all resources associated with the task */
2254 rpc_release_task(task);
2255
2256 + out:
2257 + if (!alloc_flag) {
2258 + current->flags &= ~PF_MEMALLOC;
2259 + }
2260 return status;
2261 }
2262
2263 @@ -699,10 +719,16 @@
2264 {
2265 u32 *buffer;
2266 int gfp;
2267 + unsigned long alloc_flag = current->flags & PF_MEMALLOC;
2268 + void *ret = NULL;
2269
2270 - if (flags & RPC_TASK_SWAPPER)
2271 + if (flags & RPC_TASK_SWAPPER) {
2272 gfp = GFP_ATOMIC;
2273 - else if (flags & RPC_TASK_ASYNC)
2274 + if (!(current->flags & PF_MEMALLOC)) {
2275 + dprintk("rpc_allocate: Setting PF_MEMALLOC\n");
2276 + }
2277 + current->flags |= PF_MEMALLOC;
2278 + } else if (flags & RPC_TASK_ASYNC)
2279 gfp = GFP_RPC;
2280 else
2281 gfp = GFP_KERNEL;
2282 @@ -710,29 +736,44 @@
2283 do {
2284 if ((buffer = (u32 *) kmalloc(size, gfp)) != NULL) {
2285 dprintk("RPC: allocated buffer %p\n", buffer);
2286 - return buffer;
2287 + ret = buffer;
2288 + goto out;
2289 }
2290 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2291 if ((flags & RPC_TASK_SWAPPER) && size <= sizeof(swap_buffer)
2292 && rpc_lock_swapbuf()) {
2293 dprintk("RPC: used last-ditch swap buffer\n");
2294 - return swap_buffer;
2295 + ret = swap_buffer;
2296 + goto out;
2297 + }
2298 +#endif /* CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE */
2299 + if (flags & RPC_TASK_ASYNC) {
2300 + ret = NULL;
2301 + goto out;
2302 }
2303 - if (flags & RPC_TASK_ASYNC)
2304 - return NULL;
2305 yield();
2306 } while (!signalled());
2307
2308 - return NULL;
2309 + out:
2310 + if (!alloc_flag) {
2311 + current->flags &= ~PF_MEMALLOC;
2312 + }
2313 + return ret;
2314 }
2315
2316 void
2317 rpc_free(void *buffer)
2318 {
2319 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2320 if (buffer != swap_buffer) {
2321 +#endif
2322 kfree(buffer);
2323 return;
2324 +#if CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE
2325 }
2326 rpc_unlock_swapbuf();
2327 + dprintk("RPC: Released swap buffer\n");
2328 +#endif /* CONFIG_SWAP_VIA_NFS || CONFIG_SWAP_VIA_NFS_MODULE */
2329 }
2330
2331 /*
2332 diff -Nurb src/linux/linux.orig/net/sunrpc/xprt.c src/linux/linux/net/sunrpc/xprt.c
2333 --- src/linux/linux.orig/net/sunrpc/xprt.c 2003-07-04 04:12:33.000000000 -0400
2334 +++ src/linux/linux/net/sunrpc/xprt.c 2004-05-31 02:18:03.000000000 -0400
2335 @@ -139,7 +139,7 @@
2336 __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
2337 {
2338 if (!xprt->snd_task) {
2339 - if (xprt->nocong || __xprt_get_cong(xprt, task))
2340 + if (__xprt_get_cong(xprt, task))
2341 xprt->snd_task = task;
2342 }
2343 if (xprt->snd_task != task) {
2344 @@ -179,7 +179,7 @@
2345 if (!task)
2346 return;
2347 }
2348 - if (xprt->nocong || __xprt_get_cong(xprt, task))
2349 + if (__xprt_get_cong(xprt, task))
2350 xprt->snd_task = task;
2351 }
2352
2353 @@ -276,6 +276,9 @@
2354 {
2355 struct rpc_rqst *req = task->tk_rqstp;
2356
2357 + if (xprt->nocong || RPC_IS_SWAPPER(task))
2358 + return 1;
2359 +
2360 if (req->rq_cong)
2361 return 1;
2362 dprintk("RPC: %4d xprt_cwnd_limited cong = %ld cwnd = %ld\n",