author     Daniel Robbins <drobbins@gentoo.org>  2001-11-16 19:05:45 +0000
committer  Daniel Robbins <drobbins@gentoo.org>  2001-11-16 19:05:45 +0000
commit     715b351ca0263d384c9491eab685e69f13a333ad (patch)
tree       6f36e5ce84c451a5f5caa4c1c8c1c92f1c27ec97 /sys-kernel/linux-sources
parent     fix (diff)
download   gentoo-2-715b351ca0263d384c9491eab685e69f13a333ad.tar.gz
           gentoo-2-715b351ca0263d384c9491eab685e69f13a333ad.tar.bz2
           gentoo-2-715b351ca0263d384c9491eab685e69f13a333ad.zip
andrea patches...
Diffstat (limited to 'sys-kernel/linux-sources')
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2 | 160
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2 | 92
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1 | 12
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1 | 125
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2 | 24
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1 | 11
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1 | 28
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2 | 105
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2 | bin 0 -> 30928 bytes
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1 | 10
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5 | 119
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5 | 92
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2 | bin 0 -> 5274 bytes
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4 | 304
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1 | 45
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2 | 12
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1 | 52
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2 | 457
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23 | 1400
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4 | 271
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10 | 394
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2 | 136
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1 | 20
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1 | 35
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1 | 10
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1 | 12
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1 | 18
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1 | 22
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2 | 77
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1 | 10
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1 | 80
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1 | 39
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2 | 64
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2 | 56
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13 | 800
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8 | 61
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13 | 1595
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7 | 37
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1 | 327
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2 | 41
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2 | bin 0 -> 105675 bytes
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5 | 60
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1 | 18
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6 | 55
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5 | 137
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1 | 183
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3 | 69
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2 | bin 0 -> 85178 bytes
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1 | 22
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1 | 23
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1 | 12
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2 | 40
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1 | 122
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2 | 136
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1 | 42
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2 | 113
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2 | 69
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1 | 11
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2 | 79
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2 | 21
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1 | 11
-rw-r--r--  sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1 | 32
62 files changed, 8408 insertions(+), 0 deletions(-)
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2
new file mode 100644
index 000000000000..f27b0347208d
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2
@@ -0,0 +1,160 @@
+diff -urN 2.4.11pre6/arch/i386/Makefile 3.5G/arch/i386/Makefile
+--- 2.4.11pre6/arch/i386/Makefile Tue May 1 19:35:18 2001
++++ 3.5G/arch/i386/Makefile Tue Oct 9 04:45:22 2001
+@@ -106,6 +106,9 @@
+
+ MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
+
++arch/i386/vmlinux.lds: arch/i386/vmlinux.lds.S FORCE
++ $(CPP) -C -P -I$(HPATH) -imacros $(HPATH)/asm-i386/page_offset.h -Ui386 arch/i386/vmlinux.lds.S >arch/i386/vmlinux.lds
++
+ vmlinux: arch/i386/vmlinux.lds
+
+ FORCE: ;
+diff -urN 2.4.11pre6/arch/i386/config.in 3.5G/arch/i386/config.in
+--- 2.4.11pre6/arch/i386/config.in Tue Oct 9 00:10:11 2001
++++ 3.5G/arch/i386/config.in Tue Oct 9 04:45:36 2001
+@@ -158,12 +158,15 @@
+ "off CONFIG_NOHIGHMEM \
+ 4GB CONFIG_HIGHMEM4G \
+ 64GB CONFIG_HIGHMEM64G" off
+-if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
++if [ "$CONFIG_HIGHMEM4G" = "y" -o "$CONFIG_HIGHMEM64G" = "y" ]; then
+ define_bool CONFIG_HIGHMEM y
++else
++ define_bool CONFIG_HIGHMEM n
+ fi
+ if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
+- define_bool CONFIG_HIGHMEM y
+ define_bool CONFIG_X86_PAE y
++else
++ bool '3.5GB user address space' CONFIG_05GB
+ fi
+
+ bool 'Math emulation' CONFIG_MATH_EMULATION
+diff -urN 2.4.11pre6/arch/i386/vmlinux.lds.S 3.5G/arch/i386/vmlinux.lds.S
+--- 2.4.11pre6/arch/i386/vmlinux.lds.S Thu Jan 1 01:00:00 1970
++++ 3.5G/arch/i386/vmlinux.lds.S Tue Oct 9 04:45:22 2001
+@@ -0,0 +1,83 @@
++/* ld script to make i386 Linux kernel
++ * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>;
++ */
++OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
++OUTPUT_ARCH(i386)
++ENTRY(_start)
++SECTIONS
++{
++ . = PAGE_OFFSET_RAW + 0x100000;
++ _text = .; /* Text and read-only data */
++ .text : {
++ *(.text)
++ *(.fixup)
++ *(.gnu.warning)
++ } = 0x9090
++ .text.lock : { *(.text.lock) } /* out-of-line lock text */
++
++ _etext = .; /* End of text section */
++
++ .rodata : { *(.rodata) *(.rodata.*) }
++ .kstrtab : { *(.kstrtab) }
++
++ . = ALIGN(16); /* Exception table */
++ __start___ex_table = .;
++ __ex_table : { *(__ex_table) }
++ __stop___ex_table = .;
++
++ __start___ksymtab = .; /* Kernel symbol table */
++ __ksymtab : { *(__ksymtab) }
++ __stop___ksymtab = .;
++
++ .data : { /* Data */
++ *(.data)
++ CONSTRUCTORS
++ }
++
++ _edata = .; /* End of data section */
++
++ . = ALIGN(8192); /* init_task */
++ .data.init_task : { *(.data.init_task) }
++
++ . = ALIGN(4096); /* Init code and data */
++ __init_begin = .;
++ .text.init : { *(.text.init) }
++ .data.init : { *(.data.init) }
++ . = ALIGN(16);
++ __setup_start = .;
++ .setup.init : { *(.setup.init) }
++ __setup_end = .;
++ __initcall_start = .;
++ .initcall.init : { *(.initcall.init) }
++ __initcall_end = .;
++ . = ALIGN(4096);
++ __init_end = .;
++
++ . = ALIGN(4096);
++ .data.page_aligned : { *(.data.idt) }
++
++ . = ALIGN(32);
++ .data.cacheline_aligned : { *(.data.cacheline_aligned) }
++
++ __bss_start = .; /* BSS */
++ .bss : {
++ *(.bss)
++ }
++ _end = . ;
++
++ /* Sections to be discarded */
++ /DISCARD/ : {
++ *(.text.exit)
++ *(.data.exit)
++ *(.exitcall.exit)
++ }
++
++ /* Stabs debugging sections. */
++ .stab 0 : { *(.stab) }
++ .stabstr 0 : { *(.stabstr) }
++ .stab.excl 0 : { *(.stab.excl) }
++ .stab.exclstr 0 : { *(.stab.exclstr) }
++ .stab.index 0 : { *(.stab.index) }
++ .stab.indexstr 0 : { *(.stab.indexstr) }
++ .comment 0 : { *(.comment) }
++}
+diff -urN 2.4.11pre6/include/asm-i386/page.h 3.5G/include/asm-i386/page.h
+--- 2.4.11pre6/include/asm-i386/page.h Sun Sep 23 21:11:40 2001
++++ 3.5G/include/asm-i386/page.h Tue Oct 9 04:45:22 2001
+@@ -78,7 +78,9 @@
+ * and CONFIG_HIGHMEM64G options in the kernel configuration.
+ */
+
+-#define __PAGE_OFFSET (0xC0000000)
++#include <asm/page_offset.h>
++
++#define __PAGE_OFFSET (PAGE_OFFSET_RAW)
+
+ #ifndef __ASSEMBLY__
+
+diff -urN 2.4.11pre6/include/asm-i386/page_offset.h 3.5G/include/asm-i386/page_offset.h
+--- 2.4.11pre6/include/asm-i386/page_offset.h Thu Jan 1 01:00:00 1970
++++ 3.5G/include/asm-i386/page_offset.h Tue Oct 9 04:45:22 2001
+@@ -0,0 +1,6 @@
++#include <linux/config.h>
++#ifndef CONFIG_05GB
++#define PAGE_OFFSET_RAW 0xC0000000
++#else
++#define PAGE_OFFSET_RAW 0xE0000000
++#endif
+diff -urN 2.4.11pre6/include/asm-i386/processor.h 3.5G/include/asm-i386/processor.h
+--- 2.4.11pre6/include/asm-i386/processor.h Tue Oct 9 00:11:19 2001
++++ 3.5G/include/asm-i386/processor.h Tue Oct 9 04:45:22 2001
+@@ -270,7 +270,11 @@
+ /* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
++#ifndef CONFIG_05GB
+ #define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
++#else
++#define TASK_UNMAPPED_BASE (TASK_SIZE / 16)
++#endif
+
+ /*
+ * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2
new file mode 100644
index 000000000000..4925f0442f08
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2
@@ -0,0 +1,92 @@
+diff -urN 2.4.10pre9/arch/alpha/kernel/entry.S alpha-fp/arch/alpha/kernel/entry.S
+--- 2.4.10pre9/arch/alpha/kernel/entry.S Sat Aug 11 08:03:53 2001
++++ alpha-fp/arch/alpha/kernel/entry.S Fri Sep 14 06:30:18 2001
+@@ -290,6 +290,8 @@
+ .end __kernel_execve
+
+ .align 3
++.globl do_switch_fp_start
++.globl do_switch_fp_end
+ .ent do_switch_stack
+ do_switch_stack:
+ lda $30,-SWITCH_STACK_SIZE($30)
+@@ -301,6 +303,7 @@
+ stq $14,40($30)
+ stq $15,48($30)
+ stq $26,56($30)
++do_switch_fp_start:
+ stt $f0,64($30)
+ stt $f1,72($30)
+ stt $f2,80($30)
+@@ -335,10 +338,13 @@
+ stt $f30,304($30)
+ stt $f0,312($30) # save fpcr in slot of $f31
+ ldt $f0,64($30) # dont let "do_switch_stack" change fp state.
++do_switch_fp_end:
+ ret $31,($1),1
+ .end do_switch_stack
+
+ .align 3
++.globl undo_switch_fp_start
++.globl undo_switch_fp_end
+ .ent undo_switch_stack
+ undo_switch_stack:
+ ldq $9,0($30)
+@@ -349,6 +355,7 @@
+ ldq $14,40($30)
+ ldq $15,48($30)
+ ldq $26,56($30)
++undo_switch_fp_start:
+ ldt $f30,312($30) # get saved fpcr
+ ldt $f0,64($30)
+ ldt $f1,72($30)
+@@ -382,6 +389,7 @@
+ ldt $f28,288($30)
+ ldt $f29,296($30)
+ ldt $f30,304($30)
++undo_switch_fp_end:
+ lda $30,SWITCH_STACK_SIZE($30)
+ ret $31,($1),1
+ .end undo_switch_stack
+diff -urN 2.4.10pre9/arch/alpha/kernel/proto.h alpha-fp/arch/alpha/kernel/proto.h
+--- 2.4.10pre9/arch/alpha/kernel/proto.h Sun Apr 1 20:36:06 2001
++++ alpha-fp/arch/alpha/kernel/proto.h Fri Sep 14 06:30:18 2001
+@@ -134,6 +134,11 @@
+ extern void entUna(void);
+ extern void entDbg(void);
+
++extern void do_switch_fp_start(void);
++extern void do_switch_fp_end(void);
++extern void undo_switch_fp_start(void);
++extern void undo_switch_fp_end(void);
++
+ /* process.c */
+ extern void cpu_idle(void) __attribute__((noreturn));
+
+diff -urN 2.4.10pre9/arch/alpha/kernel/traps.c alpha-fp/arch/alpha/kernel/traps.c
+--- 2.4.10pre9/arch/alpha/kernel/traps.c Fri Sep 14 04:05:38 2001
++++ alpha-fp/arch/alpha/kernel/traps.c Fri Sep 14 06:32:19 2001
+@@ -218,6 +218,23 @@
+ unsigned long a2, unsigned long a3, unsigned long a4,
+ unsigned long a5, struct pt_regs regs)
+ {
++ if (type == 3 && !(regs.ps & 8) ) {
++ /*
++ * Handle a rare case where the user has disabled floating
++ * point using the clrfen PALcall and the kernel is attempting
++ * to view floating point state. This happens in two asm stubs:
++ * do_switch_stack and undo_switch_stack.
++ * If this is the case, we modify the return value to pass
++ * over this section and resume from there.
++ */
++ if (regs.pc == (unsigned long) do_switch_fp_start) {
++ regs.pc = (unsigned long) do_switch_fp_end;
++ return;
++ } else if (regs.pc == (unsigned long) undo_switch_fp_start) {
++ regs.pc = (unsigned long) undo_switch_fp_end;
++ return;
++ }
++ }
+ if (!opDEC_testing || type != 4) {
+ die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"),
+ &regs, type, 0);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1
new file mode 100644
index 000000000000..b7c7f999d36a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1
@@ -0,0 +1,12 @@
+diff -urN 2.4.6pre3/kernel/timer.c backoutgcc/kernel/timer.c
+--- 2.4.6pre3/kernel/timer.c Wed Jun 13 04:02:52 2001
++++ backoutgcc/kernel/timer.c Wed Jun 13 15:49:13 2001
+@@ -32,7 +32,7 @@
+ long tick = (1000000 + HZ/2) / HZ; /* timer interrupt period */
+
+ /* The current time */
+-struct timeval xtime __attribute__ ((aligned (16)));
++volatile struct timeval xtime __attribute__ ((aligned (16)));
+
+ /* Don't completely fail for HZ > 500. */
+ int tickadj = 500/HZ ? : 1; /* microsecs */
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1
new file mode 100644
index 000000000000..a37218b4ad2e
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1
@@ -0,0 +1,125 @@
+diff -urN 2.4.11pre3/fs/binfmt_elf.c elf/fs/binfmt_elf.c
+--- 2.4.11pre3/fs/binfmt_elf.c Thu Oct 4 10:06:57 2001
++++ elf/fs/binfmt_elf.c Thu Oct 4 18:23:34 2001
+@@ -78,13 +78,13 @@
+
+ #define BAD_ADDR(x) ((unsigned long)(x) > TASK_SIZE)
+
+-static void set_brk(unsigned long start, unsigned long end)
++static unsigned long set_brk(unsigned long start, unsigned long end)
+ {
+ start = ELF_PAGEALIGN(start);
+ end = ELF_PAGEALIGN(end);
+ if (end <= start)
+- return;
+- do_brk(start, end - start);
++ return 0;
++ return do_brk(start, end - start);
+ }
+
+
+@@ -300,6 +300,7 @@
+ elf_type |= MAP_FIXED;
+
+ map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
++ error = map_addr;
+ if (BAD_ADDR(map_addr))
+ goto out_close;
+
+@@ -338,8 +339,11 @@
+ elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* What we have mapped so far */
+
+ /* Map the last of the bss segment */
+- if (last_bss > elf_bss)
+- do_brk(elf_bss, last_bss - elf_bss);
++ if (last_bss > elf_bss) {
++ error = do_brk(elf_bss, last_bss - elf_bss);
++ if (BAD_ADDR(error))
++ goto out_close;
++ }
+
+ *interp_load_addr = load_addr;
+ error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
+@@ -626,7 +630,11 @@
+ /* There was a PT_LOAD segment with p_memsz > p_filesz
+ before this one. Map anonymous pages, if needed,
+ and clear the area. */
+- set_brk (elf_bss + load_bias, elf_brk + load_bias);
++ error = set_brk (elf_bss + load_bias, elf_brk + load_bias);
++ /* here retval is zero */
++ if (BAD_ADDR(error))
++ goto out_free_dentry;
++
+ nbyte = ELF_PAGEOFFSET(elf_bss);
+ if (nbyte) {
+ nbyte = ELF_MIN_ALIGN - nbyte;
+@@ -653,8 +661,9 @@
+ }
+
+ error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
++ /* here retval is zero */
+ if (BAD_ADDR(error))
+- continue;
++ goto out_free_dentry;
+
+ if (!load_addr_set) {
+ load_addr_set = 1;
+@@ -703,11 +712,10 @@
+ fput(interpreter);
+ kfree(elf_interpreter);
+
++ /* here retval is zero */
+ if (BAD_ADDR(elf_entry)) {
+- printk(KERN_ERR "Unable to load interpreter\n");
+- kfree(elf_phdata);
+- send_sig(SIGSEGV, current, 0);
+- return 0;
++ printk(KERN_WARNING "Unable to load interpreter\n");
++ goto out_free_ph;
+ }
+ }
+
+@@ -741,7 +749,10 @@
+ /* Calling set_brk effectively mmaps the pages that we need
+ * for the bss and break sections
+ */
+- set_brk(elf_bss, elf_brk);
++ error = set_brk(elf_bss, elf_brk);
++ /* here retval is zero */
++ if (BAD_ADDR(error))
++ goto out;
+
+ padzero(elf_bss);
+
+@@ -781,14 +792,15 @@
+ start_thread(regs, elf_entry, bprm->p);
+ if (current->ptrace & PT_PTRACED)
+ send_sig(SIGTRAP, current, 0);
+- retval = 0;
++ /* here retval is zero */
+ out:
+ return retval;
+
+ /* error cleanup */
+ out_free_dentry:
+ allow_write_access(interpreter);
+- fput(interpreter);
++ if (interpreter)
++ fput(interpreter);
+ out_free_interp:
+ if (elf_interpreter)
+ kfree(elf_interpreter);
+@@ -866,8 +878,11 @@
+
+ len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
+ bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
+- if (bss > len)
+- do_brk(len, bss - len);
++ if (bss > len) {
++ error = do_brk(len, bss - len);
++ if (BAD_ADDR(error))
++ goto out_free_ph;
++ }
+ error = 0;
+
+ out_free_ph:
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2
new file mode 100644
index 000000000000..aeaa1e95788c
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2
@@ -0,0 +1,24 @@
+diff -urN 2.4.10pre12/fs/inode.c inode-resched/fs/inode.c
+--- 2.4.10pre12/fs/inode.c Thu Sep 20 01:44:07 2001
++++ inode-resched/fs/inode.c Thu Sep 20 20:02:35 2001
+@@ -17,6 +17,7 @@
+ #include <linux/swapctl.h>
+ #include <linux/prefetch.h>
+ #include <linux/locks.h>
++#include <linux/compiler.h>
+
+ /*
+ * New inode.c implementation.
+@@ -295,6 +296,12 @@
+ * so we have to start looking from the list head.
+ */
+ tmp = head;
++
++ if (unlikely(current->need_resched)) {
++ spin_unlock(&inode_lock);
++ schedule();
++ spin_lock(&inode_lock);
++ }
+ }
+ }
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1
new file mode 100644
index 000000000000..474ad52ee803
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1
@@ -0,0 +1,11 @@
+--- 2.4.6pre2aa1/include/linux/sched.h.~1~ Wed Jun 13 00:44:45 2001
++++ 2.4.6pre2aa1/include/linux/sched.h Wed Jun 13 00:47:23 2001
+@@ -541,7 +541,7 @@
+ extern unsigned long volatile jiffies;
+ extern unsigned long itimer_ticks;
+ extern unsigned long itimer_next;
+-extern struct timeval xtime;
++extern volatile struct timeval xtime;
+ extern void do_timer(struct pt_regs *);
+
+ extern unsigned int * prof_buffer;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1
new file mode 100644
index 000000000000..b9da3b6ff726
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1
@@ -0,0 +1,28 @@
+diff -urN 2.4.9ac13/drivers/block/loop.c 2.4.9ac14/drivers/block/loop.c
+--- 2.4.9ac13/drivers/block/loop.c Fri Sep 21 22:09:16 2001
++++ 2.4.9ac14/drivers/block/loop.c Sat Sep 22 06:09:39 2001
+@@ -177,6 +177,8 @@
+ unsigned size, offset;
+ int len;
+
++ down(&(file->f_dentry->d_inode->i_sem));
++
+ index = pos >> PAGE_CACHE_SHIFT;
+ offset = pos & (PAGE_CACHE_SIZE - 1);
+ len = bh->b_size;
+@@ -210,6 +212,7 @@
+ deactivate_page(page);
+ page_cache_release(page);
+ }
++ up(&(file->f_dentry->d_inode->i_sem));
+ return 0;
+
+ write_fail:
+@@ -221,6 +224,7 @@
+ deactivate_page(page);
+ page_cache_release(page);
+ fail:
++ up(&(file->f_dentry->d_inode->i_sem));
+ return -1;
+ }
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2
new file mode 100644
index 000000000000..6546915bc284
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2
@@ -0,0 +1,105 @@
+diff -urN 2.4.13pre3/fs/buffer.c sched/fs/buffer.c
+--- 2.4.13pre3/fs/buffer.c Tue Oct 16 02:03:44 2001
++++ sched/fs/buffer.c Wed Oct 17 23:40:56 2001
+@@ -231,6 +231,7 @@
+ static void write_unlocked_buffers(kdev_t dev)
+ {
+ do {
++ conditional_schedule();
+ spin_lock(&lru_list_lock);
+ } while (write_some_buffers(dev));
+ run_task_queue(&tq_disk);
+@@ -280,6 +281,7 @@
+ static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
+ {
+ do {
++ conditional_schedule();
+ spin_lock(&lru_list_lock);
+ } while (wait_for_buffers(dev, index, refile));
+ return 0;
+diff -urN 2.4.13pre3/fs/proc/array.c sched/fs/proc/array.c
+--- 2.4.13pre3/fs/proc/array.c Tue Oct 16 02:03:45 2001
++++ sched/fs/proc/array.c Wed Oct 17 23:40:56 2001
+@@ -415,6 +415,8 @@
+ pte_t page = *pte;
+ struct page *ptpage;
+
++ conditional_schedule();
++
+ address += PAGE_SIZE;
+ pte++;
+ if (pte_none(page))
+diff -urN 2.4.13pre3/fs/proc/generic.c sched/fs/proc/generic.c
+--- 2.4.13pre3/fs/proc/generic.c Sun Sep 23 21:11:40 2001
++++ sched/fs/proc/generic.c Wed Oct 17 23:40:56 2001
+@@ -98,7 +98,9 @@
+ retval = n;
+ break;
+ }
+-
++
++ conditional_schedule();
++
+ /* This is a hack to allow mangling of file pos independent
+ * of actual bytes read. Simply place the data at page,
+ * return the bytes, and set `start' to the desired offset
+diff -urN 2.4.13pre3/include/linux/condsched.h sched/include/linux/condsched.h
+--- 2.4.13pre3/include/linux/condsched.h Thu Jan 1 01:00:00 1970
++++ sched/include/linux/condsched.h Wed Oct 17 23:40:56 2001
+@@ -0,0 +1,14 @@
++#ifndef _LINUX_CONDSCHED_H
++#define _LINUX_CONDSCHED_H
++
++#ifndef __ASSEMBLY__
++#define conditional_schedule() \
++do { \
++ if (unlikely(current->need_resched)) { \
++ __set_current_state(TASK_RUNNING); \
++ schedule(); \
++ } \
++} while(0)
++#endif
++
++#endif
+diff -urN 2.4.13pre3/include/linux/sched.h sched/include/linux/sched.h
+--- 2.4.13pre3/include/linux/sched.h Thu Oct 11 10:41:52 2001
++++ sched/include/linux/sched.h Wed Oct 17 23:40:56 2001
+@@ -13,6 +13,7 @@
+ #include <linux/times.h>
+ #include <linux/timex.h>
+ #include <linux/rbtree.h>
++#include <linux/condsched.h>
+
+ #include <asm/system.h>
+ #include <asm/semaphore.h>
+diff -urN 2.4.13pre3/mm/filemap.c sched/mm/filemap.c
+--- 2.4.13pre3/mm/filemap.c Tue Oct 16 02:03:47 2001
++++ sched/mm/filemap.c Wed Oct 17 23:55:51 2001
+@@ -671,6 +671,8 @@
+ struct page **hash = page_hash(mapping, offset);
+ struct page *page;
+
++ conditional_schedule();
++
+ spin_lock(&pagecache_lock);
+ page = __find_page_nolock(mapping, offset, *hash);
+ spin_unlock(&pagecache_lock);
+@@ -1263,6 +1265,9 @@
+ offset &= ~PAGE_CACHE_MASK;
+
+ page_cache_release(page);
++
++ conditional_schedule();
++
+ if (ret == nr && desc->count)
+ continue;
+ break;
+@@ -2753,6 +2758,8 @@
+ SetPageReferenced(page);
+ UnlockPage(page);
+ page_cache_release(page);
++
++ conditional_schedule();
+
+ if (status < 0)
+ break;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2
new file mode 100644
index 000000000000..d4c7923c5211
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2
Binary files differ
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1
new file mode 100644
index 000000000000..5bc437c15fe1
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1
@@ -0,0 +1,10 @@
+--- 2.4.10pre11aa1/mm/mmap.c.~1~ Tue Sep 18 06:01:02 2001
++++ 2.4.10pre11aa1/mm/mmap.c Tue Sep 18 06:02:45 2001
+@@ -479,7 +479,6 @@
+ }
+
+ /* Clear old maps */
+- error = -ENOMEM;
+ munmap_back:
+ vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+ if (vma && vma->vm_start < addr + len) {
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5
new file mode 100644
index 000000000000..391123e25175
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5
@@ -0,0 +1,119 @@
+diff -urN 2.4.10pre12/include/asm-alpha/module.h module-gfp/include/asm-alpha/module.h
+--- 2.4.10pre12/include/asm-alpha/module.h Thu Sep 20 01:44:11 2001
++++ module-gfp/include/asm-alpha/module.h Thu Sep 20 06:40:25 2001
+@@ -4,8 +4,8 @@
+ * This file contains the alpha architecture specific module code.
+ */
+
+-#define module_map(x) vmalloc(x)
+-#define module_unmap(x) vfree(x)
++#define module_map(x) alloc_exact(x)
++#define module_unmap(x) free_exact((x), (x)->size)
+ #define module_arch_init(x) alpha_module_init(x)
+ #define arch_init_modules(x) alpha_init_modules(x)
+
+diff -urN 2.4.10pre12/include/asm-i386/module.h module-gfp/include/asm-i386/module.h
+--- 2.4.10pre12/include/asm-i386/module.h Thu Sep 20 01:44:11 2001
++++ module-gfp/include/asm-i386/module.h Thu Sep 20 06:40:25 2001
+@@ -4,8 +4,8 @@
+ * This file contains the i386 architecture specific module code.
+ */
+
+-#define module_map(x) vmalloc(x)
+-#define module_unmap(x) vfree(x)
++#define module_map(x) alloc_exact(x)
++#define module_unmap(x) free_exact((x), (x)->size)
+ #define module_arch_init(x) (0)
+ #define arch_init_modules(x) do { } while (0)
+
+diff -urN 2.4.10pre12/include/linux/mm.h module-gfp/include/linux/mm.h
+--- 2.4.10pre12/include/linux/mm.h Thu Sep 20 05:09:07 2001
++++ module-gfp/include/linux/mm.h Thu Sep 20 06:40:25 2001
+@@ -411,6 +411,9 @@
+ extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
+ extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
+
++extern void * FASTCALL(alloc_exact(unsigned int size));
++extern void FASTCALL(free_exact(void * addr, unsigned int size));
++
+ #define __free_page(page) __free_pages((page), 0)
+ #define free_page(addr) free_pages((addr),0)
+
+diff -urN 2.4.10pre12/kernel/ksyms.c module-gfp/kernel/ksyms.c
+--- 2.4.10pre12/kernel/ksyms.c Thu Sep 20 01:44:19 2001
++++ module-gfp/kernel/ksyms.c Thu Sep 20 06:40:25 2001
+@@ -96,6 +96,8 @@
+ EXPORT_SYMBOL(get_zeroed_page);
+ EXPORT_SYMBOL(__free_pages);
+ EXPORT_SYMBOL(free_pages);
++EXPORT_SYMBOL(free_exact);
++EXPORT_SYMBOL(alloc_exact);
+ EXPORT_SYMBOL(num_physpages);
+ EXPORT_SYMBOL(kmem_find_general_cachep);
+ EXPORT_SYMBOL(kmem_cache_create);
+diff -urN 2.4.10pre12/mm/page_alloc.c module-gfp/mm/page_alloc.c
+--- 2.4.10pre12/mm/page_alloc.c Thu Sep 20 01:44:20 2001
++++ module-gfp/mm/page_alloc.c Thu Sep 20 06:40:42 2001
+@@ -18,6 +18,7 @@
+ #include <linux/bootmem.h>
+ #include <linux/slab.h>
+ #include <linux/compiler.h>
++#include <linux/vmalloc.h>
+
+ int nr_swap_pages;
+ int nr_active_pages;
+@@ -443,6 +444,54 @@
+ if (addr != 0)
+ __free_pages(virt_to_page(addr), order);
+ }
++
++static inline int nextorder(unsigned int x)
++{
++ int c = -PAGE_SHIFT;
++ while (x) {
++ x >>= 1;
++ c++;
++ }
++ if (c < 0)
++ c = 0;
++ return c;
++}
++
++void * alloc_exact(unsigned int size)
++{
++ struct page *p, *w;
++ int order = nextorder(size);
++
++ p = alloc_pages(GFP_KERNEL, order);
++ if (p) {
++ struct page *end = p + (1UL << order);
++ for (w = p+1; w < end; ++w)
++ set_page_count(w, 1);
++ for (w = p + (size>>PAGE_SHIFT)+1; w < end; ++w)
++ __free_pages(w, 0);
++ return (void *) page_address(p);
++ }
++
++ return vmalloc(size);
++}
++
++void free_exact(void * addr, unsigned int size)
++{
++ struct page * w;
++ unsigned long mptr = (unsigned long) addr;
++ int sz;
++
++ if (mptr >= VMALLOC_START && mptr + size <= VMALLOC_END) {
++ vfree(addr);
++ return;
++ }
++ w = virt_to_page(addr);
++ for (sz = size; sz > 0; sz -= PAGE_SIZE, ++w) {
++ if (atomic_read(&w->count) != 1)
++ BUG();
++ __free_pages(w, 0);
++ }
++}
+
+ /*
+ * Total amount of free (allocatable) RAM:
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5
new file mode 100644
index 000000000000..6a9f637daaf1
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5
@@ -0,0 +1,92 @@
+diff -urN 2.4.6pre6/include/linux/time.h nanosleep/include/linux/time.h
+--- 2.4.6pre6/include/linux/time.h Thu Jun 14 18:07:48 2001
++++ nanosleep/include/linux/time.h Thu Jun 28 11:47:14 2001
+@@ -48,6 +48,27 @@
+ value->tv_sec = jiffies / HZ;
+ }
+
++static __inline__ int
++timespec_before(struct timespec a, struct timespec b)
++{
++ if (a.tv_sec == b.tv_sec)
++ return a.tv_nsec < b.tv_nsec;
++ return a.tv_sec < b.tv_sec;
++}
++
++/* computes `a - b' and write the result in `result', assumes `a >= b' */
++static inline void
++timespec_less(struct timespec a, struct timespec b, struct timespec * result)
++{
++ if (a.tv_nsec < b.tv_nsec)
++ {
++ a.tv_sec--;
++ a.tv_nsec += 1000000000;
++ }
++
++ result->tv_sec = a.tv_sec - b.tv_sec;
++ result->tv_nsec = a.tv_nsec - b.tv_nsec;
++}
+
+ /* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+ * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+@@ -89,6 +110,27 @@
+ time_t tv_sec; /* seconds */
+ suseconds_t tv_usec; /* microseconds */
+ };
++
++/* computes `a - b' and write the result in `result', assumes `a >= b' */
++static inline void
++timeval_less(struct timeval a, struct timeval b, struct timeval * result)
++{
++ if (a.tv_usec < b.tv_usec)
++ {
++ a.tv_sec--;
++ a.tv_usec += 1000000;
++ }
++
++ result->tv_sec = a.tv_sec - b.tv_sec;
++ result->tv_usec = a.tv_usec - b.tv_usec;
++}
++
++static __inline__ void
++timeval_to_timespec(struct timeval tv, struct timespec * ts)
++{
++ ts->tv_sec = tv.tv_sec;
++ ts->tv_nsec = (long) tv.tv_usec * 1000;
++}
+
+ struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+diff -urN 2.4.6pre6/kernel/timer.c nanosleep/kernel/timer.c
+--- 2.4.6pre6/kernel/timer.c Thu Jun 28 11:38:09 2001
++++ nanosleep/kernel/timer.c Thu Jun 28 11:48:47 2001
+@@ -798,6 +798,7 @@
+ {
+ struct timespec t;
+ unsigned long expire;
++ struct timeval before, after;
+
+ if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
+ return -EFAULT;
+@@ -822,11 +823,20 @@
+ expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+
+ current->state = TASK_INTERRUPTIBLE;
++ get_fast_time(&before);
+ expire = schedule_timeout(expire);
++ get_fast_time(&after);
+
+ if (expire) {
+ if (rmtp) {
+- jiffies_to_timespec(expire, &t);
++ struct timespec elapsed;
++
++ timeval_less(after, before, &after);
++ timeval_to_timespec(after, &elapsed);
++ if (timespec_before(elapsed, t))
++ timespec_less(t, elapsed, &t);
++ else
++ t.tv_nsec = t.tv_sec = 0;
+ if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
+ return -EFAULT;
+ }
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2
new file mode 100644
index 000000000000..0ca831610811
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2
Binary files differ
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4
new file mode 100644
index 000000000000..ad3eecfcf6a9
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4
@@ -0,0 +1,304 @@
+diff -urN 2.4.13pre6/fs/block_dev.c o_direct/fs/block_dev.c
+--- 2.4.13pre6/fs/block_dev.c Sun Oct 21 20:03:47 2001
++++ o_direct/fs/block_dev.c Tue Oct 23 14:18:35 2001
+@@ -113,6 +113,11 @@
+ return 0;
+ }
+
++static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
++{
++ return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, blkdev_get_block);
++}
++
+ static int blkdev_writepage(struct page * page)
+ {
+ return block_write_full_page(page, blkdev_get_block);
+@@ -632,6 +637,7 @@
+ sync_page: block_sync_page,
+ prepare_write: blkdev_prepare_write,
+ commit_write: blkdev_commit_write,
++ direct_IO: blkdev_direct_IO,
+ };
+
+ struct file_operations def_blk_fops = {
+diff -urN 2.4.13pre6/fs/buffer.c o_direct/fs/buffer.c
+--- 2.4.13pre6/fs/buffer.c Sun Oct 21 20:03:47 2001
++++ o_direct/fs/buffer.c Tue Oct 23 14:18:35 2001
+@@ -1942,6 +1942,47 @@
+ return tmp.b_blocknr;
+ }
+
++int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
++{
++ int i, nr_blocks, retval;
++ unsigned long * blocks = iobuf->blocks;
++
++ nr_blocks = iobuf->length / blocksize;
++ /* build the blocklist */
++ for (i = 0; i < nr_blocks; i++, blocknr++) {
++ struct buffer_head bh;
++
++ bh.b_state = 0;
++ bh.b_dev = inode->i_dev;
++ bh.b_size = blocksize;
++
++ retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
++ if (retval)
++ goto out;
++
++ if (rw == READ) {
++ if (buffer_new(&bh))
++ BUG();
++ if (!buffer_mapped(&bh)) {
++ /* there was an hole in the filesystem */
++ blocks[i] = -1UL;
++ continue;
++ }
++ } else {
++ if (buffer_new(&bh))
++ unmap_underlying_metadata(&bh);
++ if (!buffer_mapped(&bh))
++ BUG();
++ }
++ blocks[i] = bh.b_blocknr;
++ }
++
++ retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
++
++ out:
++ return retval;
++}
++
+ /*
+ * IO completion routine for a buffer_head being used for kiobuf IO: we
+ * can't dispatch the kiobuf callback until io_count reaches 0.
+diff -urN 2.4.13pre6/fs/ext2/inode.c o_direct/fs/ext2/inode.c
+--- 2.4.13pre6/fs/ext2/inode.c Sun Oct 21 20:03:47 2001
++++ o_direct/fs/ext2/inode.c Tue Oct 23 14:18:35 2001
+@@ -592,13 +592,18 @@
+ {
+ return generic_block_bmap(mapping,block,ext2_get_block);
+ }
++static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
++{
++ return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
++}
+ struct address_space_operations ext2_aops = {
+ readpage: ext2_readpage,
+ writepage: ext2_writepage,
+ sync_page: block_sync_page,
+ prepare_write: ext2_prepare_write,
+ commit_write: generic_commit_write,
+- bmap: ext2_bmap
++ bmap: ext2_bmap,
++ direct_IO: ext2_direct_IO,
+ };
+
+ /*
+diff -urN 2.4.13pre6/include/linux/fs.h o_direct/include/linux/fs.h
+--- 2.4.13pre6/include/linux/fs.h Sun Oct 21 20:03:51 2001
++++ o_direct/include/linux/fs.h Tue Oct 23 14:18:35 2001
+@@ -1368,6 +1368,7 @@
+ int generic_block_bmap(struct address_space *, long, get_block_t *);
+ int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
+ int block_truncate_page(struct address_space *, loff_t, get_block_t *);
++extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
+ extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
+
+ extern int waitfor_one_page(struct page*);
+diff -urN 2.4.13pre6/kernel/ksyms.c o_direct/kernel/ksyms.c
+--- 2.4.13pre6/kernel/ksyms.c Sun Oct 21 20:03:52 2001
++++ o_direct/kernel/ksyms.c Tue Oct 23 14:18:47 2001
+@@ -199,6 +199,7 @@
+ EXPORT_SYMBOL(unlock_buffer);
+ EXPORT_SYMBOL(__wait_on_buffer);
+ EXPORT_SYMBOL(___wait_on_page);
++EXPORT_SYMBOL(generic_direct_IO);
+ EXPORT_SYMBOL(block_write_full_page);
+ EXPORT_SYMBOL(block_read_full_page);
+ EXPORT_SYMBOL(block_prepare_write);
+diff -urN 2.4.13pre6/mm/filemap.c o_direct/mm/filemap.c
+--- 2.4.13pre6/mm/filemap.c Sun Oct 21 20:03:52 2001
++++ o_direct/mm/filemap.c Tue Oct 23 14:18:35 2001
+@@ -1356,6 +1356,87 @@
+ UPDATE_ATIME(inode);
+ }
+
++static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
++{
++ ssize_t retval;
++ int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
++ struct kiobuf * iobuf;
++ struct inode * inode = filp->f_dentry->d_inode;
++ struct address_space * mapping = inode->i_mapping;
++
++ new_iobuf = 0;
++ iobuf = filp->f_iobuf;
++ if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
++ /*
++ * A parallel read/write is using the preallocated iobuf
++ * so just run slow and allocate a new one.
++ */
++ retval = alloc_kiovec(1, &iobuf);
++ if (retval)
++ goto out;
++ new_iobuf = 1;
++ }
++
++ blocksize = 1 << inode->i_blkbits;
++ blocksize_bits = inode->i_blkbits;
++ blocksize_mask = blocksize - 1;
++ chunk_size = KIO_MAX_ATOMIC_IO << 10;
++
++ retval = -EINVAL;
++ if ((offset & blocksize_mask) || (count & blocksize_mask))
++ goto out_free;
++ if (!mapping->a_ops->direct_IO)
++ goto out_free;
++
++ /*
++ * Flush to disk exlusively the _data_, metadata must remains
++ * completly asynchronous or performance will go to /dev/null.
++ */
++ filemap_fdatasync(mapping);
++ retval = fsync_inode_data_buffers(inode);
++ filemap_fdatawait(mapping);
++ if (retval < 0)
++ goto out_free;
++
++ progress = retval = 0;
++ while (count > 0) {
++ iosize = count;
++ if (iosize > chunk_size)
++ iosize = chunk_size;
++
++ retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
++ if (retval)
++ break;
++
++ retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
++
++ if (rw == READ && retval > 0)
++ mark_dirty_kiobuf(iobuf, retval);
++
++ if (retval >= 0) {
++ count -= retval;
++ buf += retval;
++ progress += retval;
++ }
++
++ unmap_kiobuf(iobuf);
++
++ if (retval != iosize)
++ break;
++ }
++
++ if (progress)
++ retval = progress;
++
++ out_free:
++ if (!new_iobuf)
++ clear_bit(0, &filp->f_iobuf_lock);
++ else
++ free_kiovec(1, &iobuf);
++ out:
++ return retval;
++}
++
+ int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+ {
+ char *kaddr;
+@@ -1389,6 +1470,9 @@
+ if ((ssize_t) count < 0)
+ return -EINVAL;
+
++ if (filp->f_flags & O_DIRECT)
++ goto o_direct;
++
+ retval = -EFAULT;
+ if (access_ok(VERIFY_WRITE, buf, count)) {
+ retval = 0;
+@@ -1407,7 +1491,29 @@
+ retval = desc.error;
+ }
+ }
++ out:
+ return retval;
++
++ o_direct:
++ {
++ loff_t pos = *ppos, size;
++ struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
++ struct inode *inode = mapping->host;
++
++ retval = 0;
++ if (!count)
++ goto out; /* skip atime */
++ size = inode->i_size;
++ if (pos < size) {
++ if (pos + count > size)
++ count = size - pos;
++ retval = generic_file_direct_IO(READ, filp, buf, count, pos);
++ if (retval > 0)
++ *ppos = pos + retval;
++ }
++ UPDATE_ATIME(filp->f_dentry->d_inode);
++ goto out;
++ }
+ }
+
+ static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
+@@ -2660,7 +2766,8 @@
+
+ written = 0;
+
+- if (file->f_flags & O_APPEND)
++ /* FIXME: this is for backwards compatibility with 2.4 */
++ if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
+ pos = inode->i_size;
+
+ /*
+@@ -2740,6 +2847,9 @@
+ inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+
++ if (file->f_flags & O_DIRECT)
++ goto o_direct;
++
+ do {
+ unsigned long index, offset;
+ long page_fault;
+@@ -2814,6 +2924,7 @@
+ if ((status >= 0) && (file->f_flags & O_SYNC))
+ status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
+
++out_status:
+ err = written ? written : status;
+ out:
+
+@@ -2822,6 +2933,25 @@
+ fail_write:
+ status = -EFAULT;
+ goto unlock;
++
++o_direct:
++ written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
++ if (written > 0) {
++ loff_t end = pos + written;
++ if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
++ inode->i_size = end;
++ mark_inode_dirty(inode);
++ }
++ *ppos = end;
++ invalidate_inode_pages2(mapping);
++ }
++ /*
++ * Sync the fs metadata but not the minor inode changes and
++ * of course not the data as we did direct DMA for the IO.
++ */
++ if (written >= 0 && file->f_flags & O_SYNC)
++ status = generic_osync_inode(inode, OSYNC_METADATA);
++ goto out_status;
+ }
+
+ void __init page_cache_init(unsigned long mempages)
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1
new file mode 100644
index 000000000000..8f67e8b9f8f5
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1
@@ -0,0 +1,45 @@
+diff -urN 2.4.13pre1/arch/i386/mm/init.c ordered/arch/i386/mm/init.c
+--- 2.4.13pre1/arch/i386/mm/init.c Sun Sep 23 21:11:28 2001
++++ ordered/arch/i386/mm/init.c Fri Oct 12 19:04:54 2001
+@@ -469,7 +469,7 @@
+ if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
+ reservedpages++;
+ #ifdef CONFIG_HIGHMEM
+- for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
++ for (tmp = highend_pfn-1; tmp >= highstart_pfn; tmp--) {
+ struct page *page = mem_map + tmp;
+
+ if (!page_is_ram(tmp)) {
+diff -urN 2.4.13pre1/mm/bootmem.c ordered/mm/bootmem.c
+--- 2.4.13pre1/mm/bootmem.c Sun Sep 23 21:11:43 2001
++++ ordered/mm/bootmem.c Fri Oct 12 19:04:56 2001
+@@ -237,14 +237,15 @@
+ {
+ struct page *page = pgdat->node_mem_map;
+ bootmem_data_t *bdata = pgdat->bdata;
+- unsigned long i, count, total = 0;
+- unsigned long idx;
++ unsigned long count, total = 0;
++ long idx, i;
+
+ if (!bdata->node_bootmem_map) BUG();
+
+ count = 0;
+ idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+- for (i = 0; i < idx; i++, page++) {
++ page += --idx;
++ for (i = idx; i >= 0; i--, page--) {
+ if (!test_bit(i, bdata->node_bootmem_map)) {
+ count++;
+ ClearPageReserved(page);
+@@ -260,7 +261,9 @@
+ */
+ page = virt_to_page(bdata->node_bootmem_map);
+ count = 0;
+- for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
++ idx = ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE;
++ page += --idx;
++ for (i = idx; i >= 0; i--, page--) {
+ count++;
+ ClearPageReserved(page);
+ set_page_count(page, 1);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2
new file mode 100644
index 000000000000..388d9b600ee9
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2
@@ -0,0 +1,12 @@
+diff -urN 2.4.9/fs/select.c poll/fs/select.c
+--- 2.4.9/fs/select.c Thu Aug 16 22:03:38 2001
++++ poll/fs/select.c Sat Aug 18 03:09:11 2001
+@@ -416,7 +416,7 @@
+ int nchunks, nleft;
+
+ /* Do a sanity check on nfds ... */
+- if (nfds > NR_OPEN)
++ if (nfds > current->rlim[RLIMIT_NOFILE].rlim_cur)
+ return -EINVAL;
+
+ if (timeout) {
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1
new file mode 100644
index 000000000000..64b2914b2816
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1
@@ -0,0 +1,52 @@
+Date: Mon, 24 Sep 2001 02:17:58 -0500
+From: Mark J Roberts <mjr@znex.org>
+To: andrea@suse.de
+Subject: [PATCH] Export lib/rbtree.c symbols so modules can use it too.
+Message-ID: <20010924021758.A202@znex>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+
+Hi,
+
+I'd like to use your rbtree implementation in a module, but
+rb_insert_color and rb_erase aren't exported. This patch against
+2.4.10 exports them.
+
+diff -uX dontdiff linux-2.4.10/lib/Makefile linux/lib/Makefile
+--- linux-2.4.10/lib/Makefile Mon Sep 17 22:31:15 2001
++++ linux/lib/Makefile Sun Sep 23 23:21:56 2001
+@@ -8,7 +8,7 @@
+
+ L_TARGET := lib.a
+
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o
++export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
+
+ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o
+
+diff -uX dontdiff linux-2.4.10/lib/rbtree.c linux/lib/rbtree.c
+--- linux-2.4.10/lib/rbtree.c Mon Sep 17 22:30:23 2001
++++ linux/lib/rbtree.c Sun Sep 23 23:23:13 2001
+@@ -20,6 +20,7 @@
+ */
+
+ #include <linux/rbtree.h>
++#include <linux/module.h>
+
+ static void __rb_rotate_left(rb_node_t * node, rb_root_t * root)
+ {
+@@ -125,6 +126,7 @@
+
+ root->rb_node->rb_color = RB_BLACK;
+ }
++EXPORT_SYMBOL(rb_insert_color);
+
+ static void __rb_erase_color(rb_node_t * node, rb_node_t * parent,
+ rb_root_t * root)
+@@ -291,3 +293,4 @@
+ if (color == RB_BLACK)
+ __rb_erase_color(child, parent, root);
+ }
++EXPORT_SYMBOL(rb_erase);
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2
new file mode 100644
index 000000000000..030dab952cf4
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2
@@ -0,0 +1,457 @@
+diff -urN 2.4.14pre3/include/linux/rcupdate.h rcu/include/linux/rcupdate.h
+--- 2.4.14pre3/include/linux/rcupdate.h Thu Jan 1 01:00:00 1970
++++ rcu/include/linux/rcupdate.h Sun Oct 28 15:24:02 2001
+@@ -0,0 +1,59 @@
++/*
++ * Read-Copy Update mechanism for mutual exclusion
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright (c) International Business Machines Corp., 2001
++ *
++ * Author: Dipankar Sarma <dipankar@in.ibm.com>
++ *
++ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
++ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
++ * Papers:
++ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
++ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
++ *
++ * For detailed explanation of Read-Copy Update mechanism see -
++ * http://lse.sourceforge.net/locking/rcupdate.html
++ *
++ */
++
++#ifndef __LINUX_RCUPDATE_H
++#define __LINUX_RCUPDATE_H
++
++#include <linux/list.h>
++
++/*
++ * Callback structure for use with call_rcu().
++ */
++struct rcu_head {
++ struct list_head list;
++ void (*func)(void *obj);
++ void *arg;
++};
++
++#define RCU_HEAD_INIT(head) { LIST_HEAD_INIT(head.list), NULL, NULL }
++#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head)
++#define INIT_RCU_HEAD(ptr) do { \
++ INIT_LIST_HEAD(&(ptr)->list); (ptr)->func = NULL; (ptr)->arg = NULL; \
++} while (0)
++
++
++extern void FASTCALL(call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg));
++extern void synchronize_kernel(void);
++
++extern void rcu_init(void);
++
++#endif /* __LINUX_RCUPDATE_H */
+diff -urN 2.4.14pre3/include/linux/sched.h rcu/include/linux/sched.h
+--- 2.4.14pre3/include/linux/sched.h Thu Oct 11 10:41:52 2001
++++ rcu/include/linux/sched.h Sun Oct 28 15:24:37 2001
+@@ -159,6 +159,7 @@
+ extern void flush_scheduled_tasks(void);
+ extern int start_context_thread(void);
+ extern int current_is_keventd(void);
++extern void force_cpu_reschedule(int cpu);
+
+ /*
+ * The default fd array needs to be at least BITS_PER_LONG,
+@@ -547,6 +548,18 @@
+ extern unsigned long itimer_next;
+ extern struct timeval xtime;
+ extern void do_timer(struct pt_regs *);
++
++/* per-cpu schedule data */
++typedef struct schedule_data_s {
++ struct task_struct * curr;
++ cycles_t last_schedule;
++ long quiescent;
++} schedule_data_t ____cacheline_aligned;
++
++extern schedule_data_t schedule_data[NR_CPUS];
++#define cpu_curr(cpu) (schedule_data[(cpu)].curr)
++#define last_schedule(cpu) (schedule_data[(cpu)].last_schedule)
++#define RCU_quiescent(cpu) (schedule_data[(cpu)].quiescent)
+
+ extern unsigned int * prof_buffer;
+ extern unsigned long prof_len;
+diff -urN 2.4.14pre3/init/main.c rcu/init/main.c
+--- 2.4.14pre3/init/main.c Wed Oct 24 08:04:27 2001
++++ rcu/init/main.c Sun Oct 28 15:26:58 2001
+@@ -27,6 +27,7 @@
+ #include <linux/iobuf.h>
+ #include <linux/bootmem.h>
+ #include <linux/tty.h>
++#include <linux/rcupdate.h>
+
+ #include <asm/io.h>
+ #include <asm/bugs.h>
+@@ -554,6 +555,7 @@
+ printk("Kernel command line: %s\n", saved_command_line);
+ parse_options(command_line);
+ trap_init();
++ rcu_init();
+ init_IRQ();
+ sched_init();
+ softirq_init();
+diff -urN 2.4.14pre3/kernel/Makefile rcu/kernel/Makefile
+--- 2.4.14pre3/kernel/Makefile Sun Sep 23 21:11:42 2001
++++ rcu/kernel/Makefile Sun Oct 28 15:23:48 2001
+@@ -9,12 +9,12 @@
+
+ O_TARGET := kernel.o
+
+-export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o
++export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o rcupdate.o
+
+ obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
+ module.o exit.o itimer.o info.o time.o softirq.o resource.o \
+ sysctl.o acct.o capability.o ptrace.o timer.o user.o \
+- signal.o sys.o kmod.o context.o
++ signal.o sys.o kmod.o context.o rcupdate.o
+
+ obj-$(CONFIG_UID16) += uid16.o
+ obj-$(CONFIG_MODULES) += ksyms.o
+diff -urN 2.4.14pre3/kernel/rcupdate.c rcu/kernel/rcupdate.c
+--- 2.4.14pre3/kernel/rcupdate.c Thu Jan 1 01:00:00 1970
++++ rcu/kernel/rcupdate.c Sun Oct 28 15:26:37 2001
+@@ -0,0 +1,229 @@
++/*
++ * Read-Copy Update mechanism for mutual exclusion
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright (c) International Business Machines Corp., 2001
++ * Copyright (C) Andrea Arcangeli <andrea@suse.de> SuSE, 2001
++ *
++ * Author: Dipankar Sarma <dipankar@in.ibm.com>,
++ * Andrea Arcangeli <andrea@suse.de>
++ *
++ * Based on the original work by Paul McKenney <paul.mckenney@us.ibm.com>
++ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
++ * Papers:
++ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
++ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
++ *
++ * For detailed explanation of Read-Copy Update mechanism see -
++ * http://lse.sourceforge.net/locking/rcupdate.html
++ *
++ */
++
++#include <linux/init.h>
++#include <linux/kernel.h>
++#include <linux/spinlock.h>
++#include <linux/sched.h>
++#include <linux/smp.h>
++#include <linux/interrupt.h>
++#include <linux/module.h>
++#include <linux/completion.h>
++#include <linux/rcupdate.h>
++
++#define DEBUG
++
++#ifdef CONFIG_SMP
++/* Definition for rcupdate control block. */
++static spinlock_t rcu_lock;
++static struct list_head rcu_nxtlist;
++static struct list_head rcu_curlist;
++static struct tasklet_struct rcu_tasklet;
++static unsigned long rcu_qsmask;
++static int rcu_polling_in_progress;
++static long rcu_quiescent_checkpoint[NR_CPUS];
++#endif
++
++/*
++ * Register a new rcu callback. This will be invoked as soon
++ * as all CPUs have performed a context switch or been seen in the
++ * idle loop or in a user process.
++ */
++void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
++{
++#ifdef CONFIG_SMP
++ head->func = func;
++ head->arg = arg;
++
++ spin_lock_bh(&rcu_lock);
++ list_add(&head->list, &rcu_nxtlist);
++ spin_unlock_bh(&rcu_lock);
++
++ tasklet_hi_schedule(&rcu_tasklet);
++#else
++ local_bh_disable();
++ func(arg);
++ local_bh_enable();
++#endif
++}
++
++#ifdef CONFIG_SMP
++static int rcu_prepare_polling(void)
++{
++ int stop;
++ int i;
++
++#ifdef DEBUG
++ if (!list_empty(&rcu_curlist))
++ BUG();
++#endif
++
++ stop = 1;
++ if (!list_empty(&rcu_nxtlist)) {
++ list_splice(&rcu_nxtlist, &rcu_curlist);
++ INIT_LIST_HEAD(&rcu_nxtlist);
++
++ rcu_polling_in_progress = 1;
++
++ for (i = 0; i < smp_num_cpus; i++) {
++ int cpu = cpu_logical_map(i);
++
++ if (cpu != smp_processor_id()) {
++ rcu_qsmask |= 1UL << cpu;
++ rcu_quiescent_checkpoint[cpu] = RCU_quiescent(cpu);
++ force_cpu_reschedule(cpu);
++ }
++ }
++ stop = 0;
++ }
++
++ return stop;
++}
++
++/*
++ * Invoke the completed RCU callbacks.
++ */
++static void rcu_invoke_callbacks(void)
++{
++ struct list_head *entry;
++ struct rcu_head *head;
++
++#ifdef DEBUG
++ if (list_empty(&rcu_curlist))
++ BUG();
++#endif
++
++ entry = rcu_curlist.prev;
++ do {
++ head = list_entry(entry, struct rcu_head, list);
++ entry = entry->prev;
++
++ head->func(head->arg);
++ } while (entry != &rcu_curlist);
++
++ INIT_LIST_HEAD(&rcu_curlist);
++}
++
++static int rcu_completion(void)
++{
++ int stop;
++
++ rcu_polling_in_progress = 0;
++ rcu_invoke_callbacks();
++
++ stop = rcu_prepare_polling();
++
++ return stop;
++}
++
++static int rcu_polling(void)
++{
++ int i;
++ int stop;
++
++ for (i = 0; i < smp_num_cpus; i++) {
++ int cpu = cpu_logical_map(i);
++
++ if (rcu_qsmask & (1UL << cpu))
++ if (rcu_quiescent_checkpoint[cpu] != RCU_quiescent(cpu))
++ rcu_qsmask &= ~(1UL << cpu);
++ }
++
++ stop = 0;
++ if (!rcu_qsmask)
++ stop = rcu_completion();
++
++ return stop;
++}
++
++/*
++ * Look into the per-cpu callback information to see if there is
++ * any processing necessary - if so do it.
++ */
++static void rcu_process_callbacks(unsigned long data)
++{
++ int stop;
++
++ spin_lock(&rcu_lock);
++ if (!rcu_polling_in_progress)
++ stop = rcu_prepare_polling();
++ else
++ stop = rcu_polling();
++ spin_unlock(&rcu_lock);
++
++ if (!stop)
++ tasklet_hi_schedule(&rcu_tasklet);
++}
++
++/* Because of FASTCALL declaration of complete, we use this wrapper */
++static void wakeme_after_rcu(void *completion)
++{
++ complete(completion);
++}
++
++#endif /* CONFIG_SMP */
++
++/*
++ * Initializes rcu mechanism. Assumed to be called early.
++ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
++ */
++void __init rcu_init(void)
++{
++#ifdef CONFIG_SMP
++ tasklet_init(&rcu_tasklet, rcu_process_callbacks, 0UL);
++ INIT_LIST_HEAD(&rcu_nxtlist);
++ INIT_LIST_HEAD(&rcu_curlist);
++ spin_lock_init(&rcu_lock);
++#endif
++}
++
++/*
++ * Wait until all the CPUs have gone through a "quiescent" state.
++ */
++void synchronize_kernel(void)
++{
++#ifdef CONFIG_SMP
++ struct rcu_head rcu;
++ DECLARE_COMPLETION(completion);
++
++ /* Will wake me after RCU finished */
++ call_rcu(&rcu, wakeme_after_rcu, &completion);
++
++ /* Wait for it */
++ wait_for_completion(&completion);
++#endif
++}
++
++EXPORT_SYMBOL(call_rcu);
++EXPORT_SYMBOL(synchronize_kernel);
+diff -urN 2.4.14pre3/kernel/sched.c rcu/kernel/sched.c
+--- 2.4.14pre3/kernel/sched.c Wed Oct 24 08:04:27 2001
++++ rcu/kernel/sched.c Sun Oct 28 15:27:24 2001
+@@ -28,6 +28,7 @@
+ #include <linux/kernel_stat.h>
+ #include <linux/completion.h>
+ #include <linux/prefetch.h>
++#include <linux/rcupdate.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/mmu_context.h>
+@@ -97,16 +98,7 @@
+ * We align per-CPU scheduling data on cacheline boundaries,
+ * to prevent cacheline ping-pong.
+ */
+-static union {
+- struct schedule_data {
+- struct task_struct * curr;
+- cycles_t last_schedule;
+- } schedule_data;
+- char __pad [SMP_CACHE_BYTES];
+-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
+-
+-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
+-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
++schedule_data_t schedule_data[NR_CPUS] __cacheline_aligned = {{&init_task,0}};
+
+ struct kernel_stat kstat;
+ extern struct task_struct *child_reaper;
+@@ -532,7 +524,7 @@
+ */
+ asmlinkage void schedule(void)
+ {
+- struct schedule_data * sched_data;
++ schedule_data_t * sched_data;
+ struct task_struct *prev, *next, *p;
+ struct list_head *tmp;
+ int this_cpu, c;
+@@ -554,7 +546,7 @@
+ * 'sched_data' is protected by the fact that we can run
+ * only one process per CPU.
+ */
+- sched_data = & aligned_data[this_cpu].schedule_data;
++ sched_data = &schedule_data[this_cpu];
+
+ spin_lock_irq(&runqueue_lock);
+
+@@ -608,6 +600,8 @@
+ */
+ sched_data->curr = next;
+ #ifdef CONFIG_SMP
++ RCU_quiescent(this_cpu)++;
++
+ next->has_cpu = 1;
+ next->processor = this_cpu;
+ #endif
+@@ -861,6 +855,17 @@
+
+ void scheduling_functions_end_here(void) { }
+
++void force_cpu_reschedule(int cpu)
++{
++ spin_lock_irq(&runqueue_lock);
++ cpu_curr(cpu)->need_resched = 1;
++ spin_unlock_irq(&runqueue_lock);
++
++#ifdef CONFIG_SMP
++ smp_send_reschedule(cpu);
++#endif
++}
++
+ #ifndef __alpha__
+
+ /*
+@@ -1057,7 +1062,7 @@
+ // Subtract non-idle processes running on other CPUs.
+ for (i = 0; i < smp_num_cpus; i++) {
+ int cpu = cpu_logical_map(i);
+- if (aligned_data[cpu].schedule_data.curr != idle_task(cpu))
++ if (cpu_curr(cpu) != idle_task(cpu))
+ nr_pending--;
+ }
+ #else
+@@ -1314,8 +1319,8 @@
+
+ void __init init_idle(void)
+ {
+- struct schedule_data * sched_data;
+- sched_data = &aligned_data[smp_processor_id()].schedule_data;
++ schedule_data_t * sched_data;
++ sched_data = &schedule_data[smp_processor_id()];
+
+ if (current != &init_task && task_on_runqueue(current)) {
+ printk("UGH! (%d:%d) was on the runqueue, removing.\n",
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23
new file mode 100644
index 000000000000..3762a6cf1244
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23
@@ -0,0 +1,1400 @@
+diff -urN rwsem-ref/arch/alpha/config.in rwsem/arch/alpha/config.in
+--- rwsem-ref/arch/alpha/config.in Wed Oct 10 02:14:52 2001
++++ rwsem/arch/alpha/config.in Fri Oct 12 08:14:19 2001
+@@ -5,8 +5,6 @@
+
+ define_bool CONFIG_ALPHA y
+ define_bool CONFIG_UID16 n
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+
+ mainmenu_name "Kernel configuration of Linux for Alpha machines"
+
+diff -urN rwsem-ref/arch/arm/config.in rwsem/arch/arm/config.in
+--- rwsem-ref/arch/arm/config.in Fri Oct 12 06:14:51 2001
++++ rwsem/arch/arm/config.in Fri Oct 12 08:14:19 2001
+@@ -9,8 +9,6 @@
+ define_bool CONFIG_SBUS n
+ define_bool CONFIG_MCA n
+ define_bool CONFIG_UID16 y
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+ define_bool CONFIG_GENERIC_BUST_SPINLOCK n
+
+
+diff -urN rwsem-ref/arch/cris/config.in rwsem/arch/cris/config.in
+--- rwsem-ref/arch/cris/config.in Wed Oct 10 02:14:53 2001
++++ rwsem/arch/cris/config.in Fri Oct 12 08:14:19 2001
+@@ -5,8 +5,6 @@
+ mainmenu_name "Linux/CRIS Kernel Configuration"
+
+ define_bool CONFIG_UID16 y
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ mainmenu_option next_comment
+ comment 'Code maturity level options'
+diff -urN rwsem-ref/arch/i386/config.in rwsem/arch/i386/config.in
+--- rwsem-ref/arch/i386/config.in Wed Oct 10 02:14:55 2001
++++ rwsem/arch/i386/config.in Fri Oct 12 08:14:19 2001
+@@ -50,8 +50,6 @@
+ define_bool CONFIG_X86_CMPXCHG n
+ define_bool CONFIG_X86_XADD n
+ define_int CONFIG_X86_L1_CACHE_SHIFT 4
+- define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+- define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+ else
+ define_bool CONFIG_X86_WP_WORKS_OK y
+ define_bool CONFIG_X86_INVLPG y
+@@ -59,8 +57,6 @@
+ define_bool CONFIG_X86_XADD y
+ define_bool CONFIG_X86_BSWAP y
+ define_bool CONFIG_X86_POPAD_OK y
+- define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+- define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+ fi
+ if [ "$CONFIG_M486" = "y" ]; then
+ define_int CONFIG_X86_L1_CACHE_SHIFT 4
+diff -urN rwsem-ref/arch/ia64/config.in rwsem/arch/ia64/config.in
+--- rwsem-ref/arch/ia64/config.in Sat Aug 11 08:03:54 2001
++++ rwsem/arch/ia64/config.in Fri Oct 12 08:14:19 2001
+@@ -23,8 +23,6 @@
+ define_bool CONFIG_EISA n
+ define_bool CONFIG_MCA n
+ define_bool CONFIG_SBUS n
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ if [ "$CONFIG_IA64_HP_SIM" = "n" ]; then
+ define_bool CONFIG_ACPI y
+diff -urN rwsem-ref/arch/m68k/config.in rwsem/arch/m68k/config.in
+--- rwsem-ref/arch/m68k/config.in Wed Jul 4 04:03:45 2001
++++ rwsem/arch/m68k/config.in Fri Oct 12 08:14:19 2001
+@@ -4,8 +4,6 @@
+ #
+
+ define_bool CONFIG_UID16 y
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ mainmenu_name "Linux/68k Kernel Configuration"
+
+diff -urN rwsem-ref/arch/mips/config.in rwsem/arch/mips/config.in
+--- rwsem-ref/arch/mips/config.in Sun Sep 23 21:11:28 2001
++++ rwsem/arch/mips/config.in Fri Oct 12 08:14:19 2001
+@@ -68,8 +68,6 @@
+ fi
+ bool 'Support for Alchemy Semi PB1000 board' CONFIG_MIPS_PB1000
+
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ #
+ # Select some configuration options automatically for certain systems.
+diff -urN rwsem-ref/arch/mips64/config.in rwsem/arch/mips64/config.in
+--- rwsem-ref/arch/mips64/config.in Sun Sep 23 21:11:29 2001
++++ rwsem/arch/mips64/config.in Fri Oct 12 08:14:19 2001
+@@ -27,9 +27,6 @@
+ fi
+ endmenu
+
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+-
+ #
+ # Select some configuration options automatically based on user selections
+ #
+diff -urN rwsem-ref/arch/parisc/config.in rwsem/arch/parisc/config.in
+--- rwsem-ref/arch/parisc/config.in Tue May 1 19:35:20 2001
++++ rwsem/arch/parisc/config.in Fri Oct 12 08:14:19 2001
+@@ -7,8 +7,6 @@
+
+ define_bool CONFIG_PARISC y
+ define_bool CONFIG_UID16 n
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ mainmenu_option next_comment
+ comment 'Code maturity level options'
+diff -urN rwsem-ref/arch/ppc/config.in rwsem/arch/ppc/config.in
+--- rwsem-ref/arch/ppc/config.in Sun Sep 23 21:11:29 2001
++++ rwsem/arch/ppc/config.in Fri Oct 12 08:14:19 2001
+@@ -4,8 +4,6 @@
+ # see Documentation/kbuild/config-language.txt.
+ #
+ define_bool CONFIG_UID16 n
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+
+ mainmenu_name "Linux/PowerPC Kernel Configuration"
+
+diff -urN rwsem-ref/arch/s390/config.in rwsem/arch/s390/config.in
+--- rwsem-ref/arch/s390/config.in Sat Aug 11 08:03:56 2001
++++ rwsem/arch/s390/config.in Fri Oct 12 08:14:19 2001
+@@ -7,8 +7,6 @@
+ define_bool CONFIG_EISA n
+ define_bool CONFIG_MCA n
+ define_bool CONFIG_UID16 y
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ mainmenu_name "Linux Kernel Configuration"
+ define_bool CONFIG_ARCH_S390 y
+diff -urN rwsem-ref/arch/s390x/config.in rwsem/arch/s390x/config.in
+--- rwsem-ref/arch/s390x/config.in Fri Oct 12 06:14:55 2001
++++ rwsem/arch/s390x/config.in Fri Oct 12 08:14:19 2001
+@@ -6,8 +6,6 @@
+ define_bool CONFIG_ISA n
+ define_bool CONFIG_EISA n
+ define_bool CONFIG_MCA n
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+ define_bool CONFIG_GENERIC_BUST_SPINLOCK n
+
+ mainmenu_name "Linux Kernel Configuration"
+diff -urN rwsem-ref/arch/sh/config.in rwsem/arch/sh/config.in
+--- rwsem-ref/arch/sh/config.in Sun Sep 23 21:11:30 2001
++++ rwsem/arch/sh/config.in Fri Oct 12 08:14:19 2001
+@@ -7,8 +7,6 @@
+ define_bool CONFIG_SUPERH y
+
+ define_bool CONFIG_UID16 y
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ mainmenu_option next_comment
+ comment 'Code maturity level options'
+diff -urN rwsem-ref/arch/sparc/config.in rwsem/arch/sparc/config.in
+--- rwsem-ref/arch/sparc/config.in Wed Jul 4 04:03:45 2001
++++ rwsem/arch/sparc/config.in Fri Oct 12 08:14:19 2001
+@@ -48,8 +48,6 @@
+ define_bool CONFIG_SUN_CONSOLE y
+ define_bool CONFIG_SUN_AUXIO y
+ define_bool CONFIG_SUN_IO y
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n
+
+ bool 'Support for SUN4 machines (disables SUN4[CDM] support)' CONFIG_SUN4
+ if [ "$CONFIG_SUN4" != "y" ]; then
+diff -urN rwsem-ref/arch/sparc64/config.in rwsem/arch/sparc64/config.in
+--- rwsem-ref/arch/sparc64/config.in Sun Sep 23 21:11:30 2001
++++ rwsem/arch/sparc64/config.in Fri Oct 12 08:14:19 2001
+@@ -33,8 +33,6 @@
+
+ # Global things across all Sun machines.
+ define_bool CONFIG_HAVE_DEC_LOCK y
+-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n
+-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y
+ define_bool CONFIG_ISA n
+ define_bool CONFIG_ISAPNP n
+ define_bool CONFIG_EISA n
+diff -urN rwsem-ref/include/asm-alpha/rwsem.h rwsem/include/asm-alpha/rwsem.h
+--- rwsem-ref/include/asm-alpha/rwsem.h Wed Oct 10 02:16:18 2001
++++ rwsem/include/asm-alpha/rwsem.h Thu Jan 1 01:00:00 1970
+@@ -1,208 +0,0 @@
+-#ifndef _ALPHA_RWSEM_H
+-#define _ALPHA_RWSEM_H
+-
+-/*
+- * Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001.
+- * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
+- */
+-
+-#ifndef _LINUX_RWSEM_H
+-#error please dont include asm/rwsem.h directly, use linux/rwsem.h instead
+-#endif
+-
+-#ifdef __KERNEL__
+-
+-#include <linux/compiler.h>
+-#include <linux/list.h>
+-#include <linux/spinlock.h>
+-
+-struct rwsem_waiter;
+-
+-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
+-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
+-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
+-
+-/*
+- * the semaphore definition
+- */
+-struct rw_semaphore {
+- long count;
+-#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
+-#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
+-#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
+-#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
+-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
+-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+- spinlock_t wait_lock;
+- struct list_head wait_list;
+-#if RWSEM_DEBUG
+- int debug;
+-#endif
+-};
+-
+-#if RWSEM_DEBUG
+-#define __RWSEM_DEBUG_INIT , 0
+-#else
+-#define __RWSEM_DEBUG_INIT /* */
+-#endif
+-
+-#define __RWSEM_INITIALIZER(name) \
+- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \
+- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT }
+-
+-#define DECLARE_RWSEM(name) \
+- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+-
+-static inline void init_rwsem(struct rw_semaphore *sem)
+-{
+- sem->count = RWSEM_UNLOCKED_VALUE;
+- spin_lock_init(&sem->wait_lock);
+- INIT_LIST_HEAD(&sem->wait_list);
+-#if RWSEM_DEBUG
+- sem->debug = 0;
+-#endif
+-}
+-
+-static inline void __down_read(struct rw_semaphore *sem)
+-{
+- long oldcount;
+-#ifndef CONFIG_SMP
+- oldcount = sem->count;
+- sem->count += RWSEM_ACTIVE_READ_BIAS;
+-#else
+- long temp;
+- __asm__ __volatile__(
+- "1: ldq_l %0,%1\n"
+- " addq %0,%3,%2\n"
+- " stq_c %2,%1\n"
+- " beq %2,2f\n"
+- " mb\n"
+- ".subsection 2\n"
+- "2: br 1b\n"
+- ".previous"
+- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
+- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
+-#endif
+- if (__builtin_expect(oldcount < 0, 0))
+- rwsem_down_read_failed(sem);
+-}
+-
+-static inline void __down_write(struct rw_semaphore *sem)
+-{
+- long oldcount;
+-#ifndef CONFIG_SMP
+- oldcount = sem->count;
+- sem->count += RWSEM_ACTIVE_WRITE_BIAS;
+-#else
+- long temp;
+- __asm__ __volatile__(
+- "1: ldq_l %0,%1\n"
+- " addq %0,%3,%2\n"
+- " stq_c %2,%1\n"
+- " beq %2,2f\n"
+- " mb\n"
+- ".subsection 2\n"
+- "2: br 1b\n"
+- ".previous"
+- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
+- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
+-#endif
+- if (__builtin_expect(oldcount, 0))
+- rwsem_down_write_failed(sem);
+-}
+-
+-static inline void __up_read(struct rw_semaphore *sem)
+-{
+- long oldcount;
+-#ifndef CONFIG_SMP
+- oldcount = sem->count;
+- sem->count -= RWSEM_ACTIVE_READ_BIAS;
+-#else
+- long temp;
+- __asm__ __volatile__(
+- " mb\n"
+- "1: ldq_l %0,%1\n"
+- " subq %0,%3,%2\n"
+- " stq_c %2,%1\n"
+- " beq %2,2f\n"
+- ".subsection 2\n"
+- "2: br 1b\n"
+- ".previous"
+- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
+- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
+-#endif
+- if (__builtin_expect(oldcount < 0, 0))
+- if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0)
+- rwsem_wake(sem);
+-}
+-
+-static inline void __up_write(struct rw_semaphore *sem)
+-{
+- long count;
+-#ifndef CONFIG_SMP
+- sem->count -= RWSEM_ACTIVE_WRITE_BIAS;
+- count = sem->count;
+-#else
+- long temp;
+- __asm__ __volatile__(
+- " mb\n"
+- "1: ldq_l %0,%1\n"
+- " subq %0,%3,%2\n"
+- " stq_c %2,%1\n"
+- " beq %2,2f\n"
+- " subq %0,%3,%0\n"
+- ".subsection 2\n"
+- "2: br 1b\n"
+- ".previous"
+- :"=&r" (count), "=m" (sem->count), "=&r" (temp)
+- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
+-#endif
+- if (__builtin_expect(count, 0))
+- if ((int)count == 0)
+- rwsem_wake(sem);
+-}
+-
+-static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem)
+-{
+-#ifndef CONFIG_SMP
+- sem->count += val;
+-#else
+- long temp;
+- __asm__ __volatile__(
+- "1: ldq_l %0,%1\n"
+- " addq %0,%2,%0\n"
+- " stq_c %0,%1\n"
+- " beq %0,2f\n"
+- ".subsection 2\n"
+- "2: br 1b\n"
+- ".previous"
+- :"=&r" (temp), "=m" (sem->count)
+- :"Ir" (val), "m" (sem->count));
+-#endif
+-}
+-
+-static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem)
+-{
+-#ifndef CONFIG_SMP
+- sem->count += val;
+- return sem->count;
+-#else
+- long ret, temp;
+- __asm__ __volatile__(
+- "1: ldq_l %0,%1\n"
+- " addq %0,%3,%2\n"
+- " addq %0,%3,%0\n"
+- " stq_c %2,%1\n"
+- " beq %2,2f\n"
+- ".subsection 2\n"
+- "2: br 1b\n"
+- ".previous"
+- :"=&r" (ret), "=m" (sem->count), "=&r" (temp)
+- :"Ir" (val), "m" (sem->count));
+-
+- return ret;
+-#endif
+-}
+-
+-#endif /* __KERNEL__ */
+-#endif /* _ALPHA_RWSEM_H */
+diff -urN rwsem-ref/include/asm-i386/rwsem.h rwsem/include/asm-i386/rwsem.h
+--- rwsem-ref/include/asm-i386/rwsem.h Sat Sep 22 22:07:29 2001
++++ rwsem/include/asm-i386/rwsem.h Thu Jan 1 01:00:00 1970
+@@ -1,226 +0,0 @@
+-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
+- *
+- * Written by David Howells (dhowells@redhat.com).
+- *
+- * Derived from asm-i386/semaphore.h
+- *
+- *
+- * The MSW of the count is the negated number of active writers and waiting
+- * lockers, and the LSW is the total number of active locks
+- *
+- * The lock count is initialized to 0 (no active and no waiting lockers).
+- *
+- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
+- * uncontended lock. This can be determined because XADD returns the old value.
+- * Readers increment by 1 and see a positive value when uncontended, negative
+- * if there are writers (and maybe) readers waiting (in which case it goes to
+- * sleep).
+- *
+- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
+- * be extended to 65534 by manually checking the whole MSW rather than relying
+- * on the S flag.
+- *
+- * The value of ACTIVE_BIAS supports up to 65535 active processes.
+- *
+- * This should be totally fair - if anything is waiting, a process that wants a
+- * lock will go to the back of the queue. When the currently active lock is
+- * released, if there's a writer at the front of the queue, then that and only
+- * that will be woken up; if there's a bunch of consequtive readers at the
+- * front, then they'll all be woken up, but no other readers will be.
+- */
+-
+-#ifndef _I386_RWSEM_H
+-#define _I386_RWSEM_H
+-
+-#ifndef _LINUX_RWSEM_H
+-#error please dont include asm/rwsem.h directly, use linux/rwsem.h instead
+-#endif
+-
+-#ifdef __KERNEL__
+-
+-#include <linux/list.h>
+-#include <linux/spinlock.h>
+-
+-struct rwsem_waiter;
+-
+-extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem));
+-extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem));
+-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *));
+-
+-/*
+- * the semaphore definition
+- */
+-struct rw_semaphore {
+- signed long count;
+-#define RWSEM_UNLOCKED_VALUE 0x00000000
+-#define RWSEM_ACTIVE_BIAS 0x00000001
+-#define RWSEM_ACTIVE_MASK 0x0000ffff
+-#define RWSEM_WAITING_BIAS (-0x00010000)
+-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
+-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
+- spinlock_t wait_lock;
+- struct list_head wait_list;
+-#if RWSEM_DEBUG
+- int debug;
+-#endif
+-};
+-
+-/*
+- * initialisation
+- */
+-#if RWSEM_DEBUG
+-#define __RWSEM_DEBUG_INIT , 0
+-#else
+-#define __RWSEM_DEBUG_INIT /* */
+-#endif
+-
+-#define __RWSEM_INITIALIZER(name) \
+-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \
+- __RWSEM_DEBUG_INIT }
+-
+-#define DECLARE_RWSEM(name) \
+- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+-
+-static inline void init_rwsem(struct rw_semaphore *sem)
+-{
+- sem->count = RWSEM_UNLOCKED_VALUE;
+- spin_lock_init(&sem->wait_lock);
+- INIT_LIST_HEAD(&sem->wait_list);
+-#if RWSEM_DEBUG
+- sem->debug = 0;
+-#endif
+-}
+-
+-/*
+- * lock for reading
+- */
+-static inline void __down_read(struct rw_semaphore *sem)
+-{
+- __asm__ __volatile__(
+- "# beginning down_read\n\t"
+-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */
+- " js 2f\n\t" /* jump if we weren't granted the lock */
+- "1:\n\t"
+- ".section .text.lock,\"ax\"\n"
+- "2:\n\t"
+- " pushl %%ecx\n\t"
+- " pushl %%edx\n\t"
+- " call rwsem_down_read_failed\n\t"
+- " popl %%edx\n\t"
+- " popl %%ecx\n\t"
+- " jmp 1b\n"
+- ".previous"
+- "# ending down_read\n\t"
+- : "+m"(sem->count)
+- : "a"(sem)
+- : "memory", "cc");
+-}
+-
+-/*
+- * lock for writing
+- */
+-static inline void __down_write(struct rw_semaphore *sem)
+-{
+- int tmp;
+-
+- tmp = RWSEM_ACTIVE_WRITE_BIAS;
+- __asm__ __volatile__(
+- "# beginning down_write\n\t"
+-LOCK_PREFIX " xadd %0,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */
+- " testl %0,%0\n\t" /* was the count 0 before? */
+- " jnz 2f\n\t" /* jump if we weren't granted the lock */
+- "1:\n\t"
+- ".section .text.lock,\"ax\"\n"
+- "2:\n\t"
+- " pushl %%ecx\n\t"
+- " call rwsem_down_write_failed\n\t"
+- " popl %%ecx\n\t"
+- " jmp 1b\n"
+- ".previous\n"
+- "# ending down_write"
+- : "+d"(tmp), "+m"(sem->count)
+- : "a"(sem)
+- : "memory", "cc");
+-}
+-
+-/*
+- * unlock after reading
+- */
+-static inline void __up_read(struct rw_semaphore *sem)
+-{
+- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS;
+- __asm__ __volatile__(
+- "# beginning __up_read\n\t"
+-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */
+- " js 2f\n\t" /* jump if the lock is being waited upon */
+- "1:\n\t"
+- ".section .text.lock,\"ax\"\n"
+- "2:\n\t"
+- " decw %%dx\n\t" /* do nothing if still outstanding active readers */
+- " jnz 1b\n\t"
+- " pushl %%ecx\n\t"
+- " call rwsem_wake\n\t"
+- " popl %%ecx\n\t"
+- " jmp 1b\n"
+- ".previous\n"
+- "# ending __up_read\n"
+- : "+m"(sem->count), "+d"(tmp)
+- : "a"(sem)
+- : "memory", "cc");
+-}
+-
+-/*
+- * unlock after writing
+- */
+-static inline void __up_write(struct rw_semaphore *sem)
+-{
+- __asm__ __volatile__(
+- "# beginning __up_write\n\t"
+- " movl %2,%%edx\n\t"
+-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */
+- " jnz 2f\n\t" /* jump if the lock is being waited upon */
+- "1:\n\t"
+- ".section .text.lock,\"ax\"\n"
+- "2:\n\t"
+- " decw %%dx\n\t" /* did the active count reduce to 0? */
+- " jnz 1b\n\t" /* jump back if not */
+- " pushl %%ecx\n\t"
+- " call rwsem_wake\n\t"
+- " popl %%ecx\n\t"
+- " jmp 1b\n"
+- ".previous\n"
+- "# ending __up_write\n"
+- : "+m"(sem->count)
+- : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS)
+- : "memory", "cc", "edx");
+-}
+-
+-/*
+- * implement atomic add functionality
+- */
+-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem)
+-{
+- __asm__ __volatile__(
+-LOCK_PREFIX "addl %1,%0"
+- :"=m"(sem->count)
+- :"ir"(delta), "m"(sem->count));
+-}
+-
+-/*
+- * implement exchange and add functionality
+- */
+-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem)
+-{
+- int tmp = delta;
+-
+- __asm__ __volatile__(
+-LOCK_PREFIX "xadd %0,(%2)"
+- : "+r"(tmp), "=m"(sem->count)
+- : "r"(sem), "m"(sem->count)
+- : "memory");
+-
+- return tmp+delta;
+-}
+-
+-#endif /* __KERNEL__ */
+-#endif /* _I386_RWSEM_H */
+diff -urN rwsem-ref/include/linux/rwsem-spinlock.h rwsem/include/linux/rwsem-spinlock.h
+--- rwsem-ref/include/linux/rwsem-spinlock.h Sat Sep 22 22:07:29 2001
++++ rwsem/include/linux/rwsem-spinlock.h Thu Jan 1 01:00:00 1970
+@@ -1,62 +0,0 @@
+-/* rwsem-spinlock.h: fallback C implementation
+- *
+- * Copyright (c) 2001 David Howells (dhowells@redhat.com).
+- * - Derived partially from ideas by Andrea Arcangeli <andrea@suse.de>
+- * - Derived also from comments by Linus
+- */
+-
+-#ifndef _LINUX_RWSEM_SPINLOCK_H
+-#define _LINUX_RWSEM_SPINLOCK_H
+-
+-#ifndef _LINUX_RWSEM_H
+-#error please dont include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead
+-#endif
+-
+-#include <linux/spinlock.h>
+-#include <linux/list.h>
+-
+-#ifdef __KERNEL__
+-
+-#include <linux/types.h>
+-
+-struct rwsem_waiter;
+-
+-/*
+- * the rw-semaphore definition
+- * - if activity is 0 then there are no active readers or writers
+- * - if activity is +ve then that is the number of active readers
+- * - if activity is -1 then there is one active writer
+- * - if wait_list is not empty, then there are processes waiting for the semaphore
+- */
+-struct rw_semaphore {
+- __s32 activity;
+- spinlock_t wait_lock;
+- struct list_head wait_list;
+-#if RWSEM_DEBUG
+- int debug;
+-#endif
+-};
+-
+-/*
+- * initialisation
+- */
+-#if RWSEM_DEBUG
+-#define __RWSEM_DEBUG_INIT , 0
+-#else
+-#define __RWSEM_DEBUG_INIT /* */
+-#endif
+-
+-#define __RWSEM_INITIALIZER(name) \
+-{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT }
+-
+-#define DECLARE_RWSEM(name) \
+- struct rw_semaphore name = __RWSEM_INITIALIZER(name)
+-
+-extern void FASTCALL(init_rwsem(struct rw_semaphore *sem));
+-extern void FASTCALL(__down_read(struct rw_semaphore *sem));
+-extern void FASTCALL(__down_write(struct rw_semaphore *sem));
+-extern void FASTCALL(__up_read(struct rw_semaphore *sem));
+-extern void FASTCALL(__up_write(struct rw_semaphore *sem));
+-
+-#endif /* __KERNEL__ */
+-#endif /* _LINUX_RWSEM_SPINLOCK_H */
+diff -urN rwsem-ref/include/linux/rwsem.h rwsem/include/linux/rwsem.h
+--- rwsem-ref/include/linux/rwsem.h Sat Sep 22 22:07:29 2001
++++ rwsem/include/linux/rwsem.h Fri Oct 12 08:14:19 2001
+@@ -1,80 +1,120 @@
+-/* rwsem.h: R/W semaphores, public interface
+- *
+- * Written by David Howells (dhowells@redhat.com).
+- * Derived from asm-i386/semaphore.h
+- */
+-
+ #ifndef _LINUX_RWSEM_H
+ #define _LINUX_RWSEM_H
+
+-#include <linux/linkage.h>
+-
+-#define RWSEM_DEBUG 0
+-
+ #ifdef __KERNEL__
+
+-#include <linux/config.h>
+-#include <linux/types.h>
++#include <linux/compiler.h>
+ #include <linux/kernel.h>
+-#include <asm/system.h>
+-#include <asm/atomic.h>
+
+-struct rw_semaphore;
+-
+-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+-#include <linux/rwsem-spinlock.h> /* use a generic implementation */
+-#else
+-#include <asm/rwsem.h> /* use an arch-specific implementation */
++struct rw_semaphore
++{
++ spinlock_t lock;
++ long count;
++#define RWSEM_READ_BIAS 1
++#define RWSEM_WRITE_BIAS (~(~0UL >> (BITS_PER_LONG>>1)))
++ struct list_head wait;
++#if RWSEM_DEBUG
++ long __magic;
+ #endif
++};
+
+-#ifndef rwsemtrace
+ #if RWSEM_DEBUG
+-extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str));
++#define __SEM_DEBUG_INIT(name) \
++ , (long)&(name).__magic
++#define RWSEM_MAGIC(x) \
++ do { \
++ if ((x) != (long)&(x)) { \
++ printk("rwsem bad magic %lx (should be %lx), ", \
++ (long)x, (long)&(x)); \
++ BUG(); \
++ } \
++ } while (0)
+ #else
+-#define rwsemtrace(SEM,FMT)
++#define __SEM_DEBUG_INIT(name)
++#define CHECK_MAGIC(x)
+ #endif
++
++#define __RWSEM_INITIALIZER(name, count) \
++{ \
++ SPIN_LOCK_UNLOCKED, \
++ (count), \
++ LIST_HEAD_INIT((name).wait) \
++ __SEM_DEBUG_INIT(name) \
++}
++#define RWSEM_INITIALIZER(name) __RWSEM_INITIALIZER(name, 0)
++
++#define __DECLARE_RWSEM(name, count) \
++ struct rw_semaphore name = __RWSEM_INITIALIZER(name, count)
++#define DECLARE_RWSEM(name) __DECLARE_RWSEM(name, 0)
++#define DECLARE_RWSEM_READ_LOCKED(name) __DECLARE_RWSEM(name, RWSEM_READ_BIAS)
++#define DECLARE_RWSEM_WRITE_LOCKED(name) __DECLARE_RWSEM(name, RWSEM_WRITE_BIAS)
++
++#define RWSEM_READ_BLOCKING_BIAS (RWSEM_WRITE_BIAS-RWSEM_READ_BIAS)
++#define RWSEM_WRITE_BLOCKING_BIAS (0)
++
++#define RWSEM_READ_MASK (~RWSEM_WRITE_BIAS)
++#define RWSEM_WRITE_MASK (RWSEM_WRITE_BIAS)
++
++extern void FASTCALL(rwsem_down_failed(struct rw_semaphore *, long));
++extern void FASTCALL(rwsem_wake(struct rw_semaphore *));
++
++static inline void init_rwsem(struct rw_semaphore *sem)
++{
++ spin_lock_init(&sem->lock);
++ sem->count = 0;
++ INIT_LIST_HEAD(&sem->wait);
++#if RWSEM_DEBUG
++ sem->__magic = (long)&sem->__magic;
+ #endif
++}
+
+-/*
+- * lock for reading
+- */
+ static inline void down_read(struct rw_semaphore *sem)
+ {
+- rwsemtrace(sem,"Entering down_read");
+- __down_read(sem);
+- rwsemtrace(sem,"Leaving down_read");
++ int count;
++ CHECK_MAGIC(sem->__magic);
++
++ spin_lock(&sem->lock);
++ count = sem->count;
++ sem->count += RWSEM_READ_BIAS;
++ if (unlikely(count < 0))
++ rwsem_down_failed(sem, RWSEM_READ_BLOCKING_BIAS);
++ spin_unlock(&sem->lock);
+ }
+
+-/*
+- * lock for writing
+- */
+ static inline void down_write(struct rw_semaphore *sem)
+ {
+- rwsemtrace(sem,"Entering down_write");
+- __down_write(sem);
+- rwsemtrace(sem,"Leaving down_write");
++ long count;
++ CHECK_MAGIC(sem->__magic);
++
++ spin_lock(&sem->lock);
++ count = sem->count;
++ sem->count += RWSEM_WRITE_BIAS;
++ if (unlikely(count))
++ rwsem_down_failed(sem, RWSEM_WRITE_BLOCKING_BIAS);
++ spin_unlock(&sem->lock);
+ }
+
+-/*
+- * release a read lock
+- */
+ static inline void up_read(struct rw_semaphore *sem)
+ {
+- rwsemtrace(sem,"Entering up_read");
+- __up_read(sem);
+- rwsemtrace(sem,"Leaving up_read");
++ CHECK_MAGIC(sem->__magic);
++
++ spin_lock(&sem->lock);
++ sem->count -= RWSEM_READ_BIAS;
++ if (unlikely(sem->count < 0 && !(sem->count & RWSEM_READ_MASK)))
++ rwsem_wake(sem);
++ spin_unlock(&sem->lock);
+ }
+
+-/*
+- * release a write lock
+- */
+ static inline void up_write(struct rw_semaphore *sem)
+ {
+- rwsemtrace(sem,"Entering up_write");
+- __up_write(sem);
+- rwsemtrace(sem,"Leaving up_write");
+-}
++ CHECK_MAGIC(sem->__magic);
+
++ spin_lock(&sem->lock);
++ sem->count -= RWSEM_WRITE_BIAS;
++ if (unlikely(sem->count))
++ rwsem_wake(sem);
++ spin_unlock(&sem->lock);
++}
+
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_RWSEM_H */
+diff -urN rwsem-ref/include/linux/sched.h rwsem/include/linux/sched.h
+--- rwsem-ref/include/linux/sched.h Thu Oct 11 10:41:52 2001
++++ rwsem/include/linux/sched.h Fri Oct 12 08:14:19 2001
+@@ -239,7 +239,7 @@
+ pgd: swapper_pg_dir, \
+ mm_users: ATOMIC_INIT(2), \
+ mm_count: ATOMIC_INIT(1), \
+- mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \
++ mmap_sem: RWSEM_INITIALIZER(name.mmap_sem), \
+ page_table_lock: SPIN_LOCK_UNLOCKED, \
+ mmlist: LIST_HEAD_INIT(name.mmlist), \
+ }
+diff -urN rwsem-ref/lib/Makefile rwsem/lib/Makefile
+--- rwsem-ref/lib/Makefile Fri Oct 12 08:14:03 2001
++++ rwsem/lib/Makefile Fri Oct 12 08:14:39 2001
+@@ -8,12 +8,9 @@
+
+ L_TARGET := lib.a
+
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
++export-objs := cmdline.o dec_and_lock.o rwsem.o rbtree.o
+
+-obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o
+-
+-obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+-obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
++obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o rwsem.o
+
+ ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
+ obj-y += dec_and_lock.o
+diff -urN rwsem-ref/lib/rwsem-spinlock.c rwsem/lib/rwsem-spinlock.c
+--- rwsem-ref/lib/rwsem-spinlock.c Tue May 1 19:35:33 2001
++++ rwsem/lib/rwsem-spinlock.c Thu Jan 1 01:00:00 1970
+@@ -1,239 +0,0 @@
+-/* rwsem-spinlock.c: R/W semaphores: contention handling functions for generic spinlock
+- * implementation
+- *
+- * Copyright (c) 2001 David Howells (dhowells@redhat.com).
+- * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
+- * - Derived also from comments by Linus
+- */
+-#include <linux/rwsem.h>
+-#include <linux/sched.h>
+-#include <linux/module.h>
+-
+-struct rwsem_waiter {
+- struct list_head list;
+- struct task_struct *task;
+- unsigned int flags;
+-#define RWSEM_WAITING_FOR_READ 0x00000001
+-#define RWSEM_WAITING_FOR_WRITE 0x00000002
+-};
+-
+-#if RWSEM_DEBUG
+-void rwsemtrace(struct rw_semaphore *sem, const char *str)
+-{
+- if (sem->debug)
+- printk("[%d] %s({%d,%d})\n",
+- current->pid,str,sem->activity,list_empty(&sem->wait_list)?0:1);
+-}
+-#endif
+-
+-/*
+- * initialise the semaphore
+- */
+-void init_rwsem(struct rw_semaphore *sem)
+-{
+- sem->activity = 0;
+- spin_lock_init(&sem->wait_lock);
+- INIT_LIST_HEAD(&sem->wait_list);
+-#if RWSEM_DEBUG
+- sem->debug = 0;
+-#endif
+-}
+-
+-/*
+- * handle the lock being released whilst there are processes blocked on it that can now run
+- * - if we come here, then:
+- * - the 'active count' _reached_ zero
+- * - the 'waiting count' is non-zero
+- * - the spinlock must be held by the caller
+- * - woken process blocks are discarded from the list after having flags zeroised
+- */
+-static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem)
+-{
+- struct rwsem_waiter *waiter;
+- int woken;
+-
+- rwsemtrace(sem,"Entering __rwsem_do_wake");
+-
+- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
+-
+- /* try to grant a single write lock if there's a writer at the front of the queue
+- * - we leave the 'waiting count' incremented to signify potential contention
+- */
+- if (waiter->flags & RWSEM_WAITING_FOR_WRITE) {
+- sem->activity = -1;
+- list_del(&waiter->list);
+- waiter->flags = 0;
+- wake_up_process(waiter->task);
+- goto out;
+- }
+-
+- /* grant an infinite number of read locks to the readers at the front of the queue */
+- woken = 0;
+- do {
+- list_del(&waiter->list);
+- waiter->flags = 0;
+- wake_up_process(waiter->task);
+- woken++;
+- if (list_empty(&sem->wait_list))
+- break;
+- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
+- } while (waiter->flags&RWSEM_WAITING_FOR_READ);
+-
+- sem->activity += woken;
+-
+- out:
+- rwsemtrace(sem,"Leaving __rwsem_do_wake");
+- return sem;
+-}
+-
+-/*
+- * wake a single writer
+- */
+-static inline struct rw_semaphore *__rwsem_wake_one_writer(struct rw_semaphore *sem)
+-{
+- struct rwsem_waiter *waiter;
+-
+- sem->activity = -1;
+-
+- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
+- list_del(&waiter->list);
+-
+- waiter->flags = 0;
+- wake_up_process(waiter->task);
+- return sem;
+-}
+-
+-/*
+- * get a read lock on the semaphore
+- */
+-void __down_read(struct rw_semaphore *sem)
+-{
+- struct rwsem_waiter waiter;
+- struct task_struct *tsk;
+-
+- rwsemtrace(sem,"Entering __down_read");
+-
+- spin_lock(&sem->wait_lock);
+-
+- if (sem->activity>=0 && list_empty(&sem->wait_list)) {
+- /* granted */
+- sem->activity++;
+- spin_unlock(&sem->wait_lock);
+- goto out;
+- }
+-
+- tsk = current;
+- set_task_state(tsk,TASK_UNINTERRUPTIBLE);
+-
+- /* set up my own style of waitqueue */
+- waiter.task = tsk;
+- waiter.flags = RWSEM_WAITING_FOR_READ;
+-
+- list_add_tail(&waiter.list,&sem->wait_list);
+-
+- /* we don't need to touch the semaphore struct anymore */
+- spin_unlock(&sem->wait_lock);
+-
+- /* wait to be given the lock */
+- for (;;) {
+- if (!waiter.flags)
+- break;
+- schedule();
+- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+- }
+-
+- tsk->state = TASK_RUNNING;
+-
+- out:
+- rwsemtrace(sem,"Leaving __down_read");
+-}
+-
+-/*
+- * get a write lock on the semaphore
+- * - note that we increment the waiting count anyway to indicate an exclusive lock
+- */
+-void __down_write(struct rw_semaphore *sem)
+-{
+- struct rwsem_waiter waiter;
+- struct task_struct *tsk;
+-
+- rwsemtrace(sem,"Entering __down_write");
+-
+- spin_lock(&sem->wait_lock);
+-
+- if (sem->activity==0 && list_empty(&sem->wait_list)) {
+- /* granted */
+- sem->activity = -1;
+- spin_unlock(&sem->wait_lock);
+- goto out;
+- }
+-
+- tsk = current;
+- set_task_state(tsk,TASK_UNINTERRUPTIBLE);
+-
+- /* set up my own style of waitqueue */
+- waiter.task = tsk;
+- waiter.flags = RWSEM_WAITING_FOR_WRITE;
+-
+- list_add_tail(&waiter.list,&sem->wait_list);
+-
+- /* we don't need to touch the semaphore struct anymore */
+- spin_unlock(&sem->wait_lock);
+-
+- /* wait to be given the lock */
+- for (;;) {
+- if (!waiter.flags)
+- break;
+- schedule();
+- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+- }
+-
+- tsk->state = TASK_RUNNING;
+-
+- out:
+- rwsemtrace(sem,"Leaving __down_write");
+-}
+-
+-/*
+- * release a read lock on the semaphore
+- */
+-void __up_read(struct rw_semaphore *sem)
+-{
+- rwsemtrace(sem,"Entering __up_read");
+-
+- spin_lock(&sem->wait_lock);
+-
+- if (--sem->activity==0 && !list_empty(&sem->wait_list))
+- sem = __rwsem_wake_one_writer(sem);
+-
+- spin_unlock(&sem->wait_lock);
+-
+- rwsemtrace(sem,"Leaving __up_read");
+-}
+-
+-/*
+- * release a write lock on the semaphore
+- */
+-void __up_write(struct rw_semaphore *sem)
+-{
+- rwsemtrace(sem,"Entering __up_write");
+-
+- spin_lock(&sem->wait_lock);
+-
+- sem->activity = 0;
+- if (!list_empty(&sem->wait_list))
+- sem = __rwsem_do_wake(sem);
+-
+- spin_unlock(&sem->wait_lock);
+-
+- rwsemtrace(sem,"Leaving __up_write");
+-}
+-
+-EXPORT_SYMBOL(init_rwsem);
+-EXPORT_SYMBOL(__down_read);
+-EXPORT_SYMBOL(__down_write);
+-EXPORT_SYMBOL(__up_read);
+-EXPORT_SYMBOL(__up_write);
+-#if RWSEM_DEBUG
+-EXPORT_SYMBOL(rwsemtrace);
+-#endif
+diff -urN rwsem-ref/lib/rwsem.c rwsem/lib/rwsem.c
+--- rwsem-ref/lib/rwsem.c Sat Jul 21 00:04:34 2001
++++ rwsem/lib/rwsem.c Fri Oct 12 08:14:19 2001
+@@ -1,210 +1,63 @@
+-/* rwsem.c: R/W semaphores: contention handling functions
+- *
+- * Written by David Howells (dhowells@redhat.com).
+- * Derived from arch/i386/kernel/semaphore.c
++/*
++ * rw_semaphores generic spinlock version
++ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ */
+-#include <linux/rwsem.h>
++
+ #include <linux/sched.h>
+ #include <linux/module.h>
++#include <asm/semaphore.h>
+
+-struct rwsem_waiter {
+- struct list_head list;
+- struct task_struct *task;
+- unsigned int flags;
+-#define RWSEM_WAITING_FOR_READ 0x00000001
+-#define RWSEM_WAITING_FOR_WRITE 0x00000002
++struct rwsem_wait_queue {
++ unsigned long retire;
++ struct task_struct * task;
++ struct list_head task_list;
+ };
+
+-#if RWSEM_DEBUG
+-#undef rwsemtrace
+-void rwsemtrace(struct rw_semaphore *sem, const char *str)
+-{
+- printk("sem=%p\n",sem);
+- printk("(sem)=%08lx\n",sem->count);
+- if (sem->debug)
+- printk("[%d] %s({%08lx})\n",current->pid,str,sem->count);
+-}
+-#endif
+-
+-/*
+- * handle the lock being released whilst there are processes blocked on it that can now run
+- * - if we come here, then:
+- * - the 'active part' of the count (&0x0000ffff) reached zero but has been re-incremented
+- * - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so)
+- * - there must be someone on the queue
+- * - the spinlock must be held by the caller
+- * - woken process blocks are discarded from the list after having flags zeroised
+- */
+-static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem)
+-{
+- struct rwsem_waiter *waiter;
+- struct list_head *next;
+- signed long oldcount;
+- int woken, loop;
+-
+- rwsemtrace(sem,"Entering __rwsem_do_wake");
+-
+- /* only wake someone up if we can transition the active part of the count from 0 -> 1 */
+- try_again:
+- oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS,sem) - RWSEM_ACTIVE_BIAS;
+- if (oldcount & RWSEM_ACTIVE_MASK)
+- goto undo;
+-
+- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list);
+-
+- /* try to grant a single write lock if there's a writer at the front of the queue
+- * - note we leave the 'active part' of the count incremented by 1 and the waiting part
+- * incremented by 0x00010000
+- */
+- if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE))
+- goto readers_only;
+-
+- list_del(&waiter->list);
+- waiter->flags = 0;
+- wake_up_process(waiter->task);
+- goto out;
+-
+- /* grant an infinite number of read locks to the readers at the front of the queue
+- * - note we increment the 'active part' of the count by the number of readers (less one
+- * for the activity decrement we've already done) before waking any processes up
+- */
+- readers_only:
+- woken = 0;
+- do {
+- woken++;
+-
+- if (waiter->list.next==&sem->wait_list)
+- break;
+-
+- waiter = list_entry(waiter->list.next,struct rwsem_waiter,list);
+-
+- } while (waiter->flags & RWSEM_WAITING_FOR_READ);
+-
+- loop = woken;
+- woken *= RWSEM_ACTIVE_BIAS-RWSEM_WAITING_BIAS;
+- woken -= RWSEM_ACTIVE_BIAS;
+- rwsem_atomic_add(woken,sem);
+-
+- next = sem->wait_list.next;
+- for (; loop>0; loop--) {
+- waiter = list_entry(next,struct rwsem_waiter,list);
+- next = waiter->list.next;
+- waiter->flags = 0;
+- wake_up_process(waiter->task);
+- }
+-
+- sem->wait_list.next = next;
+- next->prev = &sem->wait_list;
+-
+- out:
+- rwsemtrace(sem,"Leaving __rwsem_do_wake");
+- return sem;
+-
+- /* undo the change to count, but check for a transition 1->0 */
+- undo:
+- if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS,sem)!=0)
+- goto out;
+- goto try_again;
+-}
+-
+-/*
+- * wait for a lock to be granted
+- */
+-static inline struct rw_semaphore *rwsem_down_failed_common(struct rw_semaphore *sem,
+- struct rwsem_waiter *waiter,
+- signed long adjustment)
++void rwsem_down_failed(struct rw_semaphore *sem, long retire)
+ {
+ struct task_struct *tsk = current;
+- signed long count;
+-
+- set_task_state(tsk,TASK_UNINTERRUPTIBLE);
+-
+- /* set up my own style of waitqueue */
+- spin_lock(&sem->wait_lock);
+- waiter->task = tsk;
+-
+- list_add_tail(&waiter->list,&sem->wait_list);
+-
+- /* note that we're now waiting on the lock, but no longer actively read-locking */
+- count = rwsem_atomic_update(adjustment,sem);
+-
+- /* if there are no longer active locks, wake the front queued process(es) up
+- * - it might even be this process, since the waker takes a more active part
+- */
+- if (!(count & RWSEM_ACTIVE_MASK))
+- sem = __rwsem_do_wake(sem);
++ struct rwsem_wait_queue wait;
+
+- spin_unlock(&sem->wait_lock);
++ sem->count += retire;
++ wait.retire = retire;
++ wait.task = tsk;
++ INIT_LIST_HEAD(&wait.task_list);
++ list_add(&wait.task_list, &sem->wait);
+
+- /* wait to be given the lock */
+- for (;;) {
+- if (!waiter->flags)
+- break;
++ do {
++ __set_task_state(tsk, TASK_UNINTERRUPTIBLE);
++ spin_unlock(&sem->lock);
+ schedule();
+- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+- }
+-
+- tsk->state = TASK_RUNNING;
+-
+- return sem;
+-}
+-
+-/*
+- * wait for the read lock to be granted
+- */
+-struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem)
+-{
+- struct rwsem_waiter waiter;
+-
+- rwsemtrace(sem,"Entering rwsem_down_read_failed");
+-
+- waiter.flags = RWSEM_WAITING_FOR_READ;
+- rwsem_down_failed_common(sem,&waiter,RWSEM_WAITING_BIAS-RWSEM_ACTIVE_BIAS);
+-
+- rwsemtrace(sem,"Leaving rwsem_down_read_failed");
+- return sem;
++ spin_lock(&sem->lock);
++ } while(wait.task_list.next);
+ }
+
+-/*
+- * wait for the write lock to be granted
+- */
+-struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem)
++void rwsem_wake(struct rw_semaphore *sem)
+ {
+- struct rwsem_waiter waiter;
++ struct list_head * entry, * head = &sem->wait;
++ int last = 0;
+
+- rwsemtrace(sem,"Entering rwsem_down_write_failed");
++ while ((entry = head->prev) != head) {
++ struct rwsem_wait_queue * wait;
+
+- waiter.flags = RWSEM_WAITING_FOR_WRITE;
+- rwsem_down_failed_common(sem,&waiter,-RWSEM_ACTIVE_BIAS);
+-
+- rwsemtrace(sem,"Leaving rwsem_down_write_failed");
+- return sem;
+-}
++ wait = list_entry(entry, struct rwsem_wait_queue, task_list);
+
+-/*
+- * handle waking up a waiter on the semaphore
+- * - up_read has decremented the active part of the count if we come here
+- */
+-struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
+-{
+- rwsemtrace(sem,"Entering rwsem_wake");
++ if (wait->retire == RWSEM_WRITE_BLOCKING_BIAS) {
++ if (sem->count & RWSEM_READ_MASK)
++ break;
++ last = 1;
++ }
+
+- spin_lock(&sem->wait_lock);
+-
+- /* do nothing if list empty */
+- if (!list_empty(&sem->wait_list))
+- sem = __rwsem_do_wake(sem);
+-
+- spin_unlock(&sem->wait_lock);
+-
+- rwsemtrace(sem,"Leaving rwsem_wake");
+-
+- return sem;
++ /* convert write lock into read lock when read become active */
++ sem->count -= wait->retire;
++ list_del(entry);
++ entry->next = NULL;
++ wake_up_process(wait->task);
++
++ if (last)
++ break;
++ }
+ }
+
+-EXPORT_SYMBOL_NOVERS(rwsem_down_read_failed);
+-EXPORT_SYMBOL_NOVERS(rwsem_down_write_failed);
+-EXPORT_SYMBOL_NOVERS(rwsem_wake);
+-#if RWSEM_DEBUG
+-EXPORT_SYMBOL(rwsemtrace);
+-#endif
++EXPORT_SYMBOL(rwsem_down_failed);
++EXPORT_SYMBOL(rwsem_wake);
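
Note: the generic lib/rwsem.c above keeps the whole semaphore state in one long: readers add RWSEM_READ_BIAS in the low half, a writer adds RWSEM_WRITE_BIAS in the high half, so the fast paths only test the sign or non-zero-ness of the old count. A minimal single-threaded sketch of that encoding (illustrative only; it models no spinlock, blocking or wait list):

#include <stdio.h>

#define BITS_PER_LONG	(8 * (int)sizeof(long))
#define READ_BIAS	1L
#define WRITE_BIAS	((long)~(~0UL >> (BITS_PER_LONG >> 1)))

int main(void)
{
	long count = 0;

	count += READ_BIAS;	/* first reader: old count was 0, fast path */
	printf("one reader:  %#lx  reader sees queued writer? %s\n",
	       (unsigned long)count, count < 0 ? "yes" : "no");

	count += WRITE_BIAS;	/* writer arrives: old count was nonzero, must wait */
	printf("plus writer: %#lx  down_write takes the slow path\n",
	       (unsigned long)count);

	count -= READ_BIAS;	/* last reader leaves: read mask clears */
	printf("reader gone: %#lx  up_read wakes the queued writer\n",
	       (unsigned long)count);
	return 0;
}
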
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4
new file mode 100644
index 000000000000..d54be8d152a9
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4
@@ -0,0 +1,271 @@
+diff -urN 2.4.11pre3aa1/arch/alpha/mm/fault.c recurse/arch/alpha/mm/fault.c
+--- 2.4.11pre3aa1/arch/alpha/mm/fault.c Sun Sep 23 21:11:28 2001
++++ recurse/arch/alpha/mm/fault.c Thu Oct 4 18:50:12 2001
+@@ -113,7 +113,7 @@
+ goto vmalloc_fault;
+ #endif
+
+- down_read(&mm->mmap_sem);
++ down_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+@@ -147,7 +147,7 @@
+ * the fault.
+ */
+ fault = handle_mm_fault(mm, vma, address, cause > 0);
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+
+ if (fault < 0)
+ goto out_of_memory;
+@@ -161,7 +161,7 @@
+ * Fix it, but check if it's kernel or user first..
+ */
+ bad_area:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+
+ if (user_mode(regs)) {
+ force_sig(SIGSEGV, current);
+@@ -198,7 +198,7 @@
+ if (current->pid == 1) {
+ current->policy |= SCHED_YIELD;
+ schedule();
+- down_read(&mm->mmap_sem);
++ down_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ goto survive;
+ }
+ printk(KERN_ALERT "VM: killing process %s(%d)\n",
+diff -urN 2.4.11pre3aa1/arch/i386/mm/fault.c recurse/arch/i386/mm/fault.c
+--- 2.4.11pre3aa1/arch/i386/mm/fault.c Sun Sep 23 21:11:28 2001
++++ recurse/arch/i386/mm/fault.c Thu Oct 4 18:50:12 2001
+@@ -191,7 +191,7 @@
+ if (in_interrupt() || !mm)
+ goto no_context;
+
+- down_read(&mm->mmap_sem);
++ down_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+
+ vma = find_vma(mm, address);
+ if (!vma)
+@@ -265,7 +265,7 @@
+ if (bit < 32)
+ tsk->thread.screen_bitmap |= 1 << bit;
+ }
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ return;
+
+ /*
+@@ -273,7 +273,7 @@
+ * Fix it, but check if it's kernel or user first..
+ */
+ bad_area:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (error_code & 4) {
+@@ -341,11 +341,11 @@
+ * us unable to handle the page fault gracefully.
+ */
+ out_of_memory:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ if (tsk->pid == 1) {
+ tsk->policy |= SCHED_YIELD;
+ schedule();
+- down_read(&mm->mmap_sem);
++ down_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ goto survive;
+ }
+ printk("VM: killing process %s\n", tsk->comm);
+@@ -354,7 +354,7 @@
+ goto no_context;
+
+ do_sigbus:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+
+ /*
+ * Send a sigbus, regardless of whether we were in kernel
+diff -urN 2.4.11pre3aa1/arch/ia64/mm/fault.c recurse/arch/ia64/mm/fault.c
+--- 2.4.11pre3aa1/arch/ia64/mm/fault.c Tue May 1 19:35:18 2001
++++ recurse/arch/ia64/mm/fault.c Thu Oct 4 18:50:12 2001
+@@ -60,7 +60,7 @@
+ if (in_interrupt() || !mm)
+ goto no_context;
+
+- down_read(&mm->mmap_sem);
++ down_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+
+ vma = find_vma_prev(mm, address, &prev_vma);
+ if (!vma)
+@@ -112,7 +112,7 @@
+ default:
+ goto out_of_memory;
+ }
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ return;
+
+ check_expansion:
+@@ -135,7 +135,7 @@
+ goto good_area;
+
+ bad_area:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ if (isr & IA64_ISR_SP) {
+ /*
+ * This fault was due to a speculative load set the "ed" bit in the psr to
+@@ -184,7 +184,7 @@
+ return;
+
+ out_of_memory:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ printk("VM: killing process %s\n", current->comm);
+ if (user_mode(regs))
+ do_exit(SIGKILL);
+diff -urN 2.4.11pre3aa1/arch/ppc/mm/fault.c recurse/arch/ppc/mm/fault.c
+--- 2.4.11pre3aa1/arch/ppc/mm/fault.c Thu Oct 4 10:06:33 2001
++++ recurse/arch/ppc/mm/fault.c Thu Oct 4 18:50:12 2001
+@@ -103,7 +103,7 @@
+ bad_page_fault(regs, address, SIGSEGV);
+ return;
+ }
+- down_read(&mm->mmap_sem);
++ down_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ vma = find_vma(mm, address);
+ if (!vma)
+ goto bad_area;
+@@ -164,7 +164,7 @@
+ goto out_of_memory;
+ }
+
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ /*
+ * keep track of tlb+htab misses that are good addrs but
+ * just need pte's created via handle_mm_fault()
+@@ -174,7 +174,7 @@
+ return;
+
+ bad_area:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ pte_errors++;
+
+ /* User mode accesses cause a SIGSEGV */
+@@ -195,7 +195,7 @@
+ * us unable to handle the page fault gracefully.
+ */
+ out_of_memory:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ if (current->pid == 1) {
+ current->policy |= SCHED_YIELD;
+ schedule();
+@@ -209,7 +209,7 @@
+ return;
+
+ do_sigbus:
+- up_read(&mm->mmap_sem);
++ up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ info.si_signo = SIGBUS;
+ info.si_errno = 0;
+ info.si_code = BUS_ADRERR;
+diff -urN 2.4.11pre3aa1/fs/exec.c recurse/fs/exec.c
+--- 2.4.11pre3aa1/fs/exec.c Sun Sep 23 21:11:39 2001
++++ recurse/fs/exec.c Thu Oct 4 18:50:12 2001
+@@ -969,9 +969,9 @@
+ if (do_truncate(file->f_dentry, 0) != 0)
+ goto close_fail;
+
+- down_read(&current->mm->mmap_sem);
++ down_read_recursive(&current->mm->mmap_sem, &current->mm_recursor);
+ retval = binfmt->core_dump(signr, regs, file);
+- up_read(&current->mm->mmap_sem);
++ up_read_recursive(&current->mm->mmap_sem, &current->mm_recursor);
+
+ close_fail:
+ filp_close(file, NULL);
+diff -urN 2.4.11pre3aa1/include/linux/rwsem.h recurse/include/linux/rwsem.h
+--- 2.4.11pre3aa1/include/linux/rwsem.h Thu Oct 4 18:49:53 2001
++++ recurse/include/linux/rwsem.h Thu Oct 4 18:50:12 2001
+@@ -18,6 +18,11 @@
+ #endif
+ };
+
++struct rw_sem_recursor
++{
++ int counter;
++};
++
+ #if RWSEM_DEBUG
+ #define __SEM_DEBUG_INIT(name) \
+ , (long)&(name).__magic
+@@ -42,6 +47,7 @@
+ __SEM_DEBUG_INIT(name) \
+ }
+ #define RWSEM_INITIALIZER(name) __RWSEM_INITIALIZER(name, 0)
++#define RWSEM_RECURSOR_INITIALIZER ((struct rw_sem_recursor) { 0, })
+
+ #define __DECLARE_RWSEM(name, count) \
+ struct rw_semaphore name = __RWSEM_INITIALIZER(name, count)
+@@ -112,6 +118,34 @@
+ spin_lock(&sem->lock);
+ sem->count -= RWSEM_WRITE_BIAS;
+ if (unlikely(sem->count))
++ rwsem_wake(sem);
++ spin_unlock(&sem->lock);
++}
++
++static inline void down_read_recursive(struct rw_semaphore *sem,
++ struct rw_sem_recursor * recursor)
++{
++ int count, counter;
++ CHECK_MAGIC(sem->__magic);
++
++ spin_lock(&sem->lock);
++ count = sem->count;
++ sem->count += RWSEM_READ_BIAS;
++ counter = recursor->counter++;
++ if (unlikely(count < 0 && !counter && !(count & RWSEM_READ_MASK)))
++ rwsem_down_failed(sem, RWSEM_READ_BLOCKING_BIAS);
++ spin_unlock(&sem->lock);
++}
++
++static inline void up_read_recursive(struct rw_semaphore *sem,
++ struct rw_sem_recursor * recursor)
++{
++ CHECK_MAGIC(sem->__magic);
++
++ spin_lock(&sem->lock);
++ sem->count -= RWSEM_READ_BIAS;
++ recursor->counter--;
++ if (unlikely(sem->count < 0 && !(sem->count & RWSEM_READ_MASK)))
+ rwsem_wake(sem);
+ spin_unlock(&sem->lock);
+ }
+diff -urN 2.4.11pre3aa1/include/linux/sched.h recurse/include/linux/sched.h
+--- 2.4.11pre3aa1/include/linux/sched.h Thu Oct 4 18:49:53 2001
++++ recurse/include/linux/sched.h Thu Oct 4 18:50:12 2001
+@@ -315,6 +315,7 @@
+
+ struct task_struct *next_task, *prev_task;
+ struct mm_struct *active_mm;
++ struct rw_sem_recursor mm_recursor;
+ struct list_head local_pages;
+ unsigned int allocation_order, nr_local_pages;
+
+@@ -460,6 +461,7 @@
+ policy: SCHED_OTHER, \
+ mm: NULL, \
+ active_mm: &init_mm, \
++ mm_recursor: RWSEM_RECURSOR_INITIALIZER, \
+ cpus_allowed: -1, \
+ run_list: LIST_HEAD_INIT(tsk.run_list), \
+ next_task: &tsk, \
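
Note: the recursive variant above threads a per-task rw_sem_recursor through every mmap_sem read acquisition, so a fault taken while the semaphore is already read-held by the same task does not queue behind a waiting writer. A toy model of just the nesting-counter part of that check (hypothetical userspace names; the real test in down_read_recursive() also looks at the reader mask of sem->count):

#include <stdio.h>

struct rw_sem_recursor { int counter; };

static int writer_waiting = 1;	/* pretend a writer is already queued */

/* Return 1 if this read acquisition would have to sleep.  Only the
 * outermost acquisition (counter was 0) may wait behind the queued
 * writer; nested ones go straight through, which is what avoids the
 * fault-while-read-locked deadlock the patch is addressing. */
static int down_read_would_block(struct rw_sem_recursor *r)
{
	int nested = r->counter++;

	return writer_waiting && !nested;
}

int main(void)
{
	struct rw_sem_recursor task = { 0 };

	printf("outer  down_read blocks? %d\n", down_read_would_block(&task));
	printf("nested down_read blocks? %d\n", down_read_would_block(&task));
	return 0;
}
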
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10
new file mode 100644
index 000000000000..9f5d7c11faaa
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10
@@ -0,0 +1,394 @@
+diff -urN 2.4.10pre11/arch/alpha/mm/fault.c silent-stack-overflow/arch/alpha/mm/fault.c
+--- 2.4.10pre11/arch/alpha/mm/fault.c Tue Sep 18 02:41:49 2001
++++ silent-stack-overflow/arch/alpha/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -121,7 +121,7 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/arch/arm/mm/fault-common.c silent-stack-overflow/arch/arm/mm/fault-common.c
+--- 2.4.10pre11/arch/arm/mm/fault-common.c Thu Aug 16 22:03:23 2001
++++ silent-stack-overflow/arch/arm/mm/fault-common.c Tue Sep 18 10:08:51 2001
+@@ -229,7 +229,7 @@
+ goto survive;
+
+ check_stack:
+- if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr))
++ if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr, NULL))
+ goto good_area;
+ out:
+ return fault;
+diff -urN 2.4.10pre11/arch/cris/mm/fault.c silent-stack-overflow/arch/cris/mm/fault.c
+--- 2.4.10pre11/arch/cris/mm/fault.c Sat Aug 11 08:03:54 2001
++++ silent-stack-overflow/arch/cris/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -284,7 +284,7 @@
+ if (address + PAGE_SIZE < rdusp())
+ goto bad_area;
+ }
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+
+ /*
+diff -urN 2.4.10pre11/arch/i386/mm/fault.c silent-stack-overflow/arch/i386/mm/fault.c
+--- 2.4.10pre11/arch/i386/mm/fault.c Tue Sep 18 02:41:57 2001
++++ silent-stack-overflow/arch/i386/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -33,7 +33,7 @@
+ */
+ int __verify_write(const void * addr, unsigned long size)
+ {
+- struct vm_area_struct * vma;
++ struct vm_area_struct * vma, * prev_vma;
+ unsigned long start = (unsigned long) addr;
+
+ if (!size)
+@@ -79,7 +79,8 @@
+ check_stack:
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, start) == 0)
++ find_vma_prev(current->mm, start, &prev_vma);
++ if (expand_stack(vma, start, prev_vma) == 0)
+ goto good_area;
+
+ bad_area:
+@@ -152,7 +153,7 @@
+ {
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+- struct vm_area_struct * vma;
++ struct vm_area_struct * vma, * prev_vma;
+ unsigned long address;
+ unsigned long page;
+ unsigned long fixup;
+@@ -213,7 +214,8 @@
+ if (address + 32 < regs->esp)
+ goto bad_area;
+ }
+- if (expand_stack(vma, address))
++ find_vma_prev(mm, address, &prev_vma);
++ if (expand_stack(vma, address, prev_vma))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/arch/ia64/mm/fault.c silent-stack-overflow/arch/ia64/mm/fault.c
+--- 2.4.10pre11/arch/ia64/mm/fault.c Tue May 1 19:35:18 2001
++++ silent-stack-overflow/arch/ia64/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -122,7 +122,7 @@
+ if (rgn_index(address) != rgn_index(vma->vm_start)
+ || rgn_offset(address) >= RGN_MAP_LIMIT)
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ } else {
+ vma = prev_vma;
+diff -urN 2.4.10pre11/arch/m68k/mm/fault.c silent-stack-overflow/arch/m68k/mm/fault.c
+--- 2.4.10pre11/arch/m68k/mm/fault.c Sun Apr 1 01:17:08 2001
++++ silent-stack-overflow/arch/m68k/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -120,7 +120,7 @@
+ if (address + 256 < rdusp())
+ goto map_err;
+ }
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto map_err;
+
+ /*
+diff -urN 2.4.10pre11/arch/mips/mm/fault.c silent-stack-overflow/arch/mips/mm/fault.c
+--- 2.4.10pre11/arch/mips/mm/fault.c Sat Jul 21 00:04:05 2001
++++ silent-stack-overflow/arch/mips/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -80,7 +80,7 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/arch/mips64/mm/fault.c silent-stack-overflow/arch/mips64/mm/fault.c
+--- 2.4.10pre11/arch/mips64/mm/fault.c Tue Sep 18 02:42:13 2001
++++ silent-stack-overflow/arch/mips64/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -132,7 +132,7 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/arch/ppc/mm/fault.c silent-stack-overflow/arch/ppc/mm/fault.c
+--- 2.4.10pre11/arch/ppc/mm/fault.c Wed Jul 4 04:03:45 2001
++++ silent-stack-overflow/arch/ppc/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -64,7 +64,7 @@
+ void do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code)
+ {
+- struct vm_area_struct * vma;
++ struct vm_area_struct * vma, * prev_vma;
+ struct mm_struct *mm = current->mm;
+ siginfo_t info;
+ int code = SEGV_MAPERR;
+@@ -111,7 +111,8 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ vma = find_vma_prev(mm, address, &prev_vma);
++ if (expand_stack(vma, address, prev_vma))
+ goto bad_area;
+
+ good_area:
+diff -urN 2.4.10pre11/arch/s390/mm/fault.c silent-stack-overflow/arch/s390/mm/fault.c
+--- 2.4.10pre11/arch/s390/mm/fault.c Sat Aug 11 08:03:59 2001
++++ silent-stack-overflow/arch/s390/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -122,7 +122,7 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/arch/s390x/mm/fault.c silent-stack-overflow/arch/s390x/mm/fault.c
+--- 2.4.10pre11/arch/s390x/mm/fault.c Sat Aug 11 08:04:00 2001
++++ silent-stack-overflow/arch/s390x/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -152,7 +152,7 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/arch/sh/mm/fault.c silent-stack-overflow/arch/sh/mm/fault.c
+--- 2.4.10pre11/arch/sh/mm/fault.c Tue Sep 18 02:42:19 2001
++++ silent-stack-overflow/arch/sh/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -74,7 +74,7 @@
+ check_stack:
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, start) == 0)
++ if (expand_stack(vma, start, NULL) == 0)
+ goto good_area;
+
+ bad_area:
+@@ -114,7 +114,7 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/arch/sparc/mm/fault.c silent-stack-overflow/arch/sparc/mm/fault.c
+--- 2.4.10pre11/arch/sparc/mm/fault.c Sat Aug 11 08:04:01 2001
++++ silent-stack-overflow/arch/sparc/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -238,7 +238,7 @@
+ goto good_area;
+ if(!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if(expand_stack(vma, address))
++ if(expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+@@ -485,7 +485,7 @@
+ goto good_area;
+ if(!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if(expand_stack(vma, address))
++ if(expand_stack(vma, address, NULL))
+ goto bad_area;
+ good_area:
+ info.si_code = SEGV_ACCERR;
+diff -urN 2.4.10pre11/arch/sparc64/mm/fault.c silent-stack-overflow/arch/sparc64/mm/fault.c
+--- 2.4.10pre11/arch/sparc64/mm/fault.c Tue Sep 18 02:42:20 2001
++++ silent-stack-overflow/arch/sparc64/mm/fault.c Tue Sep 18 10:08:51 2001
+@@ -340,7 +340,7 @@
+ goto good_area;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto bad_area;
+- if (expand_stack(vma, address))
++ if (expand_stack(vma, address, NULL))
+ goto bad_area;
+ /*
+ * Ok, we have a good vm_area for this memory access, so
+diff -urN 2.4.10pre11/include/linux/mm.h silent-stack-overflow/include/linux/mm.h
+--- 2.4.10pre11/include/linux/mm.h Tue Sep 18 02:43:02 2001
++++ silent-stack-overflow/include/linux/mm.h Tue Sep 18 10:10:24 2001
+@@ -572,11 +572,24 @@
+
+ #define GFP_DMA __GFP_DMA
+
+-/* vma is the first one with address < vma->vm_end,
+- * and even address < vma->vm_start. Have to extend vma. */
+-static inline int expand_stack(struct vm_area_struct * vma, unsigned long address)
++extern int heap_stack_gap;
++
++/*
++ * vma is the first one with address < vma->vm_end,
++ * and even address < vma->vm_start. Have to extend vma.
++ *
++ * Locking: vm_start can decrease under you if you only hold
++ * the read semaphore, you either need the write semaphore
++ * or both the read semaphore and the page_table_lock acquired
++ * if you want vm_start consistent. vm_end and the vma layout
++ * are just consistent with only the read semaphore acquired
++ * instead.
++ */
++static inline int expand_stack(struct vm_area_struct * vma, unsigned long address,
++ struct vm_area_struct * prev_vma)
+ {
+ unsigned long grow;
++ int err = -ENOMEM;
+
+ /*
+ * vma->vm_start/vm_end cannot change under us because the caller is required
+@@ -584,18 +597,22 @@
+ * before relocating the vma range ourself.
+ */
+ address &= PAGE_MASK;
++ if (prev_vma && prev_vma->vm_end + (heap_stack_gap << PAGE_SHIFT) > address)
++ goto out;
++ spin_lock(&vma->vm_mm->page_table_lock);
+ grow = (vma->vm_start - address) >> PAGE_SHIFT;
+ if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur ||
+ ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur)
+- return -ENOMEM;
+- spin_lock(&vma->vm_mm->page_table_lock);
++ goto out_unlock;
+ vma->vm_start = address;
+- vma->vm_pgoff -= grow;
+ vma->vm_mm->total_vm += grow;
+ if (vma->vm_flags & VM_LOCKED)
+ vma->vm_mm->locked_vm += grow;
++ err = 0;
++ out_unlock:
+ spin_unlock(&vma->vm_mm->page_table_lock);
+- return 0;
++ out:
++ return err;
+ }
+
+ /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
+diff -urN 2.4.10pre11/include/linux/sysctl.h silent-stack-overflow/include/linux/sysctl.h
+--- 2.4.10pre11/include/linux/sysctl.h Tue Sep 18 02:43:03 2001
++++ silent-stack-overflow/include/linux/sysctl.h Tue Sep 18 10:08:51 2001
+@@ -136,7 +136,8 @@
+ VM_PAGECACHE=7, /* struct: Set cache memory thresholds */
+ VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */
+ VM_PGT_CACHE=9, /* struct: Set page table cache parameters */
+- VM_PAGE_CLUSTER=10 /* int: set number of pages to swap together */
++ VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */
++ VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */
+ };
+
+
+diff -urN 2.4.10pre11/kernel/sysctl.c silent-stack-overflow/kernel/sysctl.c
+--- 2.4.10pre11/kernel/sysctl.c Tue Sep 18 02:43:04 2001
++++ silent-stack-overflow/kernel/sysctl.c Tue Sep 18 10:08:51 2001
+@@ -265,6 +265,8 @@
+ &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_PAGE_CLUSTER, "page-cluster",
+ &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
++ {VM_HEAP_STACK_GAP, "heap-stack-gap",
++ &heap_stack_gap, sizeof(int), 0644, NULL, &proc_dointvec},
+ {0}
+ };
+
+diff -urN 2.4.10pre11/mm/memory.c silent-stack-overflow/mm/memory.c
+--- 2.4.10pre11/mm/memory.c Tue Sep 18 02:43:04 2001
++++ silent-stack-overflow/mm/memory.c Tue Sep 18 10:08:51 2001
+@@ -444,7 +444,7 @@
+ unsigned long ptr, end;
+ int err;
+ struct mm_struct * mm;
+- struct vm_area_struct * vma = 0;
++ struct vm_area_struct * vma, * prev_vma;
+ struct page * map;
+ int i;
+ int datain = (rw == READ);
+@@ -470,19 +470,21 @@
+ iobuf->length = len;
+
+ i = 0;
++ vma = NULL;
+
+ /*
+ * First of all, try to fault in all of the necessary pages
+ */
+ while (ptr < end) {
+ if (!vma || ptr >= vma->vm_end) {
+- vma = find_vma(current->mm, ptr);
++ vma = find_vma(mm, ptr);
+ if (!vma)
+ goto out_unlock;
+ if (vma->vm_start > ptr) {
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ goto out_unlock;
+- if (expand_stack(vma, ptr))
++ find_vma_prev(mm, ptr, &prev_vma);
++ if (expand_stack(vma, ptr, prev_vma))
+ goto out_unlock;
+ }
+ if (((datain) && (!(vma->vm_flags & VM_WRITE))) ||
+diff -urN 2.4.10pre11/mm/mmap.c silent-stack-overflow/mm/mmap.c
+--- 2.4.10pre11/mm/mmap.c Tue Sep 18 02:43:04 2001
++++ silent-stack-overflow/mm/mmap.c Tue Sep 18 10:12:08 2001
+@@ -45,6 +45,7 @@
+ };
+
+ int sysctl_overcommit_memory;
++int heap_stack_gap = 1;
+
+ /* Check that a process has enough memory to allocate a
+ * new virtual mapping.
+@@ -606,9 +607,15 @@
+
+ for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) {
+ /* At this point: (!vma || addr < vma->vm_end). */
++ unsigned long __heap_stack_gap;
+ if (TASK_SIZE - len < addr)
+ return -ENOMEM;
+- if (!vma || addr + len <= vma->vm_start)
++ if (!vma)
++ return addr;
++ __heap_stack_gap = 0;
++ if (vma->vm_flags & VM_GROWSDOWN)
++ __heap_stack_gap = heap_stack_gap << PAGE_SHIFT;
++ if (addr + len + __heap_stack_gap <= vma->vm_start)
+ return addr;
+ addr = vma->vm_end;
+ }
+@@ -717,7 +724,7 @@
+
+ struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr)
+ {
+- struct vm_area_struct * vma;
++ struct vm_area_struct * vma, * prev_vma;
+ unsigned long start;
+
+ addr &= PAGE_MASK;
+@@ -728,9 +735,10 @@
+ return vma;
+ if (!(vma->vm_flags & VM_GROWSDOWN))
+ return NULL;
+- start = vma->vm_start;
+- if (expand_stack(vma, addr))
++ find_vma_prev(mm, addr, &prev_vma);
++ if (expand_stack(vma, addr, prev_vma))
+ return NULL;
++ start = vma->vm_start;
+ if (vma->vm_flags & VM_LOCKED) {
+ make_pages_present(addr, start);
+ }
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2
new file mode 100644
index 000000000000..98023b0de4d1
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2
@@ -0,0 +1,136 @@
+diff -urN 2.4.14pre4/arch/i386/kernel/i386_ksyms.c spinlock/arch/i386/kernel/i386_ksyms.c
+--- 2.4.14pre4/arch/i386/kernel/i386_ksyms.c Wed Oct 24 08:04:08 2001
++++ spinlock/arch/i386/kernel/i386_ksyms.c Tue Oct 30 01:44:59 2001
+@@ -120,7 +120,7 @@
+
+ #ifdef CONFIG_SMP
+ EXPORT_SYMBOL(cpu_data);
+-EXPORT_SYMBOL(kernel_flag);
++EXPORT_SYMBOL(kernel_flag_cacheline);
+ EXPORT_SYMBOL(smp_num_cpus);
+ EXPORT_SYMBOL(cpu_online_map);
+ EXPORT_SYMBOL_NOVERS(__write_lock_failed);
+diff -urN 2.4.14pre4/arch/i386/kernel/smp.c spinlock/arch/i386/kernel/smp.c
+--- 2.4.14pre4/arch/i386/kernel/smp.c Wed Oct 24 08:04:08 2001
++++ spinlock/arch/i386/kernel/smp.c Tue Oct 30 01:44:59 2001
+@@ -102,7 +102,7 @@
+ */
+
+ /* The 'big kernel lock' */
+-spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED;
++spinlock_cacheline_t kernel_flag_cacheline = {SPIN_LOCK_UNLOCKED};
+
+ struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... NR_CPUS-1] = { &init_mm, 0 }};
+
+diff -urN 2.4.14pre4/fs/buffer.c spinlock/fs/buffer.c
+--- 2.4.14pre4/fs/buffer.c Tue Oct 30 00:07:24 2001
++++ spinlock/fs/buffer.c Tue Oct 30 01:44:59 2001
+@@ -72,7 +72,10 @@
+ static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED;
+
+ static struct buffer_head *lru_list[NR_LIST];
+-static spinlock_t lru_list_lock = SPIN_LOCK_UNLOCKED;
++
++static spinlock_cacheline_t lru_list_lock_cacheline = {SPIN_LOCK_UNLOCKED};
++#define lru_list_lock lru_list_lock_cacheline.lock
++
+ static int nr_buffers_type[NR_LIST];
+ static unsigned long size_buffers_type[NR_LIST];
+
+diff -urN 2.4.14pre4/include/asm-i386/smplock.h spinlock/include/asm-i386/smplock.h
+--- 2.4.14pre4/include/asm-i386/smplock.h Mon Oct 29 01:49:56 2001
++++ spinlock/include/asm-i386/smplock.h Tue Oct 30 01:44:59 2001
+@@ -8,7 +8,8 @@
+ #include <linux/sched.h>
+ #include <asm/current.h>
+
+-extern spinlock_t kernel_flag;
++extern spinlock_cacheline_t kernel_flag_cacheline;
++#define kernel_flag kernel_flag_cacheline.lock
+
+ #define kernel_locked() spin_is_locked(&kernel_flag)
+
+diff -urN 2.4.14pre4/include/linux/spinlock.h spinlock/include/linux/spinlock.h
+--- 2.4.14pre4/include/linux/spinlock.h Mon Oct 29 01:49:55 2001
++++ spinlock/include/linux/spinlock.h Tue Oct 30 01:44:59 2001
+@@ -138,4 +138,20 @@
+ extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
+ #endif
+
++#ifdef CONFIG_SMP
++#include <linux/cache.h>
++
++typedef union {
++ spinlock_t lock;
++ char fill_up[(SMP_CACHE_BYTES)];
++} spinlock_cacheline_t __attribute__ ((aligned(SMP_CACHE_BYTES)));
++
++#else /* SMP */
++
++typedef struct {
++ spinlock_t lock;
++} spinlock_cacheline_t;
++
++
++#endif
+ #endif /* __LINUX_SPINLOCK_H */
+diff -urN 2.4.14pre4/include/linux/swap.h spinlock/include/linux/swap.h
+--- 2.4.14pre4/include/linux/swap.h Tue Oct 30 00:07:29 2001
++++ spinlock/include/linux/swap.h Tue Oct 30 01:44:59 2001
+@@ -86,7 +86,10 @@
+ extern atomic_t nr_async_pages;
+ extern atomic_t page_cache_size;
+ extern atomic_t buffermem_pages;
+-extern spinlock_t pagecache_lock;
++
++extern spinlock_cacheline_t pagecache_lock_cacheline;
++#define pagecache_lock (pagecache_lock_cacheline.lock)
++
+ extern void __remove_inode_page(struct page *);
+
+ /* Incomplete types for prototype declarations: */
+@@ -154,7 +157,8 @@
+ asmlinkage long sys_swapoff(const char *);
+ asmlinkage long sys_swapon(const char *, int);
+
+-extern spinlock_t pagemap_lru_lock;
++extern spinlock_cacheline_t pagemap_lru_lock_cacheline;
++#define pagemap_lru_lock pagemap_lru_lock_cacheline.lock
+
+ extern void FASTCALL(mark_page_accessed(struct page *));
+
+diff -urN 2.4.14pre4/mm/filemap.c spinlock/mm/filemap.c
+--- 2.4.14pre4/mm/filemap.c Tue Oct 30 00:07:29 2001
++++ spinlock/mm/filemap.c Tue Oct 30 01:45:32 2001
+@@ -47,7 +47,8 @@
+ unsigned int page_hash_bits;
+ struct page **page_hash_table;
+
+-spinlock_t pagecache_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
++spinlock_cacheline_t pagecache_lock_cacheline = {SPIN_LOCK_UNLOCKED};
++
+ /*
+ * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock
+ * with the pagecache_lock held.
+@@ -57,7 +58,7 @@
+ * pagemap_lru_lock ->
+ * pagecache_lock
+ */
+-spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
++spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED};
+
+ #define CLUSTER_PAGES (1 << page_cluster)
+ #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster)
+diff -urN 2.4.14pre4/mm/highmem.c spinlock/mm/highmem.c
+--- 2.4.14pre4/mm/highmem.c Wed Oct 24 08:04:27 2001
++++ spinlock/mm/highmem.c Tue Oct 30 01:44:59 2001
+@@ -32,7 +32,8 @@
+ */
+ static int pkmap_count[LAST_PKMAP];
+ static unsigned int last_pkmap_nr;
+-static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED;
++static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED};
++#define kmap_lock kmap_lock_cacheline.lock
+
+ pte_t * pkmap_page_table;
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1
new file mode 100644
index 000000000000..31c2fb732a75
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1
@@ -0,0 +1,20 @@
+--- 2.4.10pre2aa1/arch/i386/lib/usercopy.c.~1~ Thu Aug 30 00:35:26 2001
++++ 2.4.10pre2aa1/arch/i386/lib/usercopy.c Thu Aug 30 03:22:04 2001
+@@ -166,6 +166,8 @@
+ unsigned long res, tmp;
+
+ __asm__ __volatile__(
++ " testl %0, %0\n"
++ " jz 3f\n"
+ " andl %0,%%ecx\n"
+ "0: repne; scasb\n"
+ " setne %%al\n"
+@@ -174,6 +176,8 @@
+ "1:\n"
+ ".section .fixup,\"ax\"\n"
+ "2: xorl %%eax,%%eax\n"
++ " jmp 1b\n"
++ "3: movb $1,%%al\n"
+ " jmp 1b\n"
+ ".previous\n"
+ ".section __ex_table,\"a\"\n"
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1
new file mode 100644
index 000000000000..ac2f9a2778da
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1
@@ -0,0 +1,35 @@
+--- 2.4.9aa3/mm/filemap.c.~1~ Sun Aug 19 06:59:36 2001
++++ 2.4.9aa3/mm/filemap.c Sun Aug 19 07:33:24 2001
+@@ -1645,11 +1645,18 @@
+ * pages in the previous window.
+ */
+ if ((pgoff + (ra_window >> 1)) == vma->vm_raend) {
+- unsigned long start = vma->vm_pgoff + vma->vm_raend;
++ unsigned long vm_raend = *(volatile unsigned long *) &vma->vm_raend;
++ unsigned long start = vma->vm_pgoff + vm_raend;
+ unsigned long end = start + ra_window;
+
+ if (end > ((vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff))
+ end = (vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff;
++ /*
++ * Sanitize 'start' as well because vm_raend is racy when only
++ * the read sem is acquired like here.
++ */
++ if (start < vma->vm_pgoff)
++ return;
+ if (start > end)
+ return;
+
+@@ -1663,10 +1670,10 @@
+
+ /* if we're far enough past the beginning of this area,
+ recycle pages that are in the previous window. */
+- if (vma->vm_raend > (vma->vm_pgoff + ra_window + ra_window)) {
++ if (vm_raend > (vma->vm_pgoff + ra_window + ra_window)) {
+ unsigned long window = ra_window << PAGE_SHIFT;
+
+- end = vma->vm_start + (vma->vm_raend << PAGE_SHIFT);
++ end = vma->vm_start + (vm_raend << PAGE_SHIFT);
+ end -= window + window;
+ filemap_sync(vma, end - window, window, MS_INVALIDATE);
+ }
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1
new file mode 100644
index 000000000000..2dd5cb755887
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1
@@ -0,0 +1,10 @@
+--- 2.4.10pre12aa2/mm/vmalloc.c.~1~ Thu Sep 20 01:44:20 2001
++++ 2.4.10pre12aa2/mm/vmalloc.c Fri Sep 21 00:40:48 2001
+@@ -144,6 +144,7 @@
+ int ret;
+
+ dir = pgd_offset_k(address);
++ flush_cache_all();
+ spin_lock(&init_mm.page_table_lock);
+ do {
+ pmd_t *pmd;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1
new file mode 100644
index 000000000000..7006f1fb2497
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1
@@ -0,0 +1,12 @@
+--- 2.4.10pre14aa1/mm/vmalloc.c.~1~ Sat Sep 22 12:02:18 2001
++++ 2.4.10pre14aa1/mm/vmalloc.c Sat Sep 22 16:54:58 2001
+@@ -164,6 +164,9 @@
+ ret = 0;
+ } while (address && (address < end));
+ spin_unlock(&init_mm.page_table_lock);
++#if !defined(__alpha__) && !defined(__i386__)
++ flush_tlb_all();
++#endif
+ return ret;
+ }
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1
new file mode 100644
index 000000000000..db05cc32aefe
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1
@@ -0,0 +1,18 @@
+--- 2.4.10pre2aa3/arch/i386/kernel/irq.c.~1~ Sat Sep 1 02:39:49 2001
++++ 2.4.10pre2aa3/arch/i386/kernel/irq.c Sat Sep 1 02:40:17 2001
+@@ -443,10 +443,12 @@
+
+ status = 1; /* Force the "do bottom halves" bit */
+
+- if (!(action->flags & SA_INTERRUPT))
+- __sti();
+-
+ do {
++ if (!(action->flags & SA_INTERRUPT))
++ __sti();
++ else
++ __cli();
++
+ status |= action->flags;
+ action->handler(irq, action->dev_id, regs);
+ action = action->next;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1
new file mode 100644
index 000000000000..70728f71fd32
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1
@@ -0,0 +1,22 @@
+diff -urN 2.4.10/kernel/time.c xtime/kernel/time.c
+--- 2.4.10/kernel/time.c Thu Nov 16 15:37:43 2000
++++ xtime/kernel/time.c Sun Sep 23 22:20:29 2001
+@@ -38,7 +38,18 @@
+
+ static void do_normal_gettime(struct timeval * tm)
+ {
++ extern rwlock_t xtime_lock;
++ unsigned long flags;
++ /*
++ * Inspired by arch/.../kernel/time.c in which do_gettimeofday also
++ * locks xtime_lock, do_normal_gettime should do the same. After all
++ * access to xtime isn't atomic. -rolf 20010923
++ */
++ read_lock_irqsave(&xtime_lock, flags);
++
+ *tm=xtime;
++
++ read_unlock_irqrestore(&xtime_lock, flags);
+ }
+
+ void (*do_get_fast_time)(struct timeval *) = do_normal_gettime;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2
new file mode 100644
index 000000000000..5eac3e42d144
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2
@@ -0,0 +1,77 @@
+diff -urN 2.4.10aa2/fs/inode.c compiler/fs/inode.c
+--- 2.4.10aa2/fs/inode.c Fri Sep 28 02:45:57 2001
++++ compiler/fs/inode.c Fri Sep 28 02:47:02 2001
+@@ -17,7 +17,6 @@
+ #include <linux/swapctl.h>
+ #include <linux/prefetch.h>
+ #include <linux/locks.h>
+-#include <linux/compiler.h>
+
+ /*
+ * New inode.c implementation.
+diff -urN 2.4.10aa2/include/linux/kernel.h compiler/include/linux/kernel.h
+--- 2.4.10aa2/include/linux/kernel.h Fri Sep 28 02:46:24 2001
++++ compiler/include/linux/kernel.h Fri Sep 28 02:46:35 2001
+@@ -11,6 +11,7 @@
+ #include <linux/linkage.h>
+ #include <linux/stddef.h>
+ #include <linux/types.h>
++#include <linux/compiler.h>
+
+ /* Optimization barrier */
+ /* The "volatile" is due to gcc bugs */
+diff -urN 2.4.10aa2/mm/page_alloc.c compiler/mm/page_alloc.c
+--- 2.4.10aa2/mm/page_alloc.c Fri Sep 28 02:45:59 2001
++++ compiler/mm/page_alloc.c Fri Sep 28 02:46:56 2001
+@@ -17,7 +17,6 @@
+ #include <linux/pagemap.h>
+ #include <linux/bootmem.h>
+ #include <linux/slab.h>
+-#include <linux/compiler.h>
+ #include <linux/vmalloc.h>
+
+ int nr_swap_pages;
+diff -urN 2.4.10aa2/mm/slab.c compiler/mm/slab.c
+--- 2.4.10aa2/mm/slab.c Fri Sep 28 02:46:24 2001
++++ compiler/mm/slab.c Fri Sep 28 02:46:35 2001
+@@ -72,7 +72,6 @@
+ #include <linux/slab.h>
+ #include <linux/interrupt.h>
+ #include <linux/init.h>
+-#include <linux/compiler.h>
+ #include <asm/uaccess.h>
+
+ /*
+diff -urN 2.4.10aa2/mm/swapfile.c compiler/mm/swapfile.c
+--- 2.4.10aa2/mm/swapfile.c Fri Sep 28 02:46:24 2001
++++ compiler/mm/swapfile.c Fri Sep 28 02:46:35 2001
+@@ -14,7 +14,6 @@
+ #include <linux/vmalloc.h>
+ #include <linux/pagemap.h>
+ #include <linux/shm.h>
+-#include <linux/compiler.h>
+
+ #include <asm/pgtable.h>
+
+diff -urN 2.4.10aa2/mm/vmscan.c compiler/mm/vmscan.c
+--- 2.4.10aa2/mm/vmscan.c Fri Sep 28 02:46:24 2001
++++ compiler/mm/vmscan.c Fri Sep 28 02:46:35 2001
+@@ -21,7 +21,6 @@
+ #include <linux/init.h>
+ #include <linux/highmem.h>
+ #include <linux/file.h>
+-#include <linux/compiler.h>
+
+ #include <asm/pgalloc.h>
+
+diff -urN 2.4.13pre1/mm/filemap.c o_direct/mm/filemap.c
+--- 2.4.13pre1/mm/filemap.c Fri Oct 12 06:15:14 2001
++++ o_direct/mm/filemap.c Fri Oct 12 08:02:24 2001
+@@ -23,7 +23,6 @@
+ #include <linux/init.h>
+ #include <linux/mm.h>
+ #include <linux/iobuf.h>
+-#include <linux/compiler.h>
+
+ #include <asm/pgalloc.h>
+ #include <asm/uaccess.h>
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1
new file mode 100644
index 000000000000..e270f772b01c
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1
@@ -0,0 +1,10 @@
+--- 2.4.13pre3aa2/drivers/md/lvm.c.~1~ Wed Oct 17 08:19:05 2001
++++ 2.4.13pre3aa2/drivers/md/lvm.c Wed Oct 17 21:45:51 2001
+@@ -1067,7 +1067,6 @@
+ P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n",
+ minor, VG_BLK(minor), LV_BLK(minor));
+
+- sync_dev(inode->i_rdev);
+ if (lv_ptr->lv_open == 1) vg_ptr->lv_open--;
+ lv_ptr->lv_open--;
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1
new file mode 100644
index 000000000000..4d8ab00700bb
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1
@@ -0,0 +1,80 @@
+diff -urN lvm/drivers/md/lvm.c lvm-fix/drivers/md/lvm.c
+--- lvm/drivers/md/lvm.c Thu Oct 4 18:51:34 2001
++++ lvm-fix/drivers/md/lvm.c Thu Oct 4 18:51:48 2001
+@@ -259,9 +259,13 @@
+
+ #include "lvm-internal.h"
+
+-#define LVM_CORRECT_READ_AHEAD( a) \
+- if ( a < LVM_MIN_READ_AHEAD || \
+- a > LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD;
++#define LVM_CORRECT_READ_AHEAD(a) \
++do { \
++ if ((a) < LVM_MIN_READ_AHEAD || \
++ (a) > LVM_MAX_READ_AHEAD) \
++ (a) = LVM_DEFAULT_READ_AHEAD; \
++ read_ahead[MAJOR_NR] = (a); \
++} while(0)
+
+ #ifndef WRITEA
+ # define WRITEA WRITE
+@@ -414,17 +418,13 @@
+
+ static struct gendisk lvm_gendisk =
+ {
+- MAJOR_NR, /* major # */
+- LVM_NAME, /* name of major */
+- 0, /* number of times minor is shifted
+- to get real minor */
+- 1, /* maximum partitions per device */
+- lvm_hd_struct, /* partition table */
+- lvm_size, /* device size in blocks, copied
+- to block_size[] */
+- MAX_LV, /* number or real devices */
+- NULL, /* internal */
+- NULL, /* pointer to next gendisk struct (internal) */
++ major: MAJOR_NR,
++ major_name: LVM_NAME,
++ minor_shift: 0,
++ max_p: 1,
++ part: lvm_hd_struct,
++ sizes: lvm_size,
++ nr_real: MAX_LV,
+ };
+
+ /*
+@@ -939,6 +939,11 @@
+ return -EFAULT;
+ break;
+
++ case BLKGETSIZE64:
++ if (put_user((u64)lv_ptr->lv_size << 9, (u64 *)arg))
++ return -EFAULT;
++ break;
++
+
+ case BLKFLSBUF:
+ /* flush buffer cache */
+@@ -962,6 +967,7 @@
+ (long) arg > LVM_MAX_READ_AHEAD)
+ return -EINVAL;
+ lv_ptr->lv_read_ahead = (long) arg;
++ read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead;
+ break;
+
+
+diff -urN lvm/include/linux/lvm.h lvm-fix/include/linux/lvm.h
+--- lvm/include/linux/lvm.h Thu Oct 4 18:51:34 2001
++++ lvm-fix/include/linux/lvm.h Thu Oct 4 18:51:48 2001
+@@ -260,8 +260,9 @@
+ #define LVM_MAX_STRIPES 128 /* max # of stripes */
+ #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */
+ #define LVM_MAX_MIRRORS 2 /* future use */
+-#define LVM_MIN_READ_AHEAD 2 /* minimum read ahead sectors */
+-#define LVM_MAX_READ_AHEAD 120 /* maximum read ahead sectors */
++#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */
++#define LVM_DEFAULT_READ_AHEAD 1024 /* default read ahead sectors for 512k scsi segments */
++#define LVM_MAX_READ_AHEAD 10000 /* maximum read ahead sectors */
+ #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */
+ #define LVM_PARTITION 0xfe /* LVM partition id */
+ #define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1
new file mode 100644
index 000000000000..e2175a75c4ad
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1
@@ -0,0 +1,39 @@
+--- 0.21/drivers/md/lvm.c Sun, 07 Oct 2001 22:15:54 -0400
++++ 0.21(w)/drivers/md/lvm.c Mon, 08 Oct 2001 15:54:42 -0400
+@@ -1142,7 +1142,8 @@
+
+ /* we must redo lvm_snapshot_remap_block in order to avoid a
+ race condition in the gap where no lock was held */
+- if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
++ if (lv->lv_block_exception &&
++ !lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) &&
+ !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv))
+ lvm_write_COW_table_block(vg, lv);
+
+@@ -1151,11 +1152,12 @@
+
+ static inline void _remap_snapshot(kdev_t rdev, ulong rsector,
+ ulong pe_start, lv_t *lv, vg_t *vg) {
+- int r;
++ int r = 0;
+
+ /* check to see if this chunk is already in the snapshot */
+ down_read(&lv->lv_lock);
+- r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
++ if (lv->lv_block_exception)
++ r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv);
+ up_read(&lv->lv_lock);
+
+ if (!r)
+Index: 0.21/drivers/md/lvm-snap.c
+--- 0.21/drivers/md/lvm-snap.c Sat, 06 Oct 2001 00:07:22 -0400 root (linux/i/c/38_lvm-snap.c 1.1.2.1.2.1 644)
++++ 0.21(w)/drivers/md/lvm-snap.c Mon, 08 Oct 2001 15:13:10 -0400 root (linux/i/c/38_lvm-snap.c 1.1.2.1.2.1 644)
+@@ -140,6 +140,8 @@
+ unsigned long mask = lv->lv_snapshot_hash_mask;
+ int chunk_size = lv->lv_chunk_size;
+
++ if (!hash_table)
++ BUG() ;
+ hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)];
+ list_add(&exception->hash, hash_table);
+ }
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2
new file mode 100644
index 000000000000..a5f424b7a282
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2
@@ -0,0 +1,64 @@
+--- 0.36/drivers/md/lvm-snap.c Thu, 11 Oct 2001 10:17:22 -0400
++++ 0.36(w)/drivers/md/lvm-snap.c Thu, 11 Oct 2001 14:11:50 -0400
+@@ -326,6 +326,7 @@
+ {
+ const char * reason;
+ unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
++ unsigned long phys_start ;
+ int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
+ struct kiobuf * iobuf;
+ unsigned long blocks[KIO_MAX_SECTORS];
+@@ -360,8 +361,14 @@
+
+ iobuf = lv_snap->lv_iobuf;
+
+- blksize_org = lvm_get_blksize(org_phys_dev);
+- blksize_snap = lvm_get_blksize(snap_phys_dev);
++ blksize_org = get_hardsect_size(org_phys_dev);
++ blksize_snap = get_hardsect_size(snap_phys_dev);
++
++ /* org_start must not change, we use it later on to fill in the
++ ** exception table
++ */
++ phys_start = org_start ;
++
+ max_blksize = max(blksize_org, blksize_snap);
+ min_blksize = min(blksize_org, blksize_snap);
+ max_sectors = KIO_MAX_SECTORS * (min_blksize>>9);
+@@ -376,7 +383,7 @@
+
+ iobuf->length = nr_sectors << 9;
+
+- if(!lvm_snapshot_prepare_blocks(blocks, org_start,
++ if(!lvm_snapshot_prepare_blocks(blocks, phys_start,
+ nr_sectors, blksize_org))
+ goto fail_prepare;
+
+@@ -391,6 +398,9 @@
+ if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks,
+ blksize_snap, lv_snap) != (nr_sectors<<9))
+ goto fail_raw_write;
++
++ phys_start += nr_sectors ;
++ snap_start += nr_sectors ;
+ }
+
+ #ifdef DEBUG_SNAPSHOT
+@@ -605,7 +615,7 @@
+ snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
+ snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
+
+- blksize_snap = lvm_get_blksize(snap_phys_dev);
++ blksize_snap = get_hardsect_size(snap_phys_dev);
+
+ COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t);
+ idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block;
+@@ -654,7 +664,7 @@
+ idx++;
+ snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
+ snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
+- blksize_snap = lvm_get_blksize(snap_phys_dev);
++ blksize_snap = get_hardsect_size(snap_phys_dev);
+ blocks[0] = snap_pe_start >> (blksize_snap >> 10);
+ } else blocks[0]++;
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2
new file mode 100644
index 000000000000..125fcf3ad4c9
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2
@@ -0,0 +1,56 @@
+diff -urN 2.4.6pre5/arch/i386/config.in novirtual/arch/i386/config.in
+--- 2.4.6pre5/arch/i386/config.in Thu Jun 21 08:03:30 2001
++++ novirtual/arch/i386/config.in Thu Jun 21 16:02:11 2001
+@@ -165,6 +165,9 @@
+ define_bool CONFIG_HIGHMEM y
+ define_bool CONFIG_X86_PAE y
+ fi
++if [ "$CONFIG_NOHIGHMEM" = "y" ]; then
++ define_bool CONFIG_NO_PAGE_VIRTUAL y
++fi
+
+ bool 'Math emulation' CONFIG_MATH_EMULATION
+ bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
+diff -urN 2.4.6pre5/include/asm-i386/pgtable.h novirtual/include/asm-i386/pgtable.h
+--- 2.4.6pre5/include/asm-i386/pgtable.h Thu Jun 14 18:07:49 2001
++++ novirtual/include/asm-i386/pgtable.h Thu Jun 21 16:02:11 2001
+@@ -255,7 +255,11 @@
+ * Permanent address of a page. Obviously must never be
+ * called on a highmem page.
+ */
++#ifdef CONFIG_NO_PAGE_VIRTUAL
++#define page_address(page) __va((page - mem_map) << PAGE_SHIFT)
++#else
+ #define page_address(page) ((page)->virtual)
++#endif
+ #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+ /*
+diff -urN 2.4.6pre5/include/linux/mm.h novirtual/include/linux/mm.h
+--- 2.4.6pre5/include/linux/mm.h Thu Jun 21 08:03:56 2001
++++ novirtual/include/linux/mm.h Thu Jun 21 16:02:33 2001
+@@ -160,8 +160,10 @@
+ wait_queue_head_t wait; /* Page locked? Stand in line... */
+ struct page **pprev_hash; /* Complement to *next_hash. */
+ struct buffer_head * buffers; /* Buffer maps us to a disk block. */
++#ifndef CONFIG_NO_PAGE_VIRTUAL
+ void *virtual; /* Kernel virtual address (NULL if
+ not kmapped, ie. highmem) */
++#endif
+ struct zone_struct *zone; /* Memory zone we are in. */
+ } mem_map_t;
+
+diff -urN 2.4.6pre5/mm/page_alloc.c novirtual/mm/page_alloc.c
+--- 2.4.6pre5/mm/page_alloc.c Thu Jun 21 08:03:57 2001
++++ novirtual/mm/page_alloc.c Thu Jun 21 16:02:11 2001
+@@ -851,8 +851,10 @@
+ for (i = 0; i < size; i++) {
+ struct page *page = mem_map + offset + i;
+ page->zone = zone;
++#ifndef CONFIG_NO_PAGE_VIRTUAL
+ if (j != ZONE_HIGHMEM)
+ page->virtual = __va(zone_start_paddr);
++#endif
+ zone_start_paddr += PAGE_SIZE;
+ }
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13
new file mode 100644
index 000000000000..1eb40c9dc3e3
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13
@@ -0,0 +1,800 @@
+diff -urN numa-sched-ref/arch/alpha/config.in numa-sched/arch/alpha/config.in
+--- numa-sched-ref/arch/alpha/config.in Sun Oct 28 15:28:11 2001
++++ numa-sched/arch/alpha/config.in Sun Oct 28 15:28:25 2001
+@@ -219,6 +219,9 @@
+ bool 'Discontiguous Memory Support' CONFIG_DISCONTIGMEM
+ if [ "$CONFIG_DISCONTIGMEM" = "y" ]; then
+ bool ' NUMA Support' CONFIG_NUMA
++ if [ "$CONFIG_NUMA" = "y" ]; then
++ bool ' NUMA Scheduler Support' CONFIG_NUMA_SCHED
++ fi
+ fi
+ fi
+
+diff -urN numa-sched-ref/arch/alpha/kernel/entry.S numa-sched/arch/alpha/kernel/entry.S
+--- numa-sched-ref/arch/alpha/kernel/entry.S Sun Oct 28 15:28:10 2001
++++ numa-sched/arch/alpha/kernel/entry.S Sun Oct 28 15:28:25 2001
+@@ -35,7 +35,7 @@
+ #define TASK_EXEC_DOMAIN 32
+ #define TASK_NEED_RESCHED 40
+ #define TASK_PTRACE 48
+-#define TASK_PROCESSOR 100
++#define TASK_PROCESSOR 84
+
+ /*
+ * task flags (must match include/linux/sched.h):
+diff -urN numa-sched-ref/include/asm-alpha/mmzone.h numa-sched/include/asm-alpha/mmzone.h
+--- numa-sched-ref/include/asm-alpha/mmzone.h Sat May 26 04:03:47 2001
++++ numa-sched/include/asm-alpha/mmzone.h Sun Oct 28 15:28:25 2001
+@@ -21,7 +21,7 @@
+ #ifdef NOTYET
+ kern_vars_t kern_vars;
+ #endif
+-#if defined(CONFIG_NUMA) && defined(CONFIG_NUMA_SCHED)
++#ifdef CONFIG_NUMA_SCHED
+ struct numa_schedule_data schedule_data;
+ #endif
+ } plat_pg_data_t;
+diff -urN numa-sched-ref/include/asm-alpha/timex.h numa-sched/include/asm-alpha/timex.h
+--- numa-sched-ref/include/asm-alpha/timex.h Tue Dec 29 22:56:15 1998
++++ numa-sched/include/asm-alpha/timex.h Sun Oct 28 15:28:25 2001
+@@ -27,4 +27,8 @@
+ return ret;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-arm/timex.h numa-sched/include/asm-arm/timex.h
+--- numa-sched-ref/include/asm-arm/timex.h Thu Nov 16 15:37:33 2000
++++ numa-sched/include/asm-arm/timex.h Sun Oct 28 15:28:25 2001
+@@ -23,4 +23,8 @@
+ return 0;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-cris/timex.h numa-sched/include/asm-cris/timex.h
+--- numa-sched-ref/include/asm-cris/timex.h Sat May 26 04:03:47 2001
++++ numa-sched/include/asm-cris/timex.h Sun Oct 28 15:28:25 2001
+@@ -20,4 +20,8 @@
+ return 0;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-i386/timex.h numa-sched/include/asm-i386/timex.h
+--- numa-sched-ref/include/asm-i386/timex.h Sun Oct 28 15:04:11 2001
++++ numa-sched/include/asm-i386/timex.h Sun Oct 28 15:44:38 2001
+@@ -47,4 +47,8 @@
+
+ extern unsigned long cpu_khz;
+
++typedef cycles_t last_schedule_t;
++#define get_last_schedule() ({ get_cycles(); })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-ia64/timex.h numa-sched/include/asm-ia64/timex.h
+--- numa-sched-ref/include/asm-ia64/timex.h Tue May 1 19:35:31 2001
++++ numa-sched/include/asm-ia64/timex.h Sun Oct 28 15:28:25 2001
+@@ -21,4 +21,8 @@
+ return ret;
+ }
+
++typedef cycles_t last_schedule_t;
++#define get_last_schedule() ({ get_cycles(); })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif /* _ASM_IA64_TIMEX_H */
+diff -urN numa-sched-ref/include/asm-m68k/timex.h numa-sched/include/asm-m68k/timex.h
+--- numa-sched-ref/include/asm-m68k/timex.h Tue Jan 5 20:20:43 1999
++++ numa-sched/include/asm-m68k/timex.h Sun Oct 28 15:28:25 2001
+@@ -19,4 +19,8 @@
+ return 0;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-mips/timex.h numa-sched/include/asm-mips/timex.h
+--- numa-sched-ref/include/asm-mips/timex.h Sat May 13 17:31:25 2000
++++ numa-sched/include/asm-mips/timex.h Sun Oct 28 15:28:25 2001
+@@ -36,6 +36,11 @@
+ {
+ return read_32bit_cp0_register(CP0_COUNT);
+ }
++
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif /* __KERNEL__ */
+
+ #endif /* __ASM_MIPS_TIMEX_H */
+diff -urN numa-sched-ref/include/asm-mips64/timex.h numa-sched/include/asm-mips64/timex.h
+--- numa-sched-ref/include/asm-mips64/timex.h Sun Sep 23 21:11:41 2001
++++ numa-sched/include/asm-mips64/timex.h Sun Oct 28 15:28:25 2001
+@@ -43,4 +43,8 @@
+ return val;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif /* _ASM_TIMEX_H */
+diff -urN numa-sched-ref/include/asm-parisc/timex.h numa-sched/include/asm-parisc/timex.h
+--- numa-sched-ref/include/asm-parisc/timex.h Thu Dec 14 22:34:13 2000
++++ numa-sched/include/asm-parisc/timex.h Sun Oct 28 15:28:25 2001
+@@ -18,4 +18,8 @@
+ return mfctl(16);
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-ppc/timex.h numa-sched/include/asm-ppc/timex.h
+--- numa-sched-ref/include/asm-ppc/timex.h Sun Sep 23 21:11:41 2001
++++ numa-sched/include/asm-ppc/timex.h Sun Oct 28 15:28:25 2001
+@@ -45,5 +45,9 @@
+ return ret;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+ #endif /* __KERNEL__ */
+diff -urN numa-sched-ref/include/asm-s390/timex.h numa-sched/include/asm-s390/timex.h
+--- numa-sched-ref/include/asm-s390/timex.h Fri May 12 20:41:44 2000
++++ numa-sched/include/asm-s390/timex.h Sun Oct 28 15:28:25 2001
+@@ -26,4 +26,8 @@
+ return 0;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-s390x/timex.h numa-sched/include/asm-s390x/timex.h
+--- numa-sched-ref/include/asm-s390x/timex.h Thu Feb 22 03:45:11 2001
++++ numa-sched/include/asm-s390x/timex.h Sun Oct 28 15:28:25 2001
+@@ -26,4 +26,8 @@
+ return 0;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-sh/timex.h numa-sched/include/asm-sh/timex.h
+--- numa-sched-ref/include/asm-sh/timex.h Fri Jan 5 02:19:29 2001
++++ numa-sched/include/asm-sh/timex.h Sun Oct 28 15:28:25 2001
+@@ -21,4 +21,8 @@
+ return 0;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif /* __ASM_SH_TIMEX_H */
+diff -urN numa-sched-ref/include/asm-sparc/timex.h numa-sched/include/asm-sparc/timex.h
+--- numa-sched-ref/include/asm-sparc/timex.h Thu Mar 11 01:53:37 1999
++++ numa-sched/include/asm-sparc/timex.h Sun Oct 28 15:28:25 2001
+@@ -17,4 +17,8 @@
+ extern cycles_t cacheflush_time;
+ #define get_cycles() (0)
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/asm-sparc64/timex.h numa-sched/include/asm-sparc64/timex.h
+--- numa-sched-ref/include/asm-sparc64/timex.h Sun Sep 23 21:11:42 2001
++++ numa-sched/include/asm-sparc64/timex.h Sun Oct 28 15:28:25 2001
+@@ -20,4 +20,8 @@
+ ret; \
+ })
+
++typedef cycles_t last_schedule_t;
++#define get_last_schedule() ({ get_cycles(); })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
+diff -urN numa-sched-ref/include/linux/numa_sched.h numa-sched/include/linux/numa_sched.h
+--- numa-sched-ref/include/linux/numa_sched.h Thu Jan 1 01:00:00 1970
++++ numa-sched/include/linux/numa_sched.h Sun Oct 28 15:59:37 2001
+@@ -0,0 +1,67 @@
++/*
++ * linux/include/linux/numa_sched.h
++ *
++ * NUMA based scheduler
++ */
++
++#ifndef _LINUX_NUMA_SCHED_H
++#define _LINUX_NUMA_SCHED_H
++
++#ifdef CONFIG_NUMA_SCHED
++#include <linux/cache.h>
++#include <linux/list.h>
++#include <linux/threads.h>
++#include <asm/timex.h>
++
++struct numa_per_cpu_schedule_data {
++ struct task_struct * curr;
++ last_schedule_t last_schedule;
++ long quiescent;
++};
++
++struct numa_schedule_data {
++ struct numa_per_cpu_schedule_data per_cpu[NR_CPUS] ____cacheline_aligned;
++ struct list_head runqueue_head;
++ int nr_running, nr_threads;
++};
++
++#include <linux/mmzone.h>
++
++#define numa_nr_running_inc() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_running++; } while(0)
++#define numa_nr_running_dec() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_running--; } while(0)
++#define numa_nr_running(nid) (NODE_SCHEDULE_DATA(nid)->nr_running)
++
++#define numa_nr_threads_inc() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_threads++; } while(0)
++#define numa_nr_threads_dec() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_threads--; } while(0)
++#define numa_nr_threads(nid) (NODE_SCHEDULE_DATA(nid)->nr_threads)
++
++#define cpu_curr(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].curr)
++#define last_schedule(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].last_schedule)
++#define RCU_quiescent(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].quiescent)
++
++#define numa_runqueue_head(x) (&NODE_SCHEDULE_DATA(x)->runqueue_head)
++
++#else /* CONFIG_NUMA_SCHED */
++
++#define numa_nr_running_inc() do { } while(0)
++#define numa_nr_running_dec() do { } while(0)
++#define numa_nr_threads_inc() do { } while(0)
++#define numa_nr_threads_dec() do { } while(0)
++
++/* per-cpu schedule data */
++typedef struct schedule_data_s {
++ struct task_struct * curr;
++ last_schedule_t last_schedule;
++ long quiescent;
++} schedule_data_t ____cacheline_aligned;
++
++extern schedule_data_t schedule_data[NR_CPUS];
++
++#define cpu_curr(cpu) (schedule_data[(cpu)].curr)
++#define last_schedule(cpu) (schedule_data[(cpu)].last_schedule)
++#define RCU_quiescent(cpu) (schedule_data[(cpu)].quiescent)
++
++#define numa_runqueue_head(x) (&runqueue_head)
++#endif /* CONFIG_NUMA_SCHED */
++
++#endif /* _LINUX_NUMA_SCHED_H */
+diff -urN numa-sched-ref/include/linux/sched.h numa-sched/include/linux/sched.h
+--- numa-sched-ref/include/linux/sched.h Sun Oct 28 15:28:11 2001
++++ numa-sched/include/linux/sched.h Sun Oct 28 15:44:39 2001
+@@ -27,6 +27,7 @@
+ #include <linux/signal.h>
+ #include <linux/securebits.h>
+ #include <linux/fs_struct.h>
++#include <linux/numa_sched.h>
+
+ struct exec_domain;
+
+@@ -302,9 +303,9 @@
+ * all fields in a single cacheline that are needed for
+ * the goodness() loop in schedule().
+ */
+- long counter;
+- long nice;
+- unsigned long policy;
++ int counter;
++ int nice;
++ unsigned int policy;
+ struct mm_struct *mm;
+ int has_cpu, processor;
+ unsigned long cpus_allowed;
+@@ -313,8 +314,9 @@
+ * that's just fine.)
+ */
+ struct list_head run_list;
+- unsigned long sleep_time;
+-
++#ifdef CONFIG_NUMA_SCHED
++ int nid;
++#endif
+ struct task_struct *next_task, *prev_task;
+ struct mm_struct *active_mm;
+ struct rw_sem_recursor mm_recursor;
+@@ -464,7 +466,7 @@
+ mm: NULL, \
+ active_mm: &init_mm, \
+ mm_recursor: RWSEM_RECURSOR_INITIALIZER, \
+- cpus_allowed: -1, \
++ cpus_allowed: -1UL, \
+ run_list: LIST_HEAD_INIT(tsk.run_list), \
+ next_task: &tsk, \
+ prev_task: &tsk, \
+@@ -552,18 +554,6 @@
+ extern volatile struct timeval xtime;
+ extern void do_timer(struct pt_regs *);
+
+-/* per-cpu schedule data */
+-typedef struct schedule_data_s {
+- struct task_struct * curr;
+- cycles_t last_schedule;
+- long quiescent;
+-} schedule_data_t ____cacheline_aligned;
+-
+-extern schedule_data_t schedule_data[NR_CPUS];
+-#define cpu_curr(cpu) (schedule_data[(cpu)].curr)
+-#define last_schedule(cpu) (schedule_data[(cpu)].last_schedule)
+-#define RCU_quiescent(cpu) (schedule_data[(cpu)].quiescent)
+-
+ extern unsigned int * prof_buffer;
+ extern unsigned long prof_len;
+ extern unsigned long prof_shift;
+@@ -781,6 +771,30 @@
+ extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait));
+ extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait));
+
++#define nr_running_inc() \
++do { \
++ numa_nr_running_inc(); \
++ nr_running++; \
++} while (0)
++
++#define nr_running_dec() \
++do { \
++ numa_nr_running_dec(); \
++ nr_running--; \
++} while (0)
++
++#define nr_threads_inc() \
++do { \
++ numa_nr_threads_inc(); \
++ nr_threads++; \
++} while (0)
++
++#define nr_threads_dec() \
++do { \
++ numa_nr_threads_dec(); \
++ nr_threads--; \
++} while (0)
++
+ #define __wait_event(wq, condition) \
+ do { \
+ wait_queue_t __wait; \
+@@ -861,29 +875,28 @@
+ #define next_thread(p) \
+ list_entry((p)->thread_group.next, struct task_struct, thread_group)
+
+-static inline void del_from_runqueue(struct task_struct * p)
+-{
+- nr_running--;
+- p->sleep_time = jiffies;
+- list_del(&p->run_list);
+- p->run_list.next = NULL;
+-}
++#define del_from_runqueue(p) \
++do { \
++ nr_running_dec(); \
++ list_del(&(p)->run_list); \
++ (p)->run_list.next = NULL; \
++} while(0)
+
+ static inline int task_on_runqueue(struct task_struct *p)
+ {
+ return (p->run_list.next != NULL);
+ }
+
+-static inline void unhash_process(struct task_struct *p)
+-{
+- if (task_on_runqueue(p)) BUG();
+- write_lock_irq(&tasklist_lock);
+- nr_threads--;
+- unhash_pid(p);
+- REMOVE_LINKS(p);
+- list_del(&p->thread_group);
+- write_unlock_irq(&tasklist_lock);
+-}
++#define unhash_process(p) \
++do { \
++ if (task_on_runqueue(p)) BUG(); \
++ write_lock_irq(&tasklist_lock); \
++ nr_threads_dec(); \
++ unhash_pid(p); \
++ REMOVE_LINKS(p); \
++ list_del(&(p)->thread_group); \
++ write_unlock_irq(&tasklist_lock); \
++} while(0)
+
+ /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */
+ static inline void task_lock(struct task_struct *p)
+diff -urN numa-sched-ref/kernel/fork.c numa-sched/kernel/fork.c
+--- numa-sched-ref/kernel/fork.c Sun Oct 28 15:28:10 2001
++++ numa-sched/kernel/fork.c Sun Oct 28 15:28:25 2001
+@@ -635,7 +635,6 @@
+ {
+ int i;
+ p->has_cpu = 0;
+- p->processor = current->processor;
+ /* ?? should we just memset this ?? */
+ for(i = 0; i < smp_num_cpus; i++)
+ p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
+@@ -712,7 +711,7 @@
+
+ SET_LINKS(p);
+ hash_pid(p);
+- nr_threads++;
++ nr_threads_inc();
+ write_unlock_irq(&tasklist_lock);
+
+ if (p->ptrace & PT_PTRACED)
+diff -urN numa-sched-ref/kernel/sched.c numa-sched/kernel/sched.c
+--- numa-sched-ref/kernel/sched.c Sun Oct 28 15:28:11 2001
++++ numa-sched/kernel/sched.c Sun Oct 28 15:38:10 2001
+@@ -10,6 +10,7 @@
+ * 1998-11-19 Implemented schedule_timeout() and related stuff
+ * by Andrea Arcangeli
+ * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar
++ * 2001-01-29 first NUMA scheduler attempt by Andrea Arcangeli, SuSE
+ */
+
+ /*
+@@ -92,6 +93,8 @@
+ spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */
+ rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */
+
++#ifndef CONFIG_NUMA_SCHED
++
+ static LIST_HEAD(runqueue_head);
+
+ /*
+@@ -100,14 +103,31 @@
+ */
+ schedule_data_t schedule_data[NR_CPUS] __cacheline_aligned = {{&init_task,0}};
+
++#define init_numa_schedule_data() do { } while(0)
++
++#else /* CONFIG_NUMA_SCHED */
++
++static void __init init_numa_schedule_data(void)
++{
++ int i;
++
++ for (i = 0; i < numnodes; i++) {
++ INIT_LIST_HEAD(&NODE_SCHEDULE_DATA(i)->runqueue_head);
++ NODE_SCHEDULE_DATA(i)->nr_running = 0;
++ NODE_SCHEDULE_DATA(i)->nr_threads = 0;
++ }
++}
++#endif /* CONFIG_NUMA_SCHED */
++
+ struct kernel_stat kstat;
+ extern struct task_struct *child_reaper;
+
+ #ifdef CONFIG_SMP
+
+ #define idle_task(cpu) (init_tasks[cpu_number_map(cpu)])
++#define logical_idle_task(cpu) (init_tasks[cpu])
+ #define can_schedule(p,cpu) ((!(p)->has_cpu) && \
+- ((p)->cpus_allowed & (1 << cpu)))
++ ((p)->cpus_allowed & (1UL << cpu)))
+
+ #else
+
+@@ -205,8 +225,8 @@
+ #ifdef CONFIG_SMP
+ int this_cpu = smp_processor_id();
+ struct task_struct *tsk, *target_tsk;
+- int cpu, best_cpu, i, max_prio;
+- cycles_t oldest_idle;
++ int cpu, best_cpu, i, max_prio, found_idle;
++ last_schedule_t oldest_idle;
+
+ /*
+ * shortcut if the woken up task's last CPU is
+@@ -214,17 +234,17 @@
+ */
+ best_cpu = p->processor;
+ if (can_schedule(p, best_cpu)) {
+- tsk = idle_task(best_cpu);
+- if (cpu_curr(best_cpu) == tsk) {
+- int need_resched;
++ target_tsk = idle_task(best_cpu);
++ if (cpu_curr(best_cpu) == target_tsk) {
++ long need_resched;
+ send_now_idle:
+ /*
+ * If need_resched == -1 then we can skip sending
+ * the IPI altogether, tsk->need_resched is
+ * actively watched by the idle thread.
+ */
+- need_resched = tsk->need_resched;
+- tsk->need_resched = 1;
++ need_resched = target_tsk->need_resched;
++ target_tsk->need_resched = 1;
+ if ((best_cpu != this_cpu) && !need_resched)
+ smp_send_reschedule(best_cpu);
+ return;
+@@ -238,13 +258,17 @@
+ * one will have the least active cache context.) Also find
+ * the executing process which has the least priority.
+ */
+- oldest_idle = (cycles_t) -1;
+ target_tsk = NULL;
+ max_prio = 0;
++ found_idle = 0;
+
+ for (i = 0; i < smp_num_cpus; i++) {
+ cpu = cpu_logical_map(i);
+- if (!can_schedule(p, cpu))
++ if (
++#ifdef CONFIG_NUMA_SCHED
++ cputonode(cpu) != p->nid ||
++#endif
++ !can_schedule(p, cpu))
+ continue;
+ tsk = cpu_curr(cpu);
+ /*
+@@ -252,13 +276,14 @@
+ * a priority list between idle CPUs, but this is not
+ * a problem.
+ */
+- if (tsk == idle_task(cpu)) {
+- if (last_schedule(cpu) < oldest_idle) {
++ if (tsk == logical_idle_task(i)) {
++ if (!found_idle || last_schedule_before(last_schedule(cpu), oldest_idle)) {
+ oldest_idle = last_schedule(cpu);
+ target_tsk = tsk;
++ found_idle = 1;
+ }
+ } else {
+- if (oldest_idle == -1ULL) {
++ if (!found_idle) {
+ int prio = preemption_goodness(tsk, p, cpu);
+
+ if (prio > max_prio) {
+@@ -268,15 +293,33 @@
+ }
+ }
+ }
+- tsk = target_tsk;
+- if (tsk) {
+- if (oldest_idle != -1ULL) {
+- best_cpu = tsk->processor;
+- goto send_now_idle;
++
++#ifdef CONFIG_NUMA_SCHED
++ if (!target_tsk)
++ /* Make sure to use the idle cpus in the other nodes */
++ for (i = 0; i < smp_num_cpus; i++) {
++ cpu = cpu_logical_map(i);
++ if (cputonode(cpu) == p->nid || !can_schedule(p, cpu))
++ continue;
++ tsk = cpu_curr(cpu);
++ if (tsk == logical_idle_task(i)) {
++ if (!found_idle || last_schedule_before(last_schedule(cpu), oldest_idle)) {
++ oldest_idle = last_schedule(cpu);
++ target_tsk = tsk;
++ found_idle = 1;
++ target_tsk->nid = cputonode(cpu);
++ }
++ }
+ }
+- tsk->need_resched = 1;
+- if (tsk->processor != this_cpu)
+- smp_send_reschedule(tsk->processor);
++#endif
++
++ if (target_tsk) {
++ best_cpu = target_tsk->processor;
++ if (found_idle)
++ goto send_now_idle;
++ target_tsk->need_resched = 1;
++ if (best_cpu != this_cpu)
++ smp_send_reschedule(best_cpu);
+ }
+ return;
+
+@@ -300,20 +343,20 @@
+ */
+ static inline void add_to_runqueue(struct task_struct * p)
+ {
+- list_add(&p->run_list, &runqueue_head);
+- nr_running++;
++ list_add(&p->run_list, numa_runqueue_head(p->nid));
++ nr_running_inc();
+ }
+
+ static inline void move_last_runqueue(struct task_struct * p)
+ {
+ list_del(&p->run_list);
+- list_add_tail(&p->run_list, &runqueue_head);
++ list_add_tail(&p->run_list, numa_runqueue_head(p->nid));
+ }
+
+ static inline void move_first_runqueue(struct task_struct * p)
+ {
+ list_del(&p->run_list);
+- list_add(&p->run_list, &runqueue_head);
++ list_add(&p->run_list, numa_runqueue_head(p->nid));
+ }
+
+ /*
+@@ -336,9 +379,9 @@
+ p->state = TASK_RUNNING;
+ if (task_on_runqueue(p))
+ goto out;
+- add_to_runqueue(p);
+ if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
+ reschedule_idle(p);
++ add_to_runqueue(p);
+ success = 1;
+ out:
+ spin_unlock_irqrestore(&runqueue_lock, flags);
+@@ -524,10 +567,12 @@
+ */
+ asmlinkage void schedule(void)
+ {
+- schedule_data_t * sched_data;
+ struct task_struct *prev, *next, *p;
+ struct list_head *tmp;
+ int this_cpu, c;
++#ifdef CONFIG_NUMA_SCHED
++ int recalculate_all;
++#endif
+
+
+ spin_lock_prefetch(&runqueue_lock);
+@@ -542,12 +587,6 @@
+
+ release_kernel_lock(prev, this_cpu);
+
+- /*
+- * 'sched_data' is protected by the fact that we can run
+- * only one process per CPU.
+- */
+- sched_data = &schedule_data[this_cpu];
+-
+ spin_lock_irq(&runqueue_lock);
+
+ /* move an exhausted RR process to be last.. */
+@@ -581,7 +620,7 @@
+ goto still_running;
+
+ still_running_back:
+- list_for_each(tmp, &runqueue_head) {
++ list_for_each(tmp, numa_runqueue_head(numa_node_id())) {
+ p = list_entry(tmp, struct task_struct, run_list);
+ if (can_schedule(p, this_cpu)) {
+ int weight = goodness(p, this_cpu, prev->active_mm);
+@@ -590,6 +629,27 @@
+ }
+ }
+
++#ifdef CONFIG_NUMA_SCHED
++ recalculate_all = 0;
++ if (c < 0) {
++ int nid;
++
++ recalculate_all = 1;
++ for (nid = 0; nid < numnodes; nid++) {
++ if (nid == numa_node_id())
++ continue;
++ list_for_each(tmp, numa_runqueue_head(nid)) {
++ p = list_entry(tmp, struct task_struct, run_list);
++ if (can_schedule(p, this_cpu)) {
++ int weight = goodness(p, this_cpu, prev->active_mm);
++ if (weight > c)
++ c = weight, next = p;
++ }
++ }
++ }
++ }
++#endif
++
+ /* Do we need to re-calculate counters? */
+ if (!c)
+ goto recalculate;
+@@ -598,12 +658,18 @@
+ * switching to the next task, save this fact in
+ * sched_data.
+ */
+- sched_data->curr = next;
++ cpu_curr(this_cpu) = next;
+ #ifdef CONFIG_SMP
+ RCU_quiescent(this_cpu)++;
+
+ next->has_cpu = 1;
+ next->processor = this_cpu;
++#ifdef CONFIG_NUMA_SCHED
++ if (next != idle_task(this_cpu) && next->nid != numa_node_id()) {
++ next->nid = numa_node_id();
++ move_last_runqueue(next);
++ }
++#endif
+ #endif
+ spin_unlock_irq(&runqueue_lock);
+
+@@ -621,7 +687,7 @@
+ * and it's approximate, so we do not have to maintain
+ * it while holding the runqueue spinlock.
+ */
+- sched_data->last_schedule = get_cycles();
++ last_schedule(this_cpu) = get_last_schedule();
+
+ /*
+ * We drop the scheduler lock early (it's a global spinlock),
+@@ -680,8 +746,13 @@
+ struct task_struct *p;
+ spin_unlock_irq(&runqueue_lock);
+ read_lock(&tasklist_lock);
+- for_each_task(p)
++ for_each_task(p) {
++#ifdef CONFIG_NUMA_SCHED
++ if (!recalculate_all && p->nid != numa_node_id())
++ continue;
++#endif
+ p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
++ }
+ read_unlock(&tasklist_lock);
+ spin_lock_irq(&runqueue_lock);
+ }
+@@ -1062,7 +1133,7 @@
+ // Subtract non-idle processes running on other CPUs.
+ for (i = 0; i < smp_num_cpus; i++) {
+ int cpu = cpu_logical_map(i);
+- if (cpu_curr(cpu) != idle_task(cpu))
++ if (cpu_curr(cpu) != logical_idle_task(i))
+ nr_pending--;
+ }
+ #else
+@@ -1319,16 +1390,15 @@
+
+ void __init init_idle(void)
+ {
+- schedule_data_t * sched_data;
+- sched_data = &schedule_data[smp_processor_id()];
++ int cpu = smp_processor_id();
+
+ if (current != &init_task && task_on_runqueue(current)) {
+ printk("UGH! (%d:%d) was on the runqueue, removing.\n",
+ smp_processor_id(), current->pid);
+ del_from_runqueue(current);
+ }
+- sched_data->curr = current;
+- sched_data->last_schedule = get_cycles();
++ cpu_curr(cpu) = current;
++ last_schedule(cpu) = get_last_schedule();
+ clear_bit(current->processor, &wait_init_idle);
+ }
+
+@@ -1359,4 +1429,6 @@
+ */
+ atomic_inc(&init_mm.mm_count);
+ enter_lazy_tlb(&init_mm, current, cpu);
++
++ init_numa_schedule_data();
+ }
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8
new file mode 100644
index 000000000000..3cafb1c56042
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8
@@ -0,0 +1,61 @@
+diff -urN parent-timeslice-ref/include/linux/sched.h parent-timeslice/include/linux/sched.h
+--- parent-timeslice-ref/include/linux/sched.h Wed Oct 24 13:18:54 2001
++++ parent-timeslice/include/linux/sched.h Wed Oct 24 13:19:00 2001
+@@ -317,6 +317,7 @@
+ #ifdef CONFIG_NUMA_SCHED
+ int nid;
+ #endif
++ int get_child_timeslice;
+ struct task_struct *next_task, *prev_task;
+ struct mm_struct *active_mm;
+ struct rw_sem_recursor mm_recursor;
+diff -urN parent-timeslice-ref/kernel/exit.c parent-timeslice/kernel/exit.c
+--- parent-timeslice-ref/kernel/exit.c Wed Oct 24 08:04:27 2001
++++ parent-timeslice/kernel/exit.c Wed Oct 24 13:19:35 2001
+@@ -61,9 +61,11 @@
+ * timeslices, because any timeslice recovered here
+ * was given away by the parent in the first place.)
+ */
+- current->counter += p->counter;
+- if (current->counter >= MAX_COUNTER)
+- current->counter = MAX_COUNTER;
++ if (p->get_child_timeslice) {
++ current->counter += p->counter;
++ if (current->counter >= MAX_COUNTER)
++ current->counter = MAX_COUNTER;
++ }
+ p->pid = 0;
+ free_task_struct(p);
+ } else {
+@@ -164,6 +166,7 @@
+ p->exit_signal = SIGCHLD;
+ p->self_exec_id++;
+ p->p_opptr = child_reaper;
++ p->get_child_timeslice = 0;
+ if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
+ }
+ }
+diff -urN parent-timeslice-ref/kernel/fork.c parent-timeslice/kernel/fork.c
+--- parent-timeslice-ref/kernel/fork.c Wed Oct 24 13:18:54 2001
++++ parent-timeslice/kernel/fork.c Wed Oct 24 13:19:00 2001
+@@ -682,6 +682,9 @@
+ if (!current->counter)
+ current->need_resched = 1;
+
++ /* Tell the parent if it can get back its timeslice when child exits */
++ p->get_child_timeslice = 1;
++
+ /*
+ * Ok, add it to the run-queues and make it
+ * visible to the rest of the system.
+diff -urN parent-timeslice-ref/kernel/sched.c parent-timeslice/kernel/sched.c
+--- parent-timeslice-ref/kernel/sched.c Wed Oct 24 13:18:54 2001
++++ parent-timeslice/kernel/sched.c Wed Oct 24 13:19:00 2001
+@@ -758,6 +758,7 @@
+ continue;
+ #endif
+ p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
++ p->get_child_timeslice = 0;
+ }
+ read_unlock(&tasklist_lock);
+ spin_lock_irq(&runqueue_lock);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13
new file mode 100644
index 000000000000..40da91b65304
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13
@@ -0,0 +1,1595 @@
+diff -urN vm-ref/arch/i386/config.in vm/arch/i386/config.in
+--- vm-ref/arch/i386/config.in Fri Nov 9 08:29:24 2001
++++ vm/arch/i386/config.in Fri Nov 9 08:29:33 2001
+@@ -404,6 +404,7 @@
+ bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ
+ bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+ bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
++ bool ' Debug allocation failures' CONFIG_DEBUG_GFP
+ fi
+
+ endmenu
+diff -urN vm-ref/arch/sparc/kernel/sys_sunos.c vm/arch/sparc/kernel/sys_sunos.c
+--- vm-ref/arch/sparc/kernel/sys_sunos.c Thu Aug 16 22:03:25 2001
++++ vm/arch/sparc/kernel/sys_sunos.c Fri Nov 9 08:29:33 2001
+@@ -193,7 +193,7 @@
+ * fool it, but this should catch most mistakes.
+ */
+ freepages = atomic_read(&buffermem_pages) >> PAGE_SHIFT;
+- freepages += atomic_read(&page_cache_size);
++ freepages += page_cache_size;
+ freepages >>= 1;
+ freepages += nr_free_pages();
+ freepages += nr_swap_pages;
+diff -urN vm-ref/arch/sparc64/kernel/sys_sunos32.c vm/arch/sparc64/kernel/sys_sunos32.c
+--- vm-ref/arch/sparc64/kernel/sys_sunos32.c Thu Aug 16 22:03:26 2001
++++ vm/arch/sparc64/kernel/sys_sunos32.c Fri Nov 9 08:29:33 2001
+@@ -157,7 +157,7 @@
+ * fool it, but this should catch most mistakes.
+ */
+ freepages = atomic_read(&buffermem_pages) >> PAGE_SHIFT;
+- freepages += atomic_read(&page_cache_size);
++ freepages += page_cache_size;
+ freepages >>= 1;
+ freepages += nr_free_pages();
+ freepages += nr_swap_pages;
+diff -urN vm-ref/fs/buffer.c vm/fs/buffer.c
+--- vm-ref/fs/buffer.c Fri Nov 9 08:29:25 2001
++++ vm/fs/buffer.c Fri Nov 9 08:29:33 2001
+@@ -115,7 +115,7 @@
+ int dummy5; /* unused */
+ } b_un;
+ unsigned int data[N_PARAM];
+-} bdf_prm = {{40, 0, 0, 0, 5*HZ, 30*HZ, 60, 0, 0}};
++} bdf_prm = {{20, 0, 0, 0, 5*HZ, 30*HZ, 40, 0, 0}};
+
+ /* These are the min and max parameter values that we will allow to be assigned */
+ int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 0, 0, 0};
+@@ -124,7 +124,6 @@
+ void unlock_buffer(struct buffer_head *bh)
+ {
+ clear_bit(BH_Wait_IO, &bh->b_state);
+- clear_bit(BH_launder, &bh->b_state);
+ clear_bit(BH_Lock, &bh->b_state);
+ smp_mb__after_clear_bit();
+ if (waitqueue_active(&bh->b_wait))
+@@ -179,6 +178,7 @@
+ do {
+ struct buffer_head * bh = *array++;
+ bh->b_end_io = end_buffer_io_sync;
++ clear_bit(BH_Pending_IO, &bh->b_state);
+ submit_bh(WRITE, bh);
+ } while (--count);
+ }
+@@ -211,6 +211,7 @@
+ if (atomic_set_buffer_clean(bh)) {
+ __refile_buffer(bh);
+ get_bh(bh);
++ set_bit(BH_Pending_IO, &bh->b_state);
+ array[count++] = bh;
+ if (count < NRSYNC)
+ continue;
+@@ -238,7 +239,6 @@
+ conditional_schedule();
+ spin_lock(&lru_list_lock);
+ } while (write_some_buffers(dev));
+- run_task_queue(&tq_disk);
+ }
+
+ /*
+@@ -710,12 +710,8 @@
+
+ static void free_more_memory(void)
+ {
+- zone_t * zone = contig_page_data.node_zonelists[GFP_NOFS & GFP_ZONEMASK].zones[0];
+-
+- balance_dirty();
+ wakeup_bdflush();
+- try_to_free_pages(zone, GFP_NOFS, 0);
+- run_task_queue(&tq_disk);
++ try_to_free_pages_nozone(GFP_NOIO);
+ current->policy |= SCHED_YIELD;
+ __set_current_state(TASK_RUNNING);
+ schedule();
+@@ -1057,19 +1053,17 @@
+ if (state < 0)
+ return;
+
+- /* If we're getting into imbalance, start write-out */
+- spin_lock(&lru_list_lock);
+- write_some_buffers(NODEV);
++ wakeup_bdflush();
+
+ /*
+ * And if we're _really_ out of balance, wait for
+- * some of the dirty/locked buffers ourselves and
+- * start bdflush.
++ * some of the dirty/locked buffers ourselves.
+ * This will throttle heavy writers.
+ */
+ if (state > 0) {
++ spin_lock(&lru_list_lock);
++ write_some_buffers(NODEV);
+ wait_for_some_buffers(NODEV);
+- wakeup_bdflush();
+ }
+ }
+
+@@ -2376,23 +2370,28 @@
+ return 1;
+ }
+
+-static int sync_page_buffers(struct buffer_head *head, unsigned int gfp_mask)
++static int sync_page_buffers(struct buffer_head *head)
+ {
+ struct buffer_head * bh = head;
+- int tryagain = 0;
++ int tryagain = 1;
+
+ do {
+ if (!buffer_dirty(bh) && !buffer_locked(bh))
+ continue;
+
++ if (unlikely(buffer_pending_IO(bh))) {
++ tryagain = 0;
++ continue;
++ }
++
+ /* Don't start IO first time around.. */
+- if (!test_and_set_bit(BH_Wait_IO, &bh->b_state))
++ if (!test_and_set_bit(BH_Wait_IO, &bh->b_state)) {
++ tryagain = 0;
+ continue;
++ }
+
+ /* Second time through we start actively writing out.. */
+ if (test_and_set_bit(BH_Lock, &bh->b_state)) {
+- if (!test_bit(BH_launder, &bh->b_state))
+- continue;
+ wait_on_buffer(bh);
+ tryagain = 1;
+ continue;
+@@ -2405,7 +2404,6 @@
+
+ __mark_buffer_clean(bh);
+ get_bh(bh);
+- set_bit(BH_launder, &bh->b_state);
+ bh->b_end_io = end_buffer_io_sync;
+ submit_bh(WRITE, bh);
+ tryagain = 0;
+@@ -2479,7 +2477,7 @@
+ spin_unlock(&lru_list_lock);
+ if (gfp_mask & __GFP_IO) {
+ if ((gfp_mask & __GFP_HIGHIO) || !PageHighMem(page)) {
+- if (sync_page_buffers(bh, gfp_mask)) {
++ if (sync_page_buffers(bh)) {
+ /* no IO or waiting next time */
+ gfp_mask = 0;
+ goto cleaned_buffers_try_again;
+@@ -2730,7 +2728,7 @@
+
+ spin_lock(&lru_list_lock);
+ if (!write_some_buffers(NODEV) || balance_dirty_state() < 0) {
+- wait_for_some_buffers(NODEV);
++ run_task_queue(&tq_disk);
+ interruptible_sleep_on(&bdflush_wait);
+ }
+ }
+@@ -2761,8 +2759,6 @@
+ complete((struct completion *)startup);
+
+ for (;;) {
+- wait_for_some_buffers(NODEV);
+-
+ /* update interval */
+ interval = bdf_prm.b_un.interval;
+ if (interval) {
+@@ -2790,6 +2786,7 @@
+ printk(KERN_DEBUG "kupdate() activated...\n");
+ #endif
+ sync_old_buffers();
++ run_task_queue(&tq_disk);
+ }
+ }
+
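
Worth noting in the fs/buffer.c changes above: balance_dirty() now always wakes bdflush once the dirty state is non-negative, and only when the state is strictly positive does the writing process itself synchronously write out and wait on buffers, which throttles heavy writers. A trivial userspace model of that three-way split (balance_dirty_state() is replaced here by a plain integer argument):

#include <stdio.h>

/*
 * Model of the reworked balance_dirty() tail: a negative state means the
 * system is balanced, zero means "getting dirty" (only kick bdflush), and
 * a positive state means badly out of balance, so the writer also does
 * some synchronous work itself.
 */
static void balance_dirty(int state)
{
    if (state < 0)
        return;                               /* nothing to do */

    printf("state %d: wake bdflush\n", state);

    if (state > 0)
        printf("state %d: write and wait on buffers (throttle writer)\n",
               state);
}

int main(void)
{
    balance_dirty(-1);
    balance_dirty(0);
    balance_dirty(1);
    return 0;
}
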
+diff -urN vm-ref/fs/proc/proc_misc.c vm/fs/proc/proc_misc.c
+--- vm-ref/fs/proc/proc_misc.c Wed Oct 24 08:04:23 2001
++++ vm/fs/proc/proc_misc.c Fri Nov 9 08:29:33 2001
+@@ -149,7 +149,7 @@
+ #define B(x) ((unsigned long long)(x) << PAGE_SHIFT)
+ si_meminfo(&i);
+ si_swapinfo(&i);
+- pg_size = atomic_read(&page_cache_size) - i.bufferram ;
++ pg_size = page_cache_size - i.bufferram;
+
+ len = sprintf(page, " total: used: free: shared: buffers: cached:\n"
+ "Mem: %8Lu %8Lu %8Lu %8Lu %8Lu %8Lu\n"
+diff -urN vm-ref/include/linux/fs.h vm/include/linux/fs.h
+--- vm-ref/include/linux/fs.h Fri Nov 9 08:29:24 2001
++++ vm/include/linux/fs.h Fri Nov 9 08:29:33 2001
+@@ -215,7 +215,7 @@
+ BH_New, /* 1 if the buffer is new and not yet written out */
+ BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */
+ BH_Wait_IO, /* 1 if we should write out this buffer */
+- BH_launder, /* 1 if we should throttle on this buffer */
++ BH_Pending_IO, /* 1 if the buffer is locked but not in the I/O queue yet */
+
+ BH_PrivateStart,/* not a state bit, but the first bit available
+ * for private allocation by other entities
+@@ -276,6 +276,7 @@
+ #define buffer_mapped(bh) __buffer_state(bh,Mapped)
+ #define buffer_new(bh) __buffer_state(bh,New)
+ #define buffer_async(bh) __buffer_state(bh,Async)
++#define buffer_pending_IO(bh) __buffer_state(bh,Pending_IO)
+
+ #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK)
+
+diff -urN vm-ref/include/linux/mm.h vm/include/linux/mm.h
+--- vm-ref/include/linux/mm.h Fri Nov 9 08:29:24 2001
++++ vm/include/linux/mm.h Fri Nov 9 08:29:33 2001
+@@ -294,8 +294,10 @@
+ #define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags)
+ #define PageChecked(page) test_bit(PG_checked, &(page)->flags)
+ #define SetPageChecked(page) set_bit(PG_checked, &(page)->flags)
++
+ #define PageLaunder(page) test_bit(PG_launder, &(page)->flags)
+ #define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags)
++#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags)
+
+ extern void FASTCALL(set_page_dirty(struct page *));
+
+@@ -391,6 +393,8 @@
+ #define __free_page(page) __free_pages((page), 0)
+ #define free_page(addr) free_pages((addr),0)
+
++extern int start_aggressive_readahead(unsigned int);
++
+ extern void show_free_areas(void);
+ extern void show_free_areas_node(pg_data_t *pgdat);
+
+@@ -451,8 +455,8 @@
+ return page_count(page) - !!page->buffers == 1;
+ }
+
+-extern int can_share_swap_page(struct page *);
+-extern int remove_exclusive_swap_page(struct page *);
++extern int FASTCALL(make_exclusive_page(struct page *, int));
++extern int FASTCALL(remove_exclusive_swap_page(struct page *));
+
+ extern void __free_pte(pte_t);
+
+diff -urN vm-ref/include/linux/mmzone.h vm/include/linux/mmzone.h
+--- vm-ref/include/linux/mmzone.h Mon Nov 5 05:26:23 2001
++++ vm/include/linux/mmzone.h Fri Nov 9 08:29:33 2001
+@@ -40,7 +40,17 @@
+ spinlock_t lock;
+ unsigned long free_pages;
+ unsigned long pages_min, pages_low, pages_high;
+- int need_balance;
++ /*
++ * The below fields are protected by different locks (or by
++ * no lock at all like need_balance), so they're longs to
++ * provide an atomic granularity against each other on
++ * all architectures.
++ */
++ unsigned long need_balance;
++ /* protected by the pagemap_lru_lock */
++ unsigned long nr_active_pages, nr_inactive_pages;
++ /* protected by the pagecache_lock */
++ unsigned long nr_cache_pages;
+
+ /*
+ * free areas of different sizes
+@@ -113,8 +123,8 @@
+ extern int numnodes;
+ extern pg_data_t *pgdat_list;
+
+-#define memclass(pgzone, classzone) (((pgzone)->zone_pgdat == (classzone)->zone_pgdat) \
+- && ((pgzone) <= (classzone)))
++#define memclass(pgzone, classzone) \
++ (((pgzone) - (pgzone)->zone_pgdat->node_zones) <= ((classzone) - (classzone)->zone_pgdat->node_zones))
+
+ /*
+ * The following two are not meant for general usage. They are here as
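
The rewritten memclass() above stops requiring that both zones belong to the same node: a page's zone now satisfies an allocation classzone whenever its index within its own node is not higher than the classzone's index within its node. A small userspace model (struct zone here is a toy stand-in carrying an explicit index instead of being located by pointer arithmetic as in the patch):

#include <stdio.h>

/* toy stand-in: a zone knows its index inside its node (DMA=0, NORMAL=1, HIGHMEM=2) */
struct zone { int idx; int node; };

/*
 * Model of the rewritten memclass(): a page's zone satisfies a classzone
 * whenever its zone index is not higher than the classzone's index,
 * independently of which node either zone belongs to.
 */
static int memclass(const struct zone *pgzone, const struct zone *classzone)
{
    return pgzone->idx <= classzone->idx;
}

int main(void)
{
    struct zone dma_node0    = { 0, 0 };
    struct zone normal_node1 = { 1, 1 };
    struct zone high_node0   = { 2, 0 };

    printf("%d\n", memclass(&dma_node0, &normal_node1));   /* 1: DMA counts toward NORMAL */
    printf("%d\n", memclass(&high_node0, &normal_node1));  /* 0: HIGHMEM does not */
    return 0;
}

This cross-node definition is what makes the per-classzone page counters added elsewhere in this patch meaningful on NUMA machines.
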
+diff -urN vm-ref/include/linux/pagemap.h vm/include/linux/pagemap.h
+--- vm-ref/include/linux/pagemap.h Tue Nov 6 02:04:54 2001
++++ vm/include/linux/pagemap.h Fri Nov 9 08:29:33 2001
+@@ -45,7 +45,7 @@
+ #define PAGE_HASH_BITS (page_hash_bits)
+ #define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS)
+
+-extern atomic_t page_cache_size; /* # of pages currently in the hash table */
++extern unsigned long page_cache_size; /* # of pages currently in the hash table */
+ extern struct page **page_hash_table;
+
+ extern void page_cache_init(unsigned long);
+diff -urN vm-ref/include/linux/sched.h vm/include/linux/sched.h
+--- vm-ref/include/linux/sched.h Fri Nov 9 08:29:24 2001
++++ vm/include/linux/sched.h Fri Nov 9 08:29:33 2001
+@@ -280,6 +280,14 @@
+ extern struct user_struct root_user;
+ #define INIT_USER (&root_user)
+
++struct zone_struct;
++
++struct local_pages {
++ struct list_head list;
++ unsigned int order, nr;
++ struct zone_struct * classzone;
++};
++
+ struct task_struct {
+ /*
+ * offsets of these are hardcoded elsewhere - touch with care
+@@ -318,8 +326,7 @@
+ struct task_struct *next_task, *prev_task;
+ struct mm_struct *active_mm;
+ struct rw_sem_recursor mm_recursor;
+- struct list_head local_pages;
+- unsigned int allocation_order, nr_local_pages;
++ struct local_pages local_pages;
+
+ /* task state */
+ struct linux_binfmt *binfmt;
+@@ -416,7 +423,6 @@
+ #define PF_DUMPCORE 0x00000200 /* dumped core */
+ #define PF_SIGNALED 0x00000400 /* killed by a signal */
+ #define PF_MEMALLOC 0x00000800 /* Allocating memory */
+-#define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */
+ #define PF_FREE_PAGES 0x00002000 /* per process page freeing */
+
+ #define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */
+diff -urN vm-ref/include/linux/swap.h vm/include/linux/swap.h
+--- vm-ref/include/linux/swap.h Fri Nov 9 08:29:25 2001
++++ vm/include/linux/swap.h Fri Nov 9 08:29:33 2001
+@@ -88,7 +88,7 @@
+ extern int nr_active_pages;
+ extern int nr_inactive_pages;
+ extern atomic_t nr_async_pages;
+-extern atomic_t page_cache_size;
++extern unsigned long page_cache_size;
+ extern atomic_t buffermem_pages;
+
+ extern spinlock_cacheline_t pagecache_lock_cacheline;
+@@ -115,6 +115,8 @@
+ /* linux/mm/vmscan.c */
+ extern wait_queue_head_t kswapd_wait;
+ extern int FASTCALL(try_to_free_pages(zone_t *, unsigned int, unsigned int));
++extern int FASTCALL(try_to_free_pages_nozone(unsigned int));
++extern int vm_scan_ratio, vm_balance_ratio, vm_mapped_ratio;
+
+ /* linux/mm/page_io.c */
+ extern void rw_swap_page(int, struct page *);
+@@ -178,32 +180,128 @@
+ BUG(); \
+ } while (0)
+
++#define inc_nr_active_pages(page) \
++do { \
++ pg_data_t * __pgdat; \
++ zone_t * __classzone, * __overflow; \
++ \
++ __classzone = (page)->zone; \
++ __pgdat = __classzone->zone_pgdat; \
++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \
++ \
++ while (__classzone < __overflow) { \
++ __classzone->nr_active_pages++; \
++ __classzone++; \
++ } \
++ nr_active_pages++; \
++} while (0)
++
++#define dec_nr_active_pages(page) \
++do { \
++ pg_data_t * __pgdat; \
++ zone_t * __classzone, * __overflow; \
++ \
++ __classzone = (page)->zone; \
++ __pgdat = __classzone->zone_pgdat; \
++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \
++ \
++ while (__classzone < __overflow) { \
++ __classzone->nr_active_pages--; \
++ __classzone++; \
++ } \
++ nr_active_pages--; \
++} while (0)
++
++#define inc_nr_inactive_pages(page) \
++do { \
++ pg_data_t * __pgdat; \
++ zone_t * __classzone, * __overflow; \
++ \
++ __classzone = (page)->zone; \
++ __pgdat = __classzone->zone_pgdat; \
++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \
++ \
++ while (__classzone < __overflow) { \
++ __classzone->nr_inactive_pages++; \
++ __classzone++; \
++ } \
++ nr_inactive_pages++; \
++} while (0)
++
++#define dec_nr_inactive_pages(page) \
++do { \
++ pg_data_t * __pgdat; \
++ zone_t * __classzone, * __overflow; \
++ \
++ __classzone = (page)->zone; \
++ __pgdat = __classzone->zone_pgdat; \
++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \
++ \
++ while (__classzone < __overflow) { \
++ __classzone->nr_inactive_pages--; \
++ __classzone++; \
++ } \
++ nr_inactive_pages--; \
++} while (0)
++
+ #define add_page_to_active_list(page) \
+ do { \
+ DEBUG_LRU_PAGE(page); \
+ SetPageActive(page); \
+ list_add(&(page)->lru, &active_list); \
+- nr_active_pages++; \
++ inc_nr_active_pages(page); \
+ } while (0)
+
+ #define add_page_to_inactive_list(page) \
+ do { \
+ DEBUG_LRU_PAGE(page); \
+ list_add(&(page)->lru, &inactive_list); \
+- nr_inactive_pages++; \
++ inc_nr_inactive_pages(page); \
+ } while (0)
+
+ #define del_page_from_active_list(page) \
+ do { \
+ list_del(&(page)->lru); \
+ ClearPageActive(page); \
+- nr_active_pages--; \
++ dec_nr_active_pages(page); \
+ } while (0)
+
+ #define del_page_from_inactive_list(page) \
+ do { \
+ list_del(&(page)->lru); \
+- nr_inactive_pages--; \
++ dec_nr_inactive_pages(page); \
++} while (0)
++
++#define inc_nr_cache_pages(page) \
++do { \
++ pg_data_t * __pgdat; \
++ zone_t * __classzone, * __overflow; \
++ \
++ __classzone = (page)->zone; \
++ __pgdat = __classzone->zone_pgdat; \
++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \
++ \
++ while (__classzone < __overflow) { \
++ __classzone->nr_cache_pages++; \
++ __classzone++; \
++ } \
++ page_cache_size++; \
++} while (0)
++
++#define dec_nr_cache_pages(page) \
++do { \
++ pg_data_t * __pgdat; \
++ zone_t * __classzone, * __overflow; \
++ \
++ __classzone = (page)->zone; \
++ __pgdat = __classzone->zone_pgdat; \
++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \
++ \
++ while (__classzone < __overflow) { \
++ __classzone->nr_cache_pages--; \
++ __classzone++; \
++ } \
++ page_cache_size--; \
+ } while (0)
+
+ /*
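
The inc_/dec_ helpers above keep the new per-zone counters cumulative: each event is propagated from the page's own zone into every higher zone of the same node, so the value stored at a classzone is already the classzone-wide total and never has to be summed at read time. A userspace model of that propagation with a three-zone toy node (the structs are stand-ins, not the kernel's types):

#include <stdio.h>

#define NR_ZONES 3      /* DMA, NORMAL, HIGHMEM in the toy node */

struct zone { unsigned long nr_active_pages; };
struct node { struct zone zones[NR_ZONES]; };

/*
 * Model of inc_nr_active_pages(): bump the counter in the page's own zone
 * and in every zone above it in the same node, so the value stored at a
 * classzone is already the classzone-wide total.
 */
static void inc_active(struct node *n, int page_zone_idx)
{
    for (int z = page_zone_idx; z < NR_ZONES; z++)
        n->zones[z].nr_active_pages++;
}

int main(void)
{
    static struct node n;       /* zero-initialized */

    inc_active(&n, 0);          /* a DMA page becomes active */
    inc_active(&n, 1);          /* a NORMAL page becomes active */

    /* the NORMAL classzone already accounts for the DMA page below it */
    printf("active pages under NORMAL classzone: %lu\n",
           n.zones[1].nr_active_pages);     /* 2 */
    return 0;
}
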
+diff -urN vm-ref/include/linux/sysctl.h vm/include/linux/sysctl.h
+--- vm-ref/include/linux/sysctl.h Fri Nov 9 08:29:24 2001
++++ vm/include/linux/sysctl.h Fri Nov 9 08:29:33 2001
+@@ -134,12 +134,13 @@
+ VM_FREEPG=3, /* struct: Set free page thresholds */
+ VM_BDFLUSH=4, /* struct: Control buffer cache flushing */
+ VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */
+- VM_BUFFERMEM=6, /* struct: Set buffer memory thresholds */
+- VM_PAGECACHE=7, /* struct: Set cache memory thresholds */
+ VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */
+ VM_PGT_CACHE=9, /* struct: Set page table cache parameters */
+ VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */
+ VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */
++ VM_SCAN_RATIO=12, /* part of the inactive list to scan */
++ VM_BALANCE_RATIO=13, /* balance active and inactive caches */
++ VM_MAPPED_RATIO=14, /* pageout when we find too many mapped pages */
+ };
+
+
+diff -urN vm-ref/kernel/fork.c vm/kernel/fork.c
+--- vm-ref/kernel/fork.c Sun Sep 23 21:11:43 2001
++++ vm/kernel/fork.c Fri Nov 9 08:29:33 2001
+@@ -649,7 +649,7 @@
+ p->lock_depth = -1; /* -1 = no lock */
+ p->start_time = jiffies;
+
+- INIT_LIST_HEAD(&p->local_pages);
++ INIT_LIST_HEAD(&p->local_pages.list);
+
+ retval = -ENOMEM;
+ /* copy all the process information */
+diff -urN vm-ref/kernel/ksyms.c vm/kernel/ksyms.c
+--- vm-ref/kernel/ksyms.c Fri Nov 9 08:29:24 2001
++++ vm/kernel/ksyms.c Fri Nov 9 08:29:33 2001
+@@ -89,6 +89,7 @@
+ EXPORT_SYMBOL(exit_sighand);
+
+ /* internal kernel memory management */
++EXPORT_SYMBOL(start_aggressive_readahead);
+ EXPORT_SYMBOL(_alloc_pages);
+ EXPORT_SYMBOL(__alloc_pages);
+ EXPORT_SYMBOL(alloc_pages_node);
+diff -urN vm-ref/kernel/sysctl.c vm/kernel/sysctl.c
+--- vm-ref/kernel/sysctl.c Fri Nov 9 08:29:24 2001
++++ vm/kernel/sysctl.c Fri Nov 9 08:29:33 2001
+@@ -30,6 +30,7 @@
+ #include <linux/init.h>
+ #include <linux/sysrq.h>
+ #include <linux/highuid.h>
++#include <linux/swap.h>
+
+ #include <asm/uaccess.h>
+
+@@ -259,6 +260,12 @@
+ };
+
+ static ctl_table vm_table[] = {
++ {VM_SCAN_RATIO, "vm_scan_ratio",
++ &vm_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
++ {VM_BALANCE_RATIO, "vm_balance_ratio",
++ &vm_balance_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
++ {VM_MAPPED_RATIO, "vm_mapped_ratio",
++ &vm_mapped_ratio, sizeof(int), 0644, NULL, &proc_dointvec},
+ {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL,
+ &proc_dointvec_minmax, &sysctl_intvec, NULL,
+ &bdflush_min, &bdflush_max},
+diff -urN vm-ref/mm/filemap.c vm/mm/filemap.c
+--- vm-ref/mm/filemap.c Fri Nov 9 08:29:25 2001
++++ vm/mm/filemap.c Fri Nov 9 08:29:33 2001
+@@ -43,7 +43,7 @@
+ * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli <andrea@suse.de>
+ */
+
+-atomic_t page_cache_size = ATOMIC_INIT(0);
++unsigned long page_cache_size;
+ unsigned int page_hash_bits;
+ struct page **page_hash_table;
+
+@@ -75,7 +75,7 @@
+ next->pprev_hash = &page->next_hash;
+ if (page->buffers)
+ PAGE_BUG(page);
+- atomic_inc(&page_cache_size);
++ inc_nr_cache_pages(page);
+ }
+
+ static inline void add_page_to_inode_queue(struct address_space *mapping, struct page * page)
+@@ -105,7 +105,7 @@
+ next->pprev_hash = pprev;
+ *pprev = next;
+ page->pprev_hash = NULL;
+- atomic_dec(&page_cache_size);
++ dec_nr_cache_pages(page);
+ }
+
+ /*
+@@ -781,7 +781,7 @@
+
+ void unlock_page(struct page *page)
+ {
+- clear_bit(PG_launder, &(page)->flags);
++ ClearPageLaunder(page);
+ smp_mb__before_clear_bit();
+ if (!test_and_clear_bit(PG_locked, &(page)->flags))
+ BUG();
+@@ -1914,8 +1914,7 @@
+ * Found the page and have a reference on it, need to check sharing
+ * and possibly copy it over to another page..
+ */
+- mark_page_accessed(page);
+- flush_page_to_ram(page);
++ activate_page(page);
+ return page;
+
+ no_cached_page:
+@@ -3017,8 +3016,15 @@
+ }
+ unlock:
+ kunmap(page);
++
++ /*
++ * Mark the page accessed if we wrote the
++ * beginning or we just did an lseek.
++ */
++ if (!offset || !file->f_reada)
++ SetPageReferenced(page);
++
+ /* Mark it unlocked again and drop the page.. */
+- SetPageReferenced(page);
+ UnlockPage(page);
+ page_cache_release(page);
+
+diff -urN vm-ref/mm/memory.c vm/mm/memory.c
+--- vm-ref/mm/memory.c Fri Nov 9 08:29:24 2001
++++ vm/mm/memory.c Fri Nov 9 08:29:33 2001
+@@ -913,15 +913,11 @@
+ if (!VALID_PAGE(old_page))
+ goto bad_wp_page;
+
+- if (!TryLockPage(old_page)) {
+- int reuse = can_share_swap_page(old_page);
+- unlock_page(old_page);
+- if (reuse) {
+- flush_cache_page(vma, address);
+- establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
+- spin_unlock(&mm->page_table_lock);
+- return 1; /* Minor fault */
+- }
++ if (make_exclusive_page(old_page, 1)) {
++ flush_cache_page(vma, address);
++ establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
++ spin_unlock(&mm->page_table_lock);
++ return 1; /* Minor fault */
+ }
+
+ /*
+@@ -939,6 +935,19 @@
+ * Re-check the pte - we dropped the lock
+ */
+ spin_lock(&mm->page_table_lock);
++ /*
++ * Keep the page pinned until we return runnable,
++ * to prevent another thread from skipping the break_cow
++ * path, so we're sure the pte_same check below also implies
++ * that the _contents_ of the old_page didn't change
++ * under us (not only that the pagetable is the same).
++ *
++ * Since we have the page_table_lock acquired here, if the
++ * pte is the same it means we're still holding an additional
++ * reference on the old_page so we can safely
++ * page_cache_release(old_page) before the "pte_same == true" path.
++ */
++ page_cache_release(old_page);
+ if (pte_same(*page_table, pte)) {
+ if (PageReserved(old_page))
+ ++mm->rss;
+@@ -950,7 +959,6 @@
+ }
+ spin_unlock(&mm->page_table_lock);
+ page_cache_release(new_page);
+- page_cache_release(old_page);
+ return 1; /* Minor fault */
+
+ bad_wp_page:
+@@ -1106,7 +1114,8 @@
+ ret = 2;
+ }
+
+- lock_page(page);
++ if (!Page_Uptodate(page))
++ wait_on_page(page);
+
+ /*
+ * Back out if somebody else faulted in this pte while we
+@@ -1115,7 +1124,6 @@
+ spin_lock(&mm->page_table_lock);
+ if (!pte_same(*page_table, orig_pte)) {
+ spin_unlock(&mm->page_table_lock);
+- unlock_page(page);
+ page_cache_release(page);
+ return 1;
+ }
+@@ -1123,14 +1131,14 @@
+ /* The page isn't present yet, go ahead with the fault. */
+
+ swap_free(entry);
+- if (vm_swap_full())
+- remove_exclusive_swap_page(page);
+-
+ mm->rss++;
+ pte = mk_pte(page, vma->vm_page_prot);
+- if (write_access && can_share_swap_page(page))
+- pte = pte_mkdirty(pte_mkwrite(pte));
+- unlock_page(page);
++ if (make_exclusive_page(page, write_access)) {
++ if (write_access)
++ pte = pte_mkdirty(pte);
++ if (vma->vm_flags & VM_WRITE)
++ pte = pte_mkwrite(pte);
++ }
+
+ flush_page_to_ram(page);
+ flush_icache_page(vma, page);
+@@ -1168,8 +1176,8 @@
+
+ spin_lock(&mm->page_table_lock);
+ if (!pte_none(*page_table)) {
+- page_cache_release(page);
+ spin_unlock(&mm->page_table_lock);
++ page_cache_release(page);
+ return 1;
+ }
+ mm->rss++;
+@@ -1225,7 +1233,7 @@
+ struct page * page = alloc_page(GFP_HIGHUSER);
+ if (!page)
+ return -1;
+- copy_highpage(page, new_page);
++ copy_user_highpage(page, new_page, address);
+ page_cache_release(new_page);
+ lru_cache_add(page);
+ new_page = page;
+@@ -1252,9 +1260,9 @@
+ entry = pte_mkwrite(pte_mkdirty(entry));
+ set_pte(page_table, entry);
+ } else {
++ spin_unlock(&mm->page_table_lock);
+ /* One of our sibling threads was faster, back out. */
+ page_cache_release(new_page);
+- spin_unlock(&mm->page_table_lock);
+ return 1;
+ }
+
+diff -urN vm-ref/mm/mmap.c vm/mm/mmap.c
+--- vm-ref/mm/mmap.c Fri Nov 9 08:29:24 2001
++++ vm/mm/mmap.c Fri Nov 9 08:29:33 2001
+@@ -69,7 +69,7 @@
+ return 1;
+
+ /* The page cache contains buffer pages these days.. */
+- free = atomic_read(&page_cache_size);
++ free = page_cache_size;
+ free += nr_free_pages();
+ free += nr_swap_pages;
+
+diff -urN vm-ref/mm/oom_kill.c vm/mm/oom_kill.c
+--- vm-ref/mm/oom_kill.c Tue Nov 6 02:04:54 2001
++++ vm/mm/oom_kill.c Fri Nov 9 08:29:33 2001
+@@ -150,7 +150,6 @@
+ * exit() and clear out its resources quickly...
+ */
+ p->counter = 5 * HZ;
+- p->flags |= PF_MEMALLOC | PF_MEMDIE;
+
+ /* This process has hardware access, be more careful. */
+ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
+diff -urN vm-ref/mm/page_alloc.c vm/mm/page_alloc.c
+--- vm-ref/mm/page_alloc.c Fri Nov 9 08:29:24 2001
++++ vm/mm/page_alloc.c Fri Nov 9 08:29:33 2001
+@@ -138,14 +138,14 @@
+ return;
+
+ local_freelist:
+- if (current->nr_local_pages)
++ if ((current->local_pages.nr && !current->local_pages.order) ||
++ !memclass(page->zone, current->local_pages.classzone) ||
++ in_interrupt())
+ goto back_local_freelist;
+- if (in_interrupt())
+- goto back_local_freelist;
+
+- list_add(&page->list, &current->local_pages);
++ list_add(&page->list, &current->local_pages.list);
+ page->index = order;
+- current->nr_local_pages++;
++ current->local_pages.nr++;
+ }
+
+ #define MARK_USED(index, order, area) \
+@@ -230,35 +230,36 @@
+ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
+ {
+ struct page * page = NULL;
+- int __freed = 0;
++ int __freed;
+
+- if (!(gfp_mask & __GFP_WAIT))
+- goto out;
+ if (in_interrupt())
+ BUG();
+
+- current->allocation_order = order;
++ current->local_pages.order = order;
++ current->local_pages.classzone = classzone;
+ current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
+
+ __freed = try_to_free_pages(classzone, gfp_mask, order);
+
+ current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
+
+- if (current->nr_local_pages) {
++ if (current->local_pages.nr) {
+ struct list_head * entry, * local_pages;
+ struct page * tmp;
+ int nr_pages;
+
+- local_pages = &current->local_pages;
++ local_pages = &current->local_pages.list;
+
+ if (likely(__freed)) {
+ /* pick from the last inserted so we're lifo */
+ entry = local_pages->next;
+ do {
+ tmp = list_entry(entry, struct page, list);
+- if (tmp->index == order && memclass(tmp->zone, classzone)) {
++ if (!memclass(tmp->zone, classzone))
++ BUG();
++ if (tmp->index == order) {
+ list_del(entry);
+- current->nr_local_pages--;
++ current->local_pages.nr--;
+ set_page_count(tmp, 1);
+ page = tmp;
+
+@@ -284,7 +285,7 @@
+ } while ((entry = entry->next) != local_pages);
+ }
+
+- nr_pages = current->nr_local_pages;
++ nr_pages = current->local_pages.nr;
+ /* free in reverse order so that the global order will be lifo */
+ while ((entry = local_pages->prev) != local_pages) {
+ list_del(entry);
+@@ -293,9 +294,8 @@
+ if (!nr_pages--)
+ BUG();
+ }
+- current->nr_local_pages = 0;
++ current->local_pages.nr = 0;
+ }
+- out:
+ *freed = __freed;
+ return page;
+ }
+@@ -353,8 +353,7 @@
+
+ /* here we're in the low on memory slow path */
+
+-rebalance:
+- if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
++ if (current->flags & PF_MEMALLOC && !in_interrupt()) {
+ zone = zonelist->zones;
+ for (;;) {
+ zone_t *z = *(zone++);
+@@ -370,34 +369,52 @@
+
+ /* Atomic allocations - we can't balance anything */
+ if (!(gfp_mask & __GFP_WAIT))
+- return NULL;
++ goto out;
+
++ rebalance:
+ page = balance_classzone(classzone, gfp_mask, order, &freed);
+ if (page)
+ return page;
+
+ zone = zonelist->zones;
+- for (;;) {
+- zone_t *z = *(zone++);
+- if (!z)
+- break;
++ if (likely(freed)) {
++ for (;;) {
++ zone_t *z = *(zone++);
++ if (!z)
++ break;
+
+- if (zone_free_pages(z, order) > z->pages_min) {
+- page = rmqueue(z, order);
+- if (page)
+- return page;
++ if (zone_free_pages(z, order) > z->pages_min) {
++ page = rmqueue(z, order);
++ if (page)
++ return page;
++ }
+ }
+- }
++ goto rebalance;
++ } else {
++ /*
++ * Check whether another task has been killed meanwhile;
++ * in that case we can succeed the allocation.
++ */
++ for (;;) {
++ zone_t *z = *(zone++);
++ if (!z)
++ break;
+
+- /* Don't let big-order allocations loop */
+- if (order > 3)
+- return NULL;
++ if (zone_free_pages(z, order) > z->pages_high) {
++ page = rmqueue(z, order);
++ if (page)
++ return page;
++ }
++ }
++ }
+
+- /* Yield for kswapd, and try again */
+- current->policy |= SCHED_YIELD;
+- __set_current_state(TASK_RUNNING);
+- schedule();
+- goto rebalance;
++ out:
++ printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n",
++ order, gfp_mask, !!(current->flags & PF_MEMALLOC));
++#ifdef CONFIG_DEBUG_GFP
++ show_stack(NULL);
++#endif
++ return NULL;
+ }
+
+ /*
+@@ -520,17 +537,24 @@
+ {
+ pg_data_t *pgdat = pgdat_list;
+ unsigned int sum = 0;
++ zonelist_t *zonelist;
++ zone_t **zonep, *zone;
+
+ do {
+- zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
+- zone_t **zonep = zonelist->zones;
+- zone_t *zone;
++ zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
++ zonep = zonelist->zones;
+
+- for (zone = *zonep++; zone; zone = *zonep++) {
+- unsigned long size = zone->size;
+- unsigned long high = zone->pages_high;
+- if (size > high)
+- sum += size - high;
++ zone = *zonep;
++ if (zone) {
++ sum += zone->nr_cache_pages;
++ do {
++ unsigned int free = zone->free_pages - zone->pages_high;
++ zonep++;
++ zone = *zonep;
++ if (free <= 0)
++ continue;
++ sum += free;
++ } while (zone);
+ }
+
+ pgdat = pgdat->node_next;
+@@ -553,6 +577,62 @@
+ }
+ #endif
+
++/*
++ * If it returns non zero it means there's lots of ram "free"
++ * (note: not in cache!) so any caller will know that
++ * he can allocate some memory to do some more aggressive
++ * (possibly wasteful) readahead. The state of the memory
++ * should be rechecked after every few pages allocated for
++ * doing this aggressive readahead.
++ *
++ * The gfp_mask parameter specifies which kind of memory
++ * the readahead information will be allocated in.
++ */
++int start_aggressive_readahead(unsigned int gfp_mask)
++{
++ pg_data_t *pgdat = pgdat_list;
++ zonelist_t *zonelist;
++ zone_t **zonep, *zone;
++ int ret = 0;
++
++ do {
++ zonelist = pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK);
++ zonep = zonelist->zones;
++
++ for (zone = *zonep++; zone; zone = *zonep++)
++ if (zone->free_pages > zone->pages_high * 2)
++ ret = 1;
++
++ pgdat = pgdat->node_next;
++ } while (pgdat);
++
++ return ret;
++}
++
++int try_to_free_pages_nozone(unsigned int gfp_mask)
++{
++ pg_data_t *pgdat = pgdat_list;
++ zonelist_t *zonelist;
++ zone_t **zonep;
++ int ret = 0;
++ unsigned long pf_free_pages;
++
++ pf_free_pages = current->flags & PF_FREE_PAGES;
++ current->flags &= ~PF_FREE_PAGES;
++
++ do {
++ zonelist = pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK);
++ zonep = zonelist->zones;
++
++ ret |= try_to_free_pages(*zonep, gfp_mask, 0);
++
++ pgdat = pgdat->node_next;
++ } while (pgdat);
++
++ current->flags |= pf_free_pages;
++ return ret;
++}
++
+ #define K(x) ((x) << (PAGE_SHIFT-10))
+
+ /*
+@@ -758,6 +838,7 @@
+ zone->zone_pgdat = pgdat;
+ zone->free_pages = 0;
+ zone->need_balance = 0;
++ zone->nr_active_pages = zone->nr_inactive_pages = 0;
+ if (!size)
+ continue;
+
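
start_aggressive_readahead(), added and exported earlier in this patch, simply reports whether any zone reachable through the requested zonelist has free pages above twice its pages_high watermark; callers can use that to opportunistically widen readahead and are expected to re-check every few pages. A hedged userspace model of the check (struct zone is a stand-in, and the factor-of-two threshold mirrors the patch):

#include <stdio.h>

struct zone { unsigned long free_pages, pages_high; };

/*
 * Model of start_aggressive_readahead(): non-zero when some zone has
 * plenty of genuinely free (not merely cached) memory, defined here as
 * free pages above twice the high watermark.
 */
static int start_aggressive_readahead(const struct zone *zones, int nr)
{
    for (int i = 0; i < nr; i++)
        if (zones[i].free_pages > zones[i].pages_high * 2)
            return 1;
    return 0;
}

int main(void)
{
    struct zone zones[2] = {
        { .free_pages = 100, .pages_high = 128 },
        { .free_pages = 900, .pages_high = 128 },
    };

    if (start_aggressive_readahead(zones, 2))
        printf("plenty of free memory: widen the readahead window\n");
    else
        printf("stay conservative\n");
    return 0;
}
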
+diff -urN vm-ref/mm/page_io.c vm/mm/page_io.c
+--- vm-ref/mm/page_io.c Tue Nov 6 02:04:54 2001
++++ vm/mm/page_io.c Fri Nov 9 08:29:33 2001
+@@ -41,7 +41,6 @@
+ kdev_t dev = 0;
+ int block_size;
+ struct inode *swapf = 0;
+- int wait = 0;
+
+ if (rw == READ) {
+ ClearPageUptodate(page);
+@@ -73,18 +72,6 @@
+
+ /* block_size == PAGE_SIZE/zones_used */
+ brw_page(rw, page, dev, zones, block_size);
+-
+- /* Note! For consistency we do all of the logic,
+- * decrementing the page count, and unlocking the page in the
+- * swap lock map - in the IO completion handler.
+- */
+- if (!wait)
+- return 1;
+-
+- wait_on_page(page);
+- /* This shouldn't happen, but check to be sure. */
+- if (page_count(page) == 0)
+- printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
+
+ return 1;
+ }
+diff -urN vm-ref/mm/slab.c vm/mm/slab.c
+--- vm-ref/mm/slab.c Sun Sep 23 21:11:43 2001
++++ vm/mm/slab.c Fri Nov 9 08:29:33 2001
+@@ -914,8 +914,6 @@
+ slab_t *slabp;
+ int ret;
+
+- drain_cpu_caches(cachep);
+-
+ spin_lock_irq(&cachep->spinlock);
+
+ /* If the cache is growing, stop shrinking. */
+@@ -985,6 +983,8 @@
+ kmem_cache_t, next);
+ list_del(&cachep->next);
+ up(&cache_chain_sem);
++
++ drain_cpu_caches(cachep);
+
+ if (__kmem_cache_shrink(cachep)) {
+ printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
+diff -urN vm-ref/mm/swap.c vm/mm/swap.c
+--- vm-ref/mm/swap.c Thu Nov 8 04:07:20 2001
++++ vm/mm/swap.c Fri Nov 9 08:29:33 2001
+@@ -38,9 +38,13 @@
+ */
+ static inline void activate_page_nolock(struct page * page)
+ {
+- if (PageLRU(page) && !PageActive(page)) {
+- del_page_from_inactive_list(page);
+- add_page_to_active_list(page);
++ if (PageLRU(page)) {
++ if (!PageActive(page)) {
++ del_page_from_inactive_list(page);
++ add_page_to_active_list(page);
++ ClearPageReferenced(page);
++ } else
++ SetPageReferenced(page);
+ }
+ }
+
+diff -urN vm-ref/mm/swap_state.c vm/mm/swap_state.c
+--- vm-ref/mm/swap_state.c Tue Nov 6 02:04:54 2001
++++ vm/mm/swap_state.c Fri Nov 9 08:29:33 2001
+@@ -117,7 +117,9 @@
+ if (!PageLocked(page))
+ BUG();
+
+- block_flushpage(page, 0);
++ if (!block_flushpage(page, 0))
++ /* an anonymous page cannot have page->buffers set */
++ BUG();
+
+ entry.val = page->index;
+
+diff -urN vm-ref/mm/swapfile.c vm/mm/swapfile.c
+--- vm-ref/mm/swapfile.c Tue Nov 6 02:04:54 2001
++++ vm/mm/swapfile.c Fri Nov 9 08:29:33 2001
+@@ -227,6 +227,7 @@
+ * Check if we're the only user of a swap page,
+ * when the page is locked.
+ */
++static int FASTCALL(exclusive_swap_page(struct page *page));
+ static int exclusive_swap_page(struct page *page)
+ {
+ int retval = 0;
+@@ -240,12 +241,13 @@
+ if (p->swap_map[SWP_OFFSET(entry)] == 1) {
+ /* Recheck the page count with the pagecache lock held.. */
+ spin_lock(&pagecache_lock);
+- if (page_count(page) - !!page->buffers == 2)
++ if (PageSwapCache(page) && page_count(page) - !!page->buffers == 2)
+ retval = 1;
+ spin_unlock(&pagecache_lock);
+ }
+ swap_info_put(p);
+ }
++
+ return retval;
+ }
+
+@@ -257,21 +259,42 @@
+ * work, but we opportunistically check whether
+ * we need to get all the locks first..
+ */
+-int can_share_swap_page(struct page *page)
++int make_exclusive_page(struct page *page, int write)
+ {
+ int retval = 0;
+
+- if (!PageLocked(page))
+- BUG();
+ switch (page_count(page)) {
+ case 3:
+ if (!page->buffers)
+ break;
+ /* Fallthrough */
+ case 2:
++ /* racy fastpath check */
+ if (!PageSwapCache(page))
+ break;
+- retval = exclusive_swap_page(page);
++
++ if ((!write && !vm_swap_full()) || TryLockPage(page)) {
++ /*
++ * Don't remove the page from the swapcache if:
++ * - it was a read fault and...
++ * - the swap isn't full
++ * or if
++ * - we failed acquiring the page lock
++ *
++ * NOTE: if failed acquiring the lock we cannot remove the
++ * page from the swapcache, but still we can safely takeover
++ * the page if it's exclusive, see the swapcache check in
++ * the innermost critical section of exclusive_swap_page().
++ */
++ retval = exclusive_swap_page(page);
++ } else {
++ /*
++ * Here we've the page lock acquired and we're asked
++ * to try to drop this page from the swapcache.
++ */
++ retval = remove_exclusive_swap_page(page);
++ unlock_page(page);
++ }
+ break;
+ case 1:
+ if (PageReserved(page))
+@@ -300,7 +323,7 @@
+
+ entry.val = page->index;
+ p = swap_info_get(entry);
+- if (!p)
++ if (unlikely(!p))
+ return 0;
+
+ /* Is the only swap cache user the cache itself? */
+@@ -309,7 +332,11 @@
+ /* Recheck the page count with the pagecache lock held.. */
+ spin_lock(&pagecache_lock);
+ if (page_count(page) - !!page->buffers == 2) {
++ if (page->buffers && !try_to_free_buffers(page, 0))
++ /* an anonymous page cannot have page->buffers set */
++ BUG();
+ __delete_from_swap_cache(page);
++ swap_entry_free(p, SWP_OFFSET(entry));
+ SetPageDirty(page);
+ retval = 1;
+ }
+@@ -317,11 +344,8 @@
+ }
+ swap_info_put(p);
+
+- if (retval) {
+- block_flushpage(page, 0);
+- swap_free(entry);
++ if (retval)
+ page_cache_release(page);
+- }
+
+ return retval;
+ }
+@@ -343,11 +367,7 @@
+ }
+ if (page) {
+ page_cache_get(page);
+- /* Only cache user (+us), or swap space full? Free it! */
+- if (page_count(page) == 2 || vm_swap_full()) {
+- delete_from_swap_cache(page);
+- SetPageDirty(page);
+- }
++ remove_exclusive_swap_page(page);
+ UnlockPage(page);
+ page_cache_release(page);
+ }
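
make_exclusive_page(), which replaces can_share_swap_page() above, makes a three-way decision for a swap-cache page: on a plain read fault while swap is not full, or whenever the page lock cannot be taken, it only tests exclusivity and leaves the swap-cache copy alone; otherwise it takes the lock and additionally tries to drop the page from the swap cache before letting the fault reuse it. The sketch below models only that branch structure (the struct and the string results are illustrative, not kernel code):

#include <stdio.h>

/* Toy state driving the decision in make_exclusive_page() (illustrative). */
struct page_state {
    int in_swapcache;   /* page has a swap-cache copy */
    int can_lock;       /* TryLockPage() would succeed */
    int exclusive;      /* no user besides us (and the cache itself) */
};

static int vm_swap_full_flag;   /* stand-in for vm_swap_full() */

/*
 * Model of the branch structure only: on a plain read fault with swap not
 * full (or when the page lock is unavailable) we merely test exclusivity;
 * otherwise we take the lock and also try to drop the swap-cache copy
 * before reusing the page.
 */
static const char *make_exclusive_page(const struct page_state *p, int write)
{
    if (!p->in_swapcache)
        return "no swap-cache copy: nothing to check";

    if ((!write && !vm_swap_full_flag) || !p->can_lock)
        return p->exclusive ? "exclusive (kept in swap cache)" : "shared";

    return p->exclusive ? "exclusive (dropped from swap cache)" : "shared";
}

int main(void)
{
    struct page_state p = { .in_swapcache = 1, .can_lock = 1, .exclusive = 1 };

    printf("read fault : %s\n", make_exclusive_page(&p, 0));
    printf("write fault: %s\n", make_exclusive_page(&p, 1));
    return 0;
}
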
+diff -urN vm-ref/mm/vmscan.c vm/mm/vmscan.c
+--- vm-ref/mm/vmscan.c Thu Nov 8 04:07:20 2001
++++ vm/mm/vmscan.c Fri Nov 9 08:36:58 2001
+@@ -26,12 +26,28 @@
+ #include <asm/pgalloc.h>
+
+ /*
+- * The "priority" of VM scanning is how much of the queues we
+- * will scan in one go. A value of 6 for DEF_PRIORITY implies
+- * that we'll scan 1/64th of the queues ("queue_length >> 6")
+- * during a normal aging round.
++ * "vm_scan_ratio" is how much of the queues we will scan
++ * in one go. A value of 8 for vm_scan_ratio implies that we'll
++ * scan 1/8 of the inactive list during a normal aging round.
++ * So if 1/vm_scan_ratio of the inactive cache is unfreeable
++ * we'll start the background paging.
+ */
+-#define DEF_PRIORITY (6)
++int vm_scan_ratio = 8;
++
++/*
++ * "vm_mapped_ratio" controls when we start to swap out: the lower
++ * it is, the earlier we'll start to swap out.
++ */
++int vm_mapped_ratio = 10;
++
++/*
++ * "vm_balance_ratio" controls the balance between active and
++ * inactive cache. The bigger vm_balance_ratio is, the easier the
++ * active cache will grow, because we'll rotate the active list
++ * slowly. A value of 3 means we'll go towards a balance of
++ * 1/4 of the cache being inactive.
++ */
++int vm_balance_ratio = 3;
+
+ /*
+ * The swap-out function returns 1 if it successfully
+@@ -50,13 +66,15 @@
+
+ /* Don't look at this pte if it's been accessed recently. */
+ if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
+- mark_page_accessed(page);
++ activate_page(page);
+ return 0;
+ }
+
++#if 0
+ /* Don't bother unmapping pages that are active */
+ if (PageActive(page))
+ return 0;
++#endif
+
+ /* Don't bother replenishing zones not under pressure.. */
+ if (!memclass(page->zone, classzone))
+@@ -113,6 +131,9 @@
+ */
+ if (page->mapping)
+ goto drop_pte;
++ if (page->buffers)
++ /* can happen if there's a page fault during vmtruncate */
++ goto preserve;
+ if (!PageDirty(page))
+ goto drop_pte;
+
+@@ -139,6 +160,7 @@
+ swap_free(entry);
+ }
+
++ preserve:
+ /* No swap space left */
+ set_pte(page_table, pte);
+ UnlockPage(page);
+@@ -249,6 +271,7 @@
+ {
+ unsigned long address;
+ struct vm_area_struct* vma;
++ int tlb_flush = 0;
+
+ /*
+ * Find the proper vm-area after freezing the vma chain
+@@ -263,6 +286,7 @@
+ }
+ vma = find_vma(mm, address);
+ if (vma) {
++ tlb_flush = 1;
+ if (address < vma->vm_start)
+ address = vma->vm_start;
+
+@@ -281,16 +305,18 @@
+
+ out_unlock:
+ spin_unlock(&mm->page_table_lock);
++ if (tlb_flush)
++ flush_tlb_mm(mm);
+ return count;
+ }
+
+-static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone));
+-static int swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone)
++static int FASTCALL(swap_out(zone_t * classzone));
++static int swap_out(zone_t * classzone)
+ {
+ int counter, nr_pages = SWAP_CLUSTER_MAX;
+ struct mm_struct *mm;
+
+- counter = mmlist_nr;
++ counter = mmlist_nr << 1;
+ do {
+ if (unlikely(current->need_resched)) {
+ __set_current_state(TASK_RUNNING);
+@@ -326,15 +352,13 @@
+ return 0;
+ }
+
+-static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority));
+-static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)
++static int FASTCALL(shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout));
++static int shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)
+ {
+ struct list_head * entry;
+- int max_scan = nr_inactive_pages / priority;
+- int max_mapped = nr_pages << (9 - priority);
++ int max_mapped = nr_pages * vm_mapped_ratio;
+
+- spin_lock(&pagemap_lru_lock);
+- while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) {
++ while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) {
+ struct page * page;
+
+ if (unlikely(current->need_resched)) {
+@@ -365,6 +389,8 @@
+ if (!memclass(page->zone, classzone))
+ continue;
+
++ max_scan--;
++
+ /* Racy check to avoid trylocking when not worthwhile */
+ if (!page->buffers && (page_count(page) != 1 || !page->mapping))
+ goto page_mapped;
+@@ -462,34 +488,43 @@
+ spin_lock(&pagecache_lock);
+
+ /*
+- * this is the non-racy check for busy page.
++ * This is the non-racy check for busy page.
++ * It is critical to check PageDirty _after_ we made sure
++ * the page is freeable so not in use by anybody.
++ * At this point we're guaranteed that page->buffers is NULL,
++ * nobody can refill page->buffers under us because we still
++ * hold the page lock.
+ */
+- if (!page->mapping || !is_page_cache_freeable(page)) {
++ if (!page->mapping || page_count(page) > 1) {
+ spin_unlock(&pagecache_lock);
+ UnlockPage(page);
+-page_mapped:
+- if (--max_mapped >= 0)
+- continue;
++ page_mapped:
++ if (--max_mapped < 0) {
++ spin_unlock(&pagemap_lru_lock);
+
+- /*
+- * Alert! We've found too many mapped pages on the
+- * inactive list, so we start swapping out now!
+- */
+- spin_unlock(&pagemap_lru_lock);
+- swap_out(priority, gfp_mask, classzone);
+- return nr_pages;
+- }
++ shrink_dcache_memory(vm_scan_ratio, gfp_mask);
++ shrink_icache_memory(vm_scan_ratio, gfp_mask);
++#ifdef CONFIG_QUOTA
++ shrink_dqcache_memory(vm_scan_ratio, gfp_mask);
++#endif
+
+- /*
+- * It is critical to check PageDirty _after_ we made sure
+- * the page is freeable* so not in use by anybody.
+- */
++ if (!*failed_swapout)
++ *failed_swapout = !swap_out(classzone);
++ max_mapped = nr_pages * vm_mapped_ratio;
++
++ spin_lock(&pagemap_lru_lock);
++ }
++ continue;
++
++ }
+ if (PageDirty(page)) {
+ spin_unlock(&pagecache_lock);
+ UnlockPage(page);
+ continue;
+ }
+
++ __lru_cache_del(page);
++
+ /* point of no return */
+ if (likely(!PageSwapCache(page))) {
+ __remove_inode_page(page);
+@@ -502,7 +537,6 @@
+ swap_free(swap);
+ }
+
+- __lru_cache_del(page);
+ UnlockPage(page);
+
+ /* effectively free the page here */
+@@ -524,74 +558,96 @@
+ * We move them the other way when we see the
+ * reference bit on the page.
+ */
+-static void refill_inactive(int nr_pages)
++static void FASTCALL(refill_inactive(int nr_pages, zone_t * classzone));
++static void refill_inactive(int nr_pages, zone_t * classzone)
+ {
+ struct list_head * entry;
+
+- spin_lock(&pagemap_lru_lock);
+ entry = active_list.prev;
+- while (nr_pages-- && entry != &active_list) {
++ while (nr_pages && entry != &active_list) {
+ struct page * page;
+
+ page = list_entry(entry, struct page, lru);
+ entry = entry->prev;
++
++ if (!memclass(page->zone, classzone))
++ continue;
++
+ if (PageTestandClearReferenced(page)) {
+ list_del(&page->lru);
+ list_add(&page->lru, &active_list);
+ continue;
+ }
+
++ nr_pages--;
++
+ del_page_from_active_list(page);
+ add_page_to_inactive_list(page);
+ SetPageReferenced(page);
+ }
+- spin_unlock(&pagemap_lru_lock);
++ if (entry != &active_list) {
++ list_del(&active_list);
++ list_add(&active_list, entry);
++ }
+ }
+
+-static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages));
+-static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)
++static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout));
++static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout)
+ {
+- int chunk_size = nr_pages;
++ int max_scan, nr_pages_orig = nr_pages;
+ unsigned long ratio;
+
+ nr_pages -= kmem_cache_reap(gfp_mask);
+ if (nr_pages <= 0)
+ return 0;
+
+- nr_pages = chunk_size;
+- /* try to keep the active list 2/3 of the size of the cache */
+- ratio = (unsigned long) nr_pages * nr_active_pages / ((nr_inactive_pages + 1) * 2);
+- refill_inactive(ratio);
++ nr_pages = nr_pages_orig;
++ spin_lock(&pagemap_lru_lock);
++ ratio = (unsigned long) nr_pages * classzone->nr_active_pages / (((unsigned long) classzone->nr_inactive_pages * vm_balance_ratio) + 1);
++ if (ratio > nr_pages * 2)
++ ratio = nr_pages * 2;
++ refill_inactive(ratio, classzone);
+
+- nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, priority);
+- if (nr_pages <= 0)
+- return 0;
+-
+- shrink_dcache_memory(priority, gfp_mask);
+- shrink_icache_memory(priority, gfp_mask);
+-#ifdef CONFIG_QUOTA
+- shrink_dqcache_memory(DEF_PRIORITY, gfp_mask);
+-#endif
++ max_scan = classzone->nr_inactive_pages / vm_scan_ratio;
++ nr_pages = shrink_cache(nr_pages, max_scan, classzone, gfp_mask, failed_swapout);
+
+ return nr_pages;
+ }
+
++static int check_classzone_need_balance(zone_t * classzone);
++
+ int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
+ {
+- int priority = DEF_PRIORITY;
+- int nr_pages = SWAP_CLUSTER_MAX;
++ for (;;) {
++ int tries = vm_scan_ratio << 2;
++ int failed_swapout = 0;
+
+- do {
+- nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages);
+- if (nr_pages <= 0)
+- return 1;
+- } while (--priority);
++ do {
++ int nr_pages = SWAP_CLUSTER_MAX;
++
++ nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout);
++ if (nr_pages <= 0)
++ return 1;
++
++ shrink_dcache_memory(vm_scan_ratio, gfp_mask);
++ shrink_icache_memory(vm_scan_ratio, gfp_mask);
++#ifdef CONFIG_QUOTA
++ shrink_dqcache_memory(vm_scan_ratio, gfp_mask);
++#endif
++
++ if (!failed_swapout)
++ failed_swapout = !swap_out(classzone);
++ } while (--tries);
++
++ if (likely(current->pid != 1))
++ break;
++ if (!check_classzone_need_balance(classzone))
++ break;
++ current->policy |= SCHED_YIELD;
++ __set_current_state(TASK_RUNNING);
++ schedule();
++ }
+
+- /*
+- * Hmm.. Cache shrink failed - time to kill something?
+- * Mhwahahhaha! This is the part I really like. Giggle.
+- */
+- out_of_memory();
+ return 0;
+ }
+
+@@ -624,7 +680,7 @@
+ if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
+ zone->need_balance = 0;
+ __set_current_state(TASK_INTERRUPTIBLE);
+- schedule_timeout(HZ);
++ schedule_timeout(HZ*5);
+ continue;
+ }
+ if (check_classzone_need_balance(zone))
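
The fixed DEF_PRIORITY aging is replaced above by three tunable ratios (vm_scan_ratio, vm_mapped_ratio, vm_balance_ratio, registered in the vm sysctl table earlier in this patch). shrink_caches() sizes each pass from them: the number of active pages rotated onto the inactive list is nr_pages * nr_active / (nr_inactive * vm_balance_ratio + 1), capped at twice nr_pages, and at most nr_inactive / vm_scan_ratio inactive pages are scanned. A small worked model of that arithmetic with the patch's default ratios:

#include <stdio.h>

/* defaults from the patch */
static int vm_scan_ratio    = 8;
static int vm_balance_ratio = 3;

/*
 * Model of the sizing done in shrink_caches(): how many active pages to
 * move to the inactive list, and how many inactive pages one pass may scan.
 */
static void plan_pass(unsigned long nr_active, unsigned long nr_inactive,
                      int nr_pages)
{
    unsigned long refill = (unsigned long)nr_pages * nr_active /
                           (nr_inactive * vm_balance_ratio + 1);
    if (refill > (unsigned long)nr_pages * 2)
        refill = nr_pages * 2;

    unsigned long max_scan = nr_inactive / vm_scan_ratio;

    printf("refill %lu active->inactive, scan up to %lu inactive\n",
           refill, max_scan);
}

int main(void)
{
    /* a classzone with a huge active list and a small inactive list */
    plan_pass(30000, 2000, 32);   /* refill capped at 64, scan 250 */
    /* a roughly balanced classzone: little refill needed */
    plan_pass(6000, 2000, 32);    /* refill 31, scan 250 */
    return 0;
}
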
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7
new file mode 100644
index 000000000000..173c9d081ea2
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7
@@ -0,0 +1,37 @@
+diff -urN highmem-debug-ref/arch/i386/config.in highmem-debug/arch/i386/config.in
+--- highmem-debug-ref/arch/i386/config.in Tue Oct 23 12:56:05 2001
++++ highmem-debug/arch/i386/config.in Tue Oct 23 12:57:17 2001
+@@ -407,6 +407,9 @@
+ bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+ bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
+ bool ' Debug allocation failures' CONFIG_DEBUG_GFP
++ if [ "$CONFIG_HIGHMEM" = "y" ]; then
++ bool ' Emulate HIGHMEM on lowmem machines' CONFIG_HIGHMEM_EMULATION
++ fi
+ fi
+
+ endmenu
+diff -urN highmem-debug-ref/arch/i386/kernel/setup.c highmem-debug/arch/i386/kernel/setup.c
+--- highmem-debug-ref/arch/i386/kernel/setup.c Sun Oct 21 20:03:33 2001
++++ highmem-debug/arch/i386/kernel/setup.c Tue Oct 23 12:57:35 2001
+@@ -821,7 +821,20 @@
+ */
+ #define VMALLOC_RESERVE (unsigned long)(128 << 20)
+ #define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
++#ifdef CONFIG_HIGHMEM_EMULATION
++#define ORDER_DOWN(x) ((x >> (MAX_ORDER-1)) << (MAX_ORDER-1))
++#define MAXMEM_PFN \
++({ \
++ int __max_pfn; \
++ if (max_pfn > PFN_DOWN(MAXMEM)) \
++ __max_pfn = PFN_DOWN(MAXMEM); \
++ else \
++ __max_pfn = ORDER_DOWN(max_pfn / 5); \
++ __max_pfn; \
++})
++#else
+ #define MAXMEM_PFN PFN_DOWN(MAXMEM)
++#endif
+ #define MAX_NONPAE_PFN (1 << 20)
+
+ /*
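
CONFIG_HIGHMEM_EMULATION above makes MAXMEM_PFN pretend that only a fifth of the machine's pages, rounded down to a MAX_ORDER-aligned boundary, are directly mapped lowmem, so the highmem code paths can be exercised on small boxes. A worked example of the computation (MAX_ORDER = 10 and 4 KB pages are assumptions about the 2.4 i386 configuration; the model skips the max_pfn > MAXMEM case where no emulation is needed):

#include <stdio.h>

#define MAX_ORDER 10                     /* assumed 2.4 i386 value */
#define ORDER_DOWN(x) (((x) >> (MAX_ORDER - 1)) << (MAX_ORDER - 1))

/*
 * Model of the MAXMEM_PFN emulation: treat only max_pfn/5, rounded down
 * to a 512-page boundary, as directly mapped lowmem; the rest becomes
 * "highmem" even on a small machine.
 */
int main(void)
{
    unsigned long max_pfn = 32768;        /* 128 MB of 4 KB pages */
    unsigned long low_pfn = ORDER_DOWN(max_pfn / 5);

    printf("lowmem pfns: %lu (%lu MB), emulated highmem: %lu MB\n",
           low_pfn, low_pfn >> 8, (max_pfn - low_pfn) >> 8);
    return 0;
}
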
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1
new file mode 100644
index 000000000000..2dc98673eb6a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1
@@ -0,0 +1,327 @@
+diff -urN vm-ref/arch/sparc64/mm/init.c vm-numa/arch/sparc64/mm/init.c
+--- vm-ref/arch/sparc64/mm/init.c Thu Nov 1 20:05:09 2001
++++ vm-numa/arch/sparc64/mm/init.c Thu Nov 1 20:05:26 2001
+@@ -1591,7 +1591,7 @@
+ * Set up the zero page, mark it reserved, so that page count
+ * is not manipulated when freeing the page from user ptes.
+ */
+- mem_map_zero = _alloc_pages(GFP_KERNEL, 0);
++ mem_map_zero = alloc_pages(GFP_KERNEL, 0);
+ if (mem_map_zero == NULL) {
+ prom_printf("paging_init: Cannot alloc zero page.\n");
+ prom_halt();
+diff -urN vm-ref/include/asm-alpha/max_numnodes.h vm-numa/include/asm-alpha/max_numnodes.h
+--- vm-ref/include/asm-alpha/max_numnodes.h Thu Jan 1 01:00:00 1970
++++ vm-numa/include/asm-alpha/max_numnodes.h Thu Nov 1 20:05:26 2001
+@@ -0,0 +1,13 @@
++#ifndef _ASM_MAX_NUMNODES_H
++#define _ASM_MAX_NUMNODES_H
++
++#include <linux/config.h>
++
++#ifdef CONFIG_ALPHA_WILDFIRE
++#include <asm/core_wildfire.h>
++#define MAX_NUMNODES WILDFIRE_MAX_QBB
++#else
++#define MAX_NUMNODES 1
++#endif
++
++#endif
+diff -urN vm-ref/include/asm-alpha/mmzone.h vm-numa/include/asm-alpha/mmzone.h
+--- vm-ref/include/asm-alpha/mmzone.h Thu Nov 1 20:05:09 2001
++++ vm-numa/include/asm-alpha/mmzone.h Thu Nov 1 20:05:26 2001
+@@ -37,11 +37,9 @@
+ #ifdef CONFIG_ALPHA_WILDFIRE
+ # define ALPHA_PA_TO_NID(pa) ((pa) >> 36) /* 16 nodes max due 43bit kseg */
+ #define NODE_MAX_MEM_SIZE (64L * 1024L * 1024L * 1024L) /* 64 GB */
+-#define MAX_NUMNODES WILDFIRE_MAX_QBB
+ #else
+ # define ALPHA_PA_TO_NID(pa) (0)
+ #define NODE_MAX_MEM_SIZE (~0UL)
+-#define MAX_NUMNODES 1
+ #endif
+
+ #define PHYSADDR_TO_NID(pa) ALPHA_PA_TO_NID(pa)
+@@ -63,8 +61,6 @@
+ }
+ #endif
+
+-#ifdef CONFIG_DISCONTIGMEM
+-
+ /*
+ * Following are macros that each numa implmentation must define.
+ */
+@@ -121,7 +117,5 @@
+
+ #define numa_node_id() cputonode(smp_processor_id())
+ #endif /* CONFIG_NUMA */
+-
+-#endif /* CONFIG_DISCONTIGMEM */
+
+ #endif /* _ASM_MMZONE_H_ */
+diff -urN vm-ref/include/linux/mm.h vm-numa/include/linux/mm.h
+--- vm-ref/include/linux/mm.h Thu Nov 1 20:05:09 2001
++++ vm-numa/include/linux/mm.h Thu Nov 1 20:05:26 2001
+@@ -372,7 +372,6 @@
+ * can allocate highmem pages, the *get*page*() variants return
+ * virtual kernel addresses to the allocated page(s).
+ */
+-extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order));
+ extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist));
+ extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order);
+
+@@ -383,7 +382,13 @@
+ */
+ if (order >= MAX_ORDER)
+ return NULL;
+- return _alloc_pages(gfp_mask, order);
++ /*
++ * we get the zone list from the current node and the gfp_mask.
++ * This zone list contains a maximum of
++ * MAXNODES*MAX_NR_ZONES zones.
++ */
++ return __alloc_pages(gfp_mask, order,
++ NODE_DATA(numa_node_id())->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+ }
+
+ #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
+diff -urN vm-ref/include/linux/mmzone.h vm-numa/include/linux/mmzone.h
+--- vm-ref/include/linux/mmzone.h Thu Nov 1 20:05:09 2001
++++ vm-numa/include/linux/mmzone.h Thu Nov 1 20:05:26 2001
+@@ -79,8 +79,14 @@
+ * so despite the zonelist table being relatively big, the cache
+ * footprint of this construct is very small.
+ */
++#ifndef CONFIG_DISCONTIGMEM
++#define MAX_NUMNODES 1
++#else
++#include <asm/max_numnodes.h>
++#endif /* !CONFIG_DISCONTIGMEM */
++
+ typedef struct zonelist_struct {
+- zone_t * zones [MAX_NR_ZONES+1]; // NULL delimited
++ zone_t * zones [MAX_NUMNODES * MAX_NR_ZONES+1]; // NULL delimited
+ } zonelist_t;
+
+ #define GFP_ZONEMASK 0x0f
+@@ -126,6 +132,7 @@
+ extern void free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap,
+ unsigned long *zones_size, unsigned long paddr, unsigned long *zholes_size,
+ struct page *pmap);
++extern void build_all_zonelists(void);
+
+ extern pg_data_t contig_page_data;
+
+diff -urN vm-ref/init/main.c vm-numa/init/main.c
+--- vm-ref/init/main.c Thu Nov 1 20:05:09 2001
++++ vm-numa/init/main.c Thu Nov 1 20:05:26 2001
+@@ -553,6 +553,7 @@
+ lock_kernel();
+ printk(linux_banner);
+ setup_arch(&command_line);
++ build_all_zonelists();
+ printk("Kernel command line: %s\n", saved_command_line);
+ parse_options(command_line);
+ trap_init();
+diff -urN vm-ref/kernel/ksyms.c vm-numa/kernel/ksyms.c
+--- vm-ref/kernel/ksyms.c Thu Nov 1 20:05:09 2001
++++ vm-numa/kernel/ksyms.c Thu Nov 1 20:05:35 2001
+@@ -93,7 +93,6 @@
+
+ /* internal kernel memory management */
+ EXPORT_SYMBOL(start_aggressive_readahead);
+-EXPORT_SYMBOL(_alloc_pages);
+ EXPORT_SYMBOL(__alloc_pages);
+ EXPORT_SYMBOL(alloc_pages_node);
+ EXPORT_SYMBOL(__get_free_pages);
+@@ -113,7 +112,10 @@
+ EXPORT_SYMBOL(kfree);
+ EXPORT_SYMBOL(vfree);
+ EXPORT_SYMBOL(__vmalloc);
++#ifndef CONFIG_DISCONTIGMEM
++EXPORT_SYMBOL(contig_page_data);
+ EXPORT_SYMBOL(mem_map);
++#endif
+ EXPORT_SYMBOL(remap_page_range);
+ EXPORT_SYMBOL(max_mapnr);
+ EXPORT_SYMBOL(high_memory);
+diff -urN vm-ref/mm/numa.c vm-numa/mm/numa.c
+--- vm-ref/mm/numa.c Thu Nov 1 20:05:09 2001
++++ vm-numa/mm/numa.c Thu Nov 1 20:05:26 2001
+@@ -82,49 +82,4 @@
+ memset(pgdat->valid_addr_bitmap, 0, size);
+ }
+
+-static struct page * alloc_pages_pgdat(pg_data_t *pgdat, unsigned int gfp_mask,
+- unsigned int order)
+-{
+- return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+-}
+-
+-/*
+- * This can be refined. Currently, tries to do round robin, instead
+- * should do concentratic circle search, starting from current node.
+- */
+-struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order)
+-{
+- struct page *ret = 0;
+- pg_data_t *start, *temp;
+-#ifndef CONFIG_NUMA
+- unsigned long flags;
+- static pg_data_t *next = 0;
+-#endif
+-
+- if (order >= MAX_ORDER)
+- return NULL;
+-#ifdef CONFIG_NUMA
+- temp = NODE_DATA(numa_node_id());
+-#else
+- spin_lock_irqsave(&node_lock, flags);
+- if (!next) next = pgdat_list;
+- temp = next;
+- next = next->node_next;
+- spin_unlock_irqrestore(&node_lock, flags);
+-#endif
+- start = temp;
+- while (temp) {
+- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
+- return(ret);
+- temp = temp->node_next;
+- }
+- temp = pgdat_list;
+- while (temp != start) {
+- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order)))
+- return(ret);
+- temp = temp->node_next;
+- }
+- return(0);
+-}
+-
+ #endif /* CONFIG_DISCONTIGMEM */
+diff -urN vm-ref/mm/page_alloc.c vm-numa/mm/page_alloc.c
+--- vm-ref/mm/page_alloc.c Thu Nov 1 20:05:09 2001
++++ vm-numa/mm/page_alloc.c Thu Nov 1 20:05:26 2001
+@@ -220,14 +220,6 @@
+ return NULL;
+ }
+
+-#ifndef CONFIG_DISCONTIGMEM
+-struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order)
+-{
+- return __alloc_pages(gfp_mask, order,
+- contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK));
+-}
+-#endif
+-
+ static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *));
+ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
+ {
+@@ -710,13 +702,41 @@
+ /*
+ * Builds allocation fallback zone lists.
+ */
+-static inline void build_zonelists(pg_data_t *pgdat)
++static int __init build_zonelists_node(pg_data_t *pgdat, zonelist_t *zonelist, int j, int k)
++{
++ switch (k) {
++ zone_t *zone;
++ default:
++ BUG();
++ case ZONE_HIGHMEM:
++ zone = pgdat->node_zones + ZONE_HIGHMEM;
++ if (zone->size) {
++#ifndef CONFIG_HIGHMEM
++ BUG();
++#endif
++ zonelist->zones[j++] = zone;
++ }
++ case ZONE_NORMAL:
++ zone = pgdat->node_zones + ZONE_NORMAL;
++ if (zone->size)
++ zonelist->zones[j++] = zone;
++ case ZONE_DMA:
++ zone = pgdat->node_zones + ZONE_DMA;
++ if (zone->size)
++ zonelist->zones[j++] = zone;
++ }
++
++ return j;
++}
++
++static void __init build_zonelists(pg_data_t *pgdat)
+ {
+- int i, j, k;
++ int i, j, k, node, local_node;
+
++ local_node = pgdat->node_id;
++ printk("Building zonelist for node : %d\n", local_node);
+ for (i = 0; i <= GFP_ZONEMASK; i++) {
+ zonelist_t *zonelist;
+- zone_t *zone;
+
+ zonelist = pgdat->node_zonelists + i;
+ memset(zonelist, 0, sizeof(*zonelist));
+@@ -728,33 +748,32 @@
+ if (i & __GFP_DMA)
+ k = ZONE_DMA;
+
+- switch (k) {
+- default:
+- BUG();
+- /*
+- * fallthrough:
+- */
+- case ZONE_HIGHMEM:
+- zone = pgdat->node_zones + ZONE_HIGHMEM;
+- if (zone->size) {
+-#ifndef CONFIG_HIGHMEM
+- BUG();
+-#endif
+- zonelist->zones[j++] = zone;
+- }
+- case ZONE_NORMAL:
+- zone = pgdat->node_zones + ZONE_NORMAL;
+- if (zone->size)
+- zonelist->zones[j++] = zone;
+- case ZONE_DMA:
+- zone = pgdat->node_zones + ZONE_DMA;
+- if (zone->size)
+- zonelist->zones[j++] = zone;
+- }
++ j = build_zonelists_node(pgdat, zonelist, j, k);
++ /*
++ * Now we build the zonelist so that it contains the zones
++ * of all the other nodes.
++ * We don't want to pressure a particular node, so when
++ * building the zones for node N, we make sure that the
++ * zones coming right after the local ones are those from
++ * node N+1 (modulo N)
++ */
++ for (node = local_node + 1; node < numnodes; node++)
++ j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
++ for (node = 0; node < local_node; node++)
++ j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
++
+ zonelist->zones[j++] = NULL;
+ }
+ }
+
++void __init build_all_zonelists(void)
++{
++ int i;
++
++ for(i = 0 ; i < numnodes ; i++)
++ build_zonelists(NODE_DATA(i));
++}
++
+ #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
+ /*
+@@ -910,7 +929,6 @@
+ (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
+ }
+ }
+- build_zonelists(pgdat);
+ }
+
+ void __init free_area_init(unsigned long *zones_size)
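The reworked build_zonelists() above orders each node's fallback list round-robin: node N's zonelist starts with its own zones, then those of nodes N+1, N+2, ... up to numnodes-1, and finally wraps to nodes 0..N-1, so no single node absorbs everyone's overflow. A minimal stand-alone C sketch of that ordering (the node count is an assumed value, not the kernel's numnodes symbol):

#include <stdio.h>

/* Illustrative only: print the node fallback order the zonelist
 * construction above produces, for an assumed four-node machine. */
int main(void)
{
    int numnodes = 4;   /* assumed, stands in for the kernel's numnodes */
    int local_node, node;

    for (local_node = 0; local_node < numnodes; local_node++) {
        printf("node %d falls back to:", local_node);
        printf(" %d", local_node);              /* local zones first */
        for (node = local_node + 1; node < numnodes; node++)
            printf(" %d", node);                /* then the higher nodes */
        for (node = 0; node < local_node; node++)
            printf(" %d", node);                /* then wrap around */
        printf("\n");
    }
    return 0;
}
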
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2
new file mode 100644
index 000000000000..75c5a94320df
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2
@@ -0,0 +1,41 @@
+diff -urN parent-timeslice/include/linux/sched.h child-first/include/linux/sched.h
+--- parent-timeslice/include/linux/sched.h Thu May 3 18:17:56 2001
++++ child-first/include/linux/sched.h Thu May 3 18:19:44 2001
+@@ -301,7 +301,7 @@
+ * all fields in a single cacheline that are needed for
+ * the goodness() loop in schedule().
+ */
+- int counter;
++ volatile int counter;
+ int nice;
+ unsigned int policy;
+ struct mm_struct *mm;
+diff -urN parent-timeslice/kernel/fork.c child-first/kernel/fork.c
+--- parent-timeslice/kernel/fork.c Thu May 3 18:18:31 2001
++++ child-first/kernel/fork.c Thu May 3 18:20:40 2001
+@@ -665,15 +665,18 @@
+ p->pdeath_signal = 0;
+
+ /*
+- * "share" dynamic priority between parent and child, thus the
+- * total amount of dynamic priorities in the system doesnt change,
+- * more scheduling fairness. This is only important in the first
+- * timeslice, on the long run the scheduling behaviour is unchanged.
++ * Scheduling the child first is especially useful in avoiding a
++ * lot of copy-on-write faults if the child for a fork() just wants
++ * to do a few simple things and then exec().
+ */
+- p->counter = (current->counter + 1) >> 1;
+- current->counter >>= 1;
+- if (!current->counter)
++ {
++ int counter = current->counter;
++ p->counter = (counter + 1) >> 1;
++ current->counter = counter >> 1;
++ p->policy &= ~SCHED_YIELD;
++ current->policy |= SCHED_YIELD;
+ current->need_resched = 1;
++ }
+
+ /* Tell the parent if it can get back its timeslice when child exits */
+ p->get_child_timeslice = 1;
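The fork-time hunk above splits the parent's remaining timeslice with the child and marks the parent SCHED_YIELD, so the child runs first; a child that only does a little work before exec() then avoids most copy-on-write faults against a still-running parent. The split itself, as a stand-alone sketch (the starting tick count is made up):

#include <stdio.h>

/* Illustrative only: the timeslice split performed at fork time above. */
int main(void)
{
    int parent_counter = 7;                          /* assumed ticks left */
    int child_counter  = (parent_counter + 1) >> 1;  /* rounded-up half */

    parent_counter >>= 1;                            /* rounded-down half */
    printf("child gets %d ticks, parent keeps %d\n",
           child_counter, parent_counter);
    return 0;
}
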
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2
new file mode 100644
index 000000000000..8fd2ad956df1
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2
Binary files differ
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5
new file mode 100644
index 000000000000..9e1df6cc2e1b
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5
@@ -0,0 +1,60 @@
+diff -urN uml-ref/arch/um/kernel/time.c uml/arch/um/kernel/time.c
+--- uml-ref/arch/um/kernel/time.c Mon Jul 23 17:07:17 2001
++++ uml/arch/um/kernel/time.c Mon Jul 23 17:08:36 2001
+@@ -16,7 +16,7 @@
+ #include "user.h"
+ #include "process.h"
+
+-extern struct timeval xtime;
++extern volatile struct timeval xtime;
+
+ void timer_handler(int sig, void *sc, int usermode)
+ {
+diff -urN uml-ref/arch/um/kernel/trap_kern.c uml/arch/um/kernel/trap_kern.c
+--- uml-ref/arch/um/kernel/trap_kern.c Mon Jul 23 17:07:17 2001
++++ uml/arch/um/kernel/trap_kern.c Mon Jul 23 17:08:36 2001
+@@ -41,7 +41,7 @@
+ if(!vma) ok = 0;
+ else if(vma->vm_start > address){
+ if((vma->vm_flags & VM_STACK_FLAGS) != VM_STACK_FLAGS) ok = 0;
+- else if(expand_stack(vma, address)) ok = 0;
++ else if(expand_stack(vma, address, NULL)) ok = 0;
+ }
+ if(!ok){
+ if (current->thread.fault_catcher != NULL) {
+diff -urN uml-ref/include/asm-um/rwsem.h uml/include/asm-um/rwsem.h
+--- uml-ref/include/asm-um/rwsem.h Mon Jul 23 17:07:17 2001
++++ uml/include/asm-um/rwsem.h Thu Jan 1 01:00:00 1970
+@@ -1,10 +0,0 @@
+-#ifndef __UM_RWSEM_H__
+-#define __UM_RWSEM_H__
+-
+-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
+-#define __builtin_expect(exp,c) (exp)
+-#endif
+-
+-#include "asm/arch/rwsem.h"
+-
+-#endif
+diff -urN uml-ref/include/asm-um/rwsem_xchgadd.h uml/include/asm-um/rwsem_xchgadd.h
+--- uml-ref/include/asm-um/rwsem_xchgadd.h Thu Jan 1 01:00:00 1970
++++ uml/include/asm-um/rwsem_xchgadd.h Mon Jul 23 17:08:36 2001
+@@ -0,0 +1,6 @@
++#ifndef __UM_RWSEM_H__
++#define __UM_RWSEM_H__
++
++#include "asm/arch/rwsem_xchgadd.h"
++
++#endif
+diff -urN uml-ref/include/asm-um/timex.h uml/include/asm-um/timex.h
+--- uml-ref/include/asm-um/timex.h Mon Jul 23 17:07:17 2001
++++ uml/include/asm-um/timex.h Mon Jul 23 17:08:36 2001
+@@ -12,4 +12,8 @@
+ return 0;
+ }
+
++typedef long last_schedule_t;
++#define get_last_schedule() ({ jiffies; })
++#define last_schedule_before(a, b) ({ a < b; })
++
+ #endif
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1
new file mode 100644
index 000000000000..21b64f13fa64
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1
@@ -0,0 +1,18 @@
+--- 2.4.10pre2aa3/arch/um/kernel/irq.c.~1~ Sat Sep 1 02:40:55 2001
++++ 2.4.10pre2aa3/arch/um/kernel/irq.c Sat Sep 1 02:59:47 2001
+@@ -141,10 +141,12 @@
+
+ status = 1; /* Force the "do bottom halves" bit */
+
+- if (!(action->flags & SA_INTERRUPT))
+- __sti();
+-
+ do {
++ if (!(action->flags & SA_INTERRUPT))
++ __sti();
++ else
++ __cli();
++
+ status |= action->flags;
+ action->handler(irq, action->dev_id, regs);
+ action = action->next;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6
new file mode 100644
index 000000000000..9f2aa4a718a8
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6
@@ -0,0 +1,55 @@
+diff -urN atomic-alloc-ref/fs/buffer.c atomic-alloc/fs/buffer.c
+--- atomic-alloc-ref/fs/buffer.c Thu Sep 20 01:44:06 2001
++++ atomic-alloc/fs/buffer.c Thu Sep 20 20:00:42 2001
+@@ -2613,7 +2613,7 @@
+ spin_unlock(&free_list[index].lock);
+ write_unlock(&hash_table_lock);
+ spin_unlock(&lru_list_lock);
+- if (gfp_mask & __GFP_IO) {
++ if (gfp_mask & __GFP_IO && !(current->flags & PF_ATOMICALLOC)) {
+ if ((gfp_mask & __GFP_HIGHIO) || !PageHighMem(page)) {
+ if (sync_page_buffers(bh, gfp_mask)) {
+ /* no IO or waiting next time */
+diff -urN atomic-alloc-ref/include/linux/sched.h atomic-alloc/include/linux/sched.h
+--- atomic-alloc-ref/include/linux/sched.h Thu Sep 20 20:00:21 2001
++++ atomic-alloc/include/linux/sched.h Thu Sep 20 20:01:06 2001
+@@ -408,18 +408,16 @@
+ /*
+ * Per process flags
+ */
+-#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */
+- /* Not implemented yet, only for 486*/
+-#define PF_STARTING 0x00000002 /* being created */
+-#define PF_EXITING 0x00000004 /* getting shut down */
+-#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
+-#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
+-#define PF_DUMPCORE 0x00000200 /* dumped core */
+-#define PF_SIGNALED 0x00000400 /* killed by a signal */
+-#define PF_MEMALLOC 0x00000800 /* Allocating memory */
+-#define PF_FREE_PAGES 0x00002000 /* per process page freeing */
++#define PF_EXITING (1UL<<0) /* getting shut down */
++#define PF_FORKNOEXEC (1UL<<1) /* forked but didn't exec */
++#define PF_SUPERPRIV (1UL<<2) /* used super-user privileges */
++#define PF_DUMPCORE (1UL<<3) /* dumped core */
++#define PF_SIGNALED (1UL<<4) /* killed by a signal */
++#define PF_MEMALLOC (1UL<<5) /* Allocating memory */
++#define PF_USEDFPU (1UL<<6) /* task used FPU this quantum (SMP) */
++#define PF_ATOMICALLOC (1UL<<7) /* do not block during memalloc */
++#define PF_FREE_PAGES (1UL<<8) /* per process page freeing */
+
+-#define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */
+
+ /*
+ * Ptrace flags
+diff -urN atomic-alloc-ref/mm/slab.c atomic-alloc/mm/slab.c
+--- atomic-alloc-ref/mm/slab.c Thu Sep 20 01:44:20 2001
++++ atomic-alloc/mm/slab.c Thu Sep 20 20:00:42 2001
+@@ -1715,7 +1715,7 @@
+ unsigned int scan;
+ int ret = 0;
+
+- if (gfp_mask & __GFP_WAIT)
++ if (gfp_mask & __GFP_WAIT && !(current->flags & PF_ATOMICALLOC))
+ down(&cache_chain_sem);
+ else
+ if (down_trylock(&cache_chain_sem))
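With PF_ATOMICALLOC defined above, the buffer-cache and slab shrink paths skip their blocking branches whenever the current task carries the flag. A hedged kernel-style sketch of how a caller that must not sleep might use it (not part of the patch; assumes kernel context with <linux/sched.h> and <linux/slab.h>):

/* Illustrative sketch only: wrap an allocation so that, under this patch,
 * the buffer and slab shrinkers touched above will not block on our behalf. */
void *alloc_no_block(size_t size)
{
    unsigned long had_flag = current->flags & PF_ATOMICALLOC;
    void *p;

    current->flags |= PF_ATOMICALLOC;
    p = kmalloc(size, GFP_KERNEL);
    if (!had_flag)
        current->flags &= ~PF_ATOMICALLOC;
    return p;
}
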
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5
new file mode 100644
index 000000000000..d07cfb6d3b02
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5
@@ -0,0 +1,137 @@
+diff -urN 2.4.15pre1/fs/namei.c atomic-lookup/fs/namei.c
+--- 2.4.15pre1/fs/namei.c Wed Oct 24 08:04:22 2001
++++ atomic-lookup/fs/namei.c Fri Nov 9 04:34:12 2001
+@@ -448,9 +448,13 @@
+ {
+ struct dentry *dentry;
+ struct inode *inode;
+- int err;
++ int err, atomic;
+ unsigned int lookup_flags = nd->flags;
+
++ atomic = 0;
++ if (lookup_flags & LOOKUP_ATOMIC)
++ atomic = 1;
++
+ while (*name=='/')
+ name++;
+ if (!*name)
+@@ -519,6 +523,9 @@
+ /* This does the actual lookups.. */
+ dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+ if (!dentry) {
++ err = -EWOULDBLOCKIO;
++ if (atomic)
++ break;
+ dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+@@ -582,6 +589,9 @@
+ }
+ dentry = cached_lookup(nd->dentry, &this, 0);
+ if (!dentry) {
++ err = -EWOULDBLOCKIO;
++ if (atomic)
++ break;
+ dentry = real_lookup(nd->dentry, &this, 0);
+ err = PTR_ERR(dentry);
+ if (IS_ERR(dentry))
+@@ -924,6 +934,8 @@
+
+ if (f & O_DIRECTORY)
+ retval |= LOOKUP_DIRECTORY;
++ if (f & O_ATOMICLOOKUP)
++ retval |= LOOKUP_ATOMIC;
+
+ return retval;
+ }
+diff -urN 2.4.15pre1/include/asm-alpha/fcntl.h atomic-lookup/include/asm-alpha/fcntl.h
+--- 2.4.15pre1/include/asm-alpha/fcntl.h Sun Sep 23 21:11:40 2001
++++ atomic-lookup/include/asm-alpha/fcntl.h Fri Nov 9 04:34:12 2001
+@@ -20,6 +20,7 @@
+ #define O_DIRECTORY 0100000 /* must be a directory */
+ #define O_NOFOLLOW 0200000 /* don't follow links */
+ #define O_LARGEFILE 0400000 /* will be set by the kernel on every open */
++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
+ #define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */
+
+ #define F_DUPFD 0 /* dup */
+diff -urN 2.4.15pre1/include/asm-i386/fcntl.h atomic-lookup/include/asm-i386/fcntl.h
+--- 2.4.15pre1/include/asm-i386/fcntl.h Sun Sep 23 21:11:40 2001
++++ atomic-lookup/include/asm-i386/fcntl.h Fri Nov 9 04:34:12 2001
+@@ -20,6 +20,7 @@
+ #define O_LARGEFILE 0100000
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+diff -urN 2.4.15pre1/include/asm-ia64/fcntl.h atomic-lookup/include/asm-ia64/fcntl.h
+--- 2.4.15pre1/include/asm-ia64/fcntl.h Thu Nov 16 15:37:42 2000
++++ atomic-lookup/include/asm-ia64/fcntl.h Fri Nov 9 04:34:12 2001
+@@ -28,6 +28,7 @@
+ #define O_LARGEFILE 0100000
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+diff -urN 2.4.15pre1/include/asm-ppc/fcntl.h atomic-lookup/include/asm-ppc/fcntl.h
+--- 2.4.15pre1/include/asm-ppc/fcntl.h Tue Nov 6 02:04:53 2001
++++ atomic-lookup/include/asm-ppc/fcntl.h Fri Nov 9 04:34:42 2001
+@@ -23,6 +23,7 @@
+ #define O_NOFOLLOW 0100000 /* don't follow links */
+ #define O_LARGEFILE 0200000
+ #define O_DIRECT 0400000 /* direct disk access hint */
++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+diff -urN 2.4.15pre1/include/asm-sparc/fcntl.h atomic-lookup/include/asm-sparc/fcntl.h
+--- 2.4.15pre1/include/asm-sparc/fcntl.h Sun Sep 23 21:11:42 2001
++++ atomic-lookup/include/asm-sparc/fcntl.h Fri Nov 9 04:34:12 2001
+@@ -20,6 +20,7 @@
+ #define O_DIRECTORY 0x10000 /* must be a directory */
+ #define O_NOFOLLOW 0x20000 /* don't follow links */
+ #define O_LARGEFILE 0x40000
++#define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */
+ #define O_DIRECT 0x100000 /* direct disk access hint */
+
+ #define F_DUPFD 0 /* dup */
+diff -urN 2.4.15pre1/include/asm-sparc64/fcntl.h atomic-lookup/include/asm-sparc64/fcntl.h
+--- 2.4.15pre1/include/asm-sparc64/fcntl.h Sun Sep 23 21:11:42 2001
++++ atomic-lookup/include/asm-sparc64/fcntl.h Fri Nov 9 04:34:12 2001
+@@ -20,6 +20,7 @@
+ #define O_DIRECTORY 0x10000 /* must be a directory */
+ #define O_NOFOLLOW 0x20000 /* don't follow links */
+ #define O_LARGEFILE 0x40000
++#define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */
+ #define O_DIRECT 0x100000 /* direct disk access hint */
+
+
+diff -urN 2.4.15pre1/include/linux/errno.h atomic-lookup/include/linux/errno.h
+--- 2.4.15pre1/include/linux/errno.h Fri Aug 17 05:02:27 2001
++++ atomic-lookup/include/linux/errno.h Fri Nov 9 04:34:12 2001
+@@ -21,6 +21,9 @@
+ #define EBADTYPE 527 /* Type not supported by server */
+ #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
+
++/* Defined for TUX async IO */
++#define EWOULDBLOCKIO 530 /* Would block due to block-IO */
++
+ #endif
+
+ #endif
+diff -urN 2.4.15pre1/include/linux/fs.h atomic-lookup/include/linux/fs.h
+--- 2.4.15pre1/include/linux/fs.h Tue Nov 6 02:04:53 2001
++++ atomic-lookup/include/linux/fs.h Fri Nov 9 04:34:12 2001
+@@ -1260,6 +1260,7 @@
+ #define LOOKUP_POSITIVE (8)
+ #define LOOKUP_PARENT (16)
+ #define LOOKUP_NOALT (32)
++#define LOOKUP_ATOMIC (64)
+ /*
+ * Type of the last component on LOOKUP_PARENT
+ */
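O_ATOMICLOOKUP turns the pathname walk into LOOKUP_ATOMIC: any component not already in the dentry cache aborts the lookup with -EWOULDBLOCKIO instead of doing blocking I/O, which is what TUX's async file handling relies on. A hedged user-space sketch of the retry convention (the flag and errno values are copied from the i386 hunks above; a stock libc defines neither, and in practice the error is consumed by an in-kernel caller such as TUX rather than by ordinary programs):

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

#ifndef O_ATOMICLOOKUP
#define O_ATOMICLOOKUP 01000000   /* i386 value from the patch */
#endif
#define EWOULDBLOCKIO 530         /* "would block due to block-IO" */

/* Illustrative only: try a cache-only open first, fall back to a normal
 * blocking open if some path component was not in the dentry cache. */
static int open_cached_first(const char *path)
{
    int fd = open(path, O_RDONLY | O_ATOMICLOOKUP);

    if (fd < 0 && errno == EWOULDBLOCKIO)
        fd = open(path, O_RDONLY);
    return fd;
}

int main(void)
{
    int fd = open_cached_first("/etc/hostname");

    if (fd >= 0)
        close(fd);
    return 0;
}
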
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1
new file mode 100644
index 000000000000..fee75a43d12a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1
@@ -0,0 +1,183 @@
+diff -urN ref/include/linux/socket.h netexports/include/linux/socket.h
+--- ref/include/linux/socket.h Sat Apr 28 20:29:47 2001
++++ netexports/include/linux/socket.h Sun Apr 29 17:28:53 2001
+@@ -254,6 +254,11 @@
+ extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen);
+ extern int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr);
+ extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
++struct socket;
++struct file * sock_map_file(struct socket *sock);
++extern int sock_map_fd(struct socket *sock);
++extern struct socket *sockfd_lookup(int fd, int *err);
++
+ #endif
+ #endif /* not kernel and not glibc */
+ #endif /* _LINUX_SOCKET_H */
+diff -urN ref/include/net/tcp.h netexports/include/net/tcp.h
+--- ref/include/net/tcp.h Sat Apr 28 20:34:59 2001
++++ netexports/include/net/tcp.h Sun Apr 29 17:28:53 2001
+@@ -810,6 +810,7 @@
+ extern void tcp_push_one(struct sock *, unsigned mss_now);
+ extern void tcp_send_ack(struct sock *sk);
+ extern void tcp_send_delayed_ack(struct sock *sk);
++extern void cleanup_rbuf(struct sock *sk, int copied);
+
+ /* tcp_timer.c */
+ extern void tcp_init_xmit_timers(struct sock *);
+diff -urN ref/net/ipv4/tcp.c netexports/net/ipv4/tcp.c
+--- ref/net/ipv4/tcp.c Sat Apr 28 05:24:49 2001
++++ netexports/net/ipv4/tcp.c Sun Apr 29 17:28:53 2001
+@@ -1270,7 +1270,7 @@
+ * calculation of whether or not we must ACK for the sake of
+ * a window update.
+ */
+-static void cleanup_rbuf(struct sock *sk, int copied)
++void cleanup_rbuf(struct sock *sk, int copied)
+ {
+ struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
+ int time_to_ack = 0;
+diff -urN ref/net/netsyms.c netexports/net/netsyms.c
+--- ref/net/netsyms.c Sat Apr 28 05:24:49 2001
++++ netexports/net/netsyms.c Sun Apr 29 17:28:53 2001
+@@ -106,6 +106,8 @@
+ EXPORT_SYMBOL(sock_create);
+ EXPORT_SYMBOL(sock_alloc);
+ EXPORT_SYMBOL(sock_release);
++EXPORT_SYMBOL(sock_map_fd);
++EXPORT_SYMBOL(sockfd_lookup);
+ EXPORT_SYMBOL(sock_setsockopt);
+ EXPORT_SYMBOL(sock_getsockopt);
+ EXPORT_SYMBOL(sock_sendmsg);
+@@ -307,6 +309,7 @@
+ EXPORT_SYMBOL(memcpy_fromiovecend);
+ EXPORT_SYMBOL(csum_partial_copy_fromiovecend);
+ EXPORT_SYMBOL(tcp_v4_lookup_listener);
++EXPORT_SYMBOL(cleanup_rbuf);
+ /* UDP/TCP exported functions for TCPv6 */
+ EXPORT_SYMBOL(udp_ioctl);
+ EXPORT_SYMBOL(udp_connect);
+@@ -324,6 +327,7 @@
+ EXPORT_SYMBOL(tcp_getsockopt);
+ EXPORT_SYMBOL(tcp_recvmsg);
+ EXPORT_SYMBOL(tcp_send_synack);
++EXPORT_SYMBOL(tcp_send_skb);
+ EXPORT_SYMBOL(tcp_check_req);
+ EXPORT_SYMBOL(tcp_child_process);
+ EXPORT_SYMBOL(tcp_parse_options);
+diff -urN ref/net/socket.c netexports/net/socket.c
+--- ref/net/socket.c Sat Apr 28 05:24:50 2001
++++ netexports/net/socket.c Sun Apr 29 17:28:53 2001
+@@ -114,7 +114,7 @@
+ * in the operation structures but are done directly via the socketcall() multiplexor.
+ */
+
+-static struct file_operations socket_file_ops = {
++struct file_operations socket_file_ops = {
+ llseek: sock_lseek,
+ read: sock_read,
+ write: sock_write,
+@@ -330,51 +330,62 @@
+ * but we take care of internal coherence yet.
+ */
+
+-static int sock_map_fd(struct socket *sock)
++struct file * sock_map_file(struct socket *sock)
+ {
+- int fd;
++ struct file *file;
+ struct qstr this;
+ char name[32];
+
++ file = get_empty_filp();
++
++ if (!file)
++ return ERR_PTR(-ENFILE);
++
++ sprintf(name, "[%lu]", sock->inode->i_ino);
++ this.name = name;
++ this.len = strlen(name);
++ this.hash = sock->inode->i_ino;
++
++ file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
++ if (!file->f_dentry) {
++ put_filp(file);
++ return ERR_PTR(-ENOMEM);
++ }
++ file->f_dentry->d_op = &sockfs_dentry_operations;
++ d_add(file->f_dentry, sock->inode);
++ file->f_vfsmnt = mntget(sock_mnt);
++
++ if (sock->file)
++ BUG();
++ sock->file = file;
++ file->f_op = sock->inode->i_fop = &socket_file_ops;
++ file->f_mode = 3;
++ file->f_flags = O_RDWR;
++ file->f_pos = 0;
++
++ return file;
++}
++
++int sock_map_fd(struct socket *sock)
++{
++ int fd;
++ struct file *file;
++
+ /*
+ * Find a file descriptor suitable for return to the user.
+ */
+
+ fd = get_unused_fd();
+- if (fd >= 0) {
+- struct file *file = get_empty_filp();
+-
+- if (!file) {
+- put_unused_fd(fd);
+- fd = -ENFILE;
+- goto out;
+- }
++ if (fd < 0)
++ return fd;
+
+- sprintf(name, "[%lu]", sock->inode->i_ino);
+- this.name = name;
+- this.len = strlen(name);
+- this.hash = sock->inode->i_ino;
+-
+- file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
+- if (!file->f_dentry) {
+- put_filp(file);
+- put_unused_fd(fd);
+- fd = -ENOMEM;
+- goto out;
+- }
+- file->f_dentry->d_op = &sockfs_dentry_operations;
+- d_add(file->f_dentry, sock->inode);
+- file->f_vfsmnt = mntget(sock_mnt);
+-
+- sock->file = file;
+- file->f_op = sock->inode->i_fop = &socket_file_ops;
+- file->f_mode = 3;
+- file->f_flags = O_RDWR;
+- file->f_pos = 0;
+- fd_install(fd, file);
++ file = sock_map_file(sock);
++ if (IS_ERR(file)) {
++ put_unused_fd(fd);
++ return PTR_ERR(file);
+ }
++ fd_install(fd, file);
+
+-out:
+ return fd;
+ }
+
+@@ -801,6 +812,8 @@
+ }
+
+ out:
++ if (sock->sk != sk)
++ BUG();
+ release_sock(sock->sk);
+ return 0;
+ }
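The socket.c rework above splits sock_map_fd() into sock_map_file(), which builds the struct file for a socket, plus the descriptor installation, and exports sock_map_fd() and sockfd_lookup() so in-kernel users can drive them directly. A hedged kernel-module sketch of the exported entry point (assumed context, not part of the patch):

#include <linux/net.h>
#include <linux/in.h>
#include <linux/socket.h>

/* Illustrative only: create a TCP socket in kernel space and hand the
 * calling process a file descriptor for it via the exported helper. */
int make_tcp_fd(void)
{
    struct socket *sock;
    int err, fd;

    err = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
    if (err < 0)
        return err;

    fd = sock_map_fd(sock);         /* exported by the hunk above */
    if (fd < 0)
        sock_release(sock);
    return fd;
}
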
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3
new file mode 100644
index 000000000000..e5b2e9b6264a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3
@@ -0,0 +1,69 @@
+diff -urN pagecache-atomic-ref/include/linux/fs.h pagecache-atomic/include/linux/fs.h
+--- pagecache-atomic-ref/include/linux/fs.h Mon Aug 13 03:21:42 2001
++++ pagecache-atomic/include/linux/fs.h Mon Aug 13 03:22:41 2001
+@@ -1370,7 +1370,9 @@
+ extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size);
+ extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *);
+ extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *);
+-extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t);
++extern void __do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t, int);
++#define do_generic_file_read(filp, ppos, desc, actor) __do_generic_file_read(filp, ppos, desc, actor, 0)
++#define do_generic_file_read_atomic(filp, ppos, desc, actor) __do_generic_file_read(filp, ppos, desc, actor, 1)
+ extern loff_t no_llseek(struct file *file, loff_t offset, int origin);
+ extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin);
+ extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *);
+diff -urN pagecache-atomic-ref/kernel/ksyms.c pagecache-atomic/kernel/ksyms.c
+--- pagecache-atomic-ref/kernel/ksyms.c Mon Aug 13 03:21:42 2001
++++ pagecache-atomic/kernel/ksyms.c Mon Aug 13 03:21:54 2001
+@@ -208,7 +208,7 @@
+ EXPORT_SYMBOL(generic_block_bmap);
+ EXPORT_SYMBOL(waitfor_one_page);
+ EXPORT_SYMBOL(generic_file_read);
+-EXPORT_SYMBOL(do_generic_file_read);
++EXPORT_SYMBOL(__do_generic_file_read);
+ EXPORT_SYMBOL(generic_file_write);
+ EXPORT_SYMBOL(generic_direct_IO);
+ EXPORT_SYMBOL(generic_file_mmap);
+diff -urN pagecache-atomic-ref/mm/filemap.c pagecache-atomic/mm/filemap.c
+--- pagecache-atomic-ref/mm/filemap.c Mon Aug 13 03:21:42 2001
++++ pagecache-atomic/mm/filemap.c Mon Aug 13 03:21:54 2001
+@@ -1128,7 +1128,7 @@
+ * This is really ugly. But the goto's actually try to clarify some
+ * of the logic when it comes to error handling etc.
+ */
+-void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor)
++void __do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor, int nonblock)
+ {
+ struct inode *inode = filp->f_dentry->d_inode;
+ struct address_space *mapping = inode->i_mapping;
+@@ -1211,9 +1211,16 @@
+ page_cache_get(page);
+ spin_unlock(&pagecache_lock);
+
+- if (!Page_Uptodate(page))
++ if (!Page_Uptodate(page)) {
++ if (nonblock) {
++ page_cache_release(page);
++ desc->error = -EWOULDBLOCKIO;
++ break;
++ }
+ goto page_not_up_to_date;
+- generic_file_readahead(reada_ok, filp, inode, page);
++ }
++ if (!nonblock)
++ generic_file_readahead(reada_ok, filp, inode, page);
+ page_ok:
+ /* If users can be writing to this page using arbitrary
+ * virtual addresses, take care about potential aliasing
+@@ -1290,6 +1297,11 @@
+ break;
+
+ no_cached_page:
++ if (nonblock) {
++ spin_unlock(&pagecache_lock);
++ desc->error = -EWOULDBLOCKIO;
++ break;
++ }
+ /*
+ * Ok, it wasn't cached, so we need to create a new
+ * page..
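__do_generic_file_read() grows a nonblock argument: when set, a page that is missing or not up to date ends the read with desc->error = -EWOULDBLOCKIO rather than waiting on I/O, and readahead is skipped. A hedged kernel-style sketch of a caller using the atomic wrapper (assumed context; TUX's real callers appear in the 62_tux-generic-file-read-2 patch further down):

/* Illustrative only: read from the page cache without blocking; a cache
 * miss is reported back so the caller can queue the work asynchronously. */
static int read_cached_only(struct file *filp, read_descriptor_t *desc,
                            read_actor_t actor)
{
    do_generic_file_read_atomic(filp, &filp->f_pos, desc, actor);
    if (desc->error == -EWOULDBLOCKIO)
        return 1;           /* not in cache: retry from process context */
    return desc->error;
}
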
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2
new file mode 100644
index 000000000000..4625013db74c
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2
Binary files differ
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1
new file mode 100644
index 000000000000..13692e77c87a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1
@@ -0,0 +1,22 @@
+diff -rNu linux-2.4.9-ac5/net/Config.in linux/net/Config.in
+--- linux-2.4.9-ac5/net/Config.in Tue Aug 21 14:26:10 2001
++++ linux/net/Config.in Fri Aug 31 17:36:42 2001
+@@ -20,6 +20,7 @@
+ tristate 'Unix domain sockets' CONFIG_UNIX
+ bool 'TCP/IP networking' CONFIG_INET
+ if [ "$CONFIG_INET" = "y" ]; then
++ source net/tux/Config.in
+ source net/ipv4/Config.in
+ if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+ # IPv6 as module will cause a CRASH if you try to unload it
+diff -rNu linux-2.4.9-ac5/net/Makefile linux/net/Makefile
+--- linux-2.4.9-ac5/net/Makefile Tue Aug 21 14:26:19 2001
++++ linux/net/Makefile Fri Aug 31 17:36:42 2001
+@@ -26,6 +26,7 @@
+ endif
+ endif
+
++subdir-$(CONFIG_TUX) += tux
+ subdir-$(CONFIG_KHTTPD) += khttpd
+ subdir-$(CONFIG_NETLINK) += netlink
+ subdir-$(CONFIG_PACKET) += packet
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1
new file mode 100644
index 000000000000..01b63bd39dc3
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1
@@ -0,0 +1,23 @@
+diff -rNu linux-2.4.9-ac10/include/net/sock.h linux/include/net/sock.h
+--- linux-2.4.9-ac10/include/net/sock.h Sat Sep 8 16:35:03 2001
++++ linux/include/net/sock.h Sat Sep 22 09:49:09 2001
+@@ -677,6 +677,7 @@
+
+ int (*backlog_rcv) (struct sock *sk,
+ struct sk_buff *skb);
++ void (*create_child)(struct sock *sk, struct sock *newsk);
+ void (*destruct)(struct sock *sk);
+ };
+
+diff -rNu linux-2.4.9-ac10/net/ipv4/tcp_minisocks.c linux/net/ipv4/tcp_minisocks.c
+--- linux-2.4.9-ac10/net/ipv4/tcp_minisocks.c Sat Sep 8 16:35:04 2001
++++ linux/net/ipv4/tcp_minisocks.c Sat Sep 22 09:49:09 2001
+@@ -682,6 +682,8 @@
+ if ((filter = newsk->filter) != NULL)
+ sk_filter_charge(newsk, filter);
+ #endif
++ if (sk->create_child)
++ sk->create_child(sk, newsk);
+
+ /* Now setup tcp_opt */
+ newtp = &(newsk->tp_pinfo.af_tcp);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1
new file mode 100644
index 000000000000..07f7287a6a9f
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1
@@ -0,0 +1,12 @@
+--- tuxlayerprivate/include/net/sock.h.~1~ Sat Apr 28 20:32:17 2001
++++ tuxlayerprivate/include/net/sock.h Sun Apr 29 16:44:42 2001
+@@ -669,6 +669,9 @@
+ /* RPC layer private data */
+ void *user_data;
+
++ /* TUX application layer private data */
++ void *tux_data;
++
+ /* Callbacks */
+ void (*state_change)(struct sock *sk);
+ void (*data_ready)(struct sock *sk,int bytes);
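Between the create_child callback added to struct sock above and the tux_data pointer added here, an in-kernel server can attach per-connection state the moment the TCP code clones a listening socket, before user space ever sees the child. A hedged sketch of how such a hook might be wired up (assumed context, not part of the patches):

/* Illustrative only: propagate per-listener state to every accepted sock. */
static void my_create_child(struct sock *sk, struct sock *newsk)
{
    newsk->tux_data = sk->tux_data;     /* inherit the listener's state */
}

static void hook_listener(struct socket *listen_sock, void *state)
{
    listen_sock->sk->tux_data = state;
    listen_sock->sk->create_child = my_create_child;
}
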
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2
new file mode 100644
index 000000000000..dc05bb6d9b6b
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2
@@ -0,0 +1,40 @@
+diff -urN 2.4.10pre14/net/netsyms.c tux-dprintk/net/netsyms.c
+--- 2.4.10pre14/net/netsyms.c Sat Sep 22 08:06:26 2001
++++ tux-dprintk/net/netsyms.c Sat Sep 22 11:07:38 2001
+@@ -24,6 +24,7 @@
+ #include <net/checksum.h>
+ #include <linux/etherdevice.h>
+ #include <net/route.h>
++#include <net/tux.h>
+ #ifdef CONFIG_HIPPI
+ #include <linux/hippidevice.h>
+ #endif
+@@ -574,3 +575,7 @@
+ EXPORT_SYMBOL(softnet_data);
+
+ #endif /* CONFIG_NET */
++
++EXPORT_SYMBOL(tux_Dprintk);
++EXPORT_SYMBOL(tux_TDprintk);
++
+diff -urN 2.4.10pre14/net/socket.c tux-dprintk/net/socket.c
+--- 2.4.10pre14/net/socket.c Sat Sep 22 08:06:26 2001
++++ tux-dprintk/net/socket.c Sat Sep 22 11:08:27 2001
+@@ -85,6 +85,7 @@
+ #include <net/tcp.h>
+ #include <net/udp.h>
+ #include <net/scm.h>
++#include <net/tux.h>
+ #include <linux/netfilter.h>
+
+ static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
+@@ -1738,6 +1739,9 @@
+ bluez_init();
+ #endif
+ }
++
++int tux_Dprintk;
++int tux_TDprintk = 1;
+
+ int socket_get_info(char *buffer, char **start, off_t offset, int length)
+ {
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1
new file mode 100644
index 000000000000..67061fc3cc79
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1
@@ -0,0 +1,122 @@
+diff -urN 2.4.4/arch/alpha/kernel/alpha_ksyms.c exports/arch/alpha/kernel/alpha_ksyms.c
+--- 2.4.4/arch/alpha/kernel/alpha_ksyms.c Sat Apr 28 05:24:29 2001
++++ exports/arch/alpha/kernel/alpha_ksyms.c Sun Apr 29 18:55:56 2001
+@@ -127,15 +127,11 @@
+ /* In-kernel system calls. */
+ EXPORT_SYMBOL(kernel_thread);
+ EXPORT_SYMBOL(sys_open);
+-EXPORT_SYMBOL(sys_dup);
+ EXPORT_SYMBOL(sys_exit);
+-EXPORT_SYMBOL(sys_write);
+-EXPORT_SYMBOL(sys_read);
+ EXPORT_SYMBOL(sys_lseek);
+ EXPORT_SYMBOL(__kernel_execve);
+ EXPORT_SYMBOL(sys_setsid);
+ EXPORT_SYMBOL(sys_sync);
+-EXPORT_SYMBOL(sys_wait4);
+
+ /* Networking helper routines. */
+ EXPORT_SYMBOL(csum_tcpudp_magic);
+diff -urN 2.4.4/include/asm-alpha/unistd.h exports/include/asm-alpha/unistd.h
+--- 2.4.4/include/asm-alpha/unistd.h Sun Apr 1 20:11:24 2001
++++ exports/include/asm-alpha/unistd.h Sun Apr 29 18:55:53 2001
+@@ -515,7 +515,7 @@
+ return sys_open(name, mode, flags);
+ }
+
+-extern long sys_dup(int);
++extern long sys_dup(unsigned int);
+ static inline long dup(int fd)
+ {
+ return sys_dup(fd);
+@@ -540,13 +540,11 @@
+
+ #define exit(x) _exit(x)
+
+-extern long sys_write(int, const char *, int);
+ static inline long write(int fd, const char * buf, int nr)
+ {
+ return sys_write(fd, buf, nr);
+ }
+
+-extern long sys_read(int, char *, int);
+ static inline long read(int fd, char * buf, int nr)
+ {
+ return sys_read(fd, buf, nr);
+diff -urN 2.4.4/include/linux/fs.h exports/include/linux/fs.h
+--- 2.4.4/include/linux/fs.h Sat Apr 28 05:24:47 2001
++++ exports/include/linux/fs.h Sun Apr 29 18:55:53 2001
+@@ -554,6 +554,14 @@
+
+ extern int fcntl_getlk(unsigned int, struct flock *);
+ extern int fcntl_setlk(unsigned int, unsigned int, struct flock *);
++extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg);
++extern asmlinkage long sys_dup(unsigned int fildes);
++extern asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd);
++extern asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count);
++extern asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count);
++extern asmlinkage long sys_chroot(const char * filename);
++extern asmlinkage long sys_chdir(const char * filename);
++
+
+ extern int fcntl_getlk64(unsigned int, struct flock64 *);
+ extern int fcntl_setlk64(unsigned int, unsigned int, struct flock64 *);
+diff -urN 2.4.4/kernel/ksyms.c exports/kernel/ksyms.c
+--- 2.4.4/kernel/ksyms.c Sat Apr 28 05:24:48 2001
++++ exports/kernel/ksyms.c Sun Apr 29 18:55:53 2001
+@@ -9,6 +9,7 @@
+ * by Bjorn Ekwall <bj0rn@blox.se>
+ */
+
++#define __KERNEL_SYSCALLS__
+ #include <linux/config.h>
+ #include <linux/slab.h>
+ #include <linux/module.h>
+@@ -48,6 +49,8 @@
+ #include <linux/tty.h>
+ #include <linux/in6.h>
+ #include <asm/checksum.h>
++#include <linux/unistd.h>
++
+
+ #if defined(CONFIG_PROC_FS)
+ #include <linux/proc_fs.h>
+@@ -149,6 +152,13 @@
+ EXPORT_SYMBOL(lookup_one);
+ EXPORT_SYMBOL(lookup_hash);
+ EXPORT_SYMBOL(sys_close);
++EXPORT_SYMBOL(sys_read);
++EXPORT_SYMBOL(sys_write);
++EXPORT_SYMBOL(sys_dup);
++EXPORT_SYMBOL(sys_chroot);
++EXPORT_SYMBOL(sys_chdir);
++EXPORT_SYMBOL(sys_fcntl);
++EXPORT_SYMBOL(do_pipe);
+ EXPORT_SYMBOL(dcache_lock);
+ EXPORT_SYMBOL(d_alloc_root);
+ EXPORT_SYMBOL(d_delete);
+@@ -176,6 +186,7 @@
+ EXPORT_SYMBOL(invalidate_inodes);
+ EXPORT_SYMBOL(invalidate_inode_pages);
+ EXPORT_SYMBOL(truncate_inode_pages);
++EXPORT_SYMBOL(invalidate_inode_pages2);
+ EXPORT_SYMBOL(fsync_dev);
+ EXPORT_SYMBOL(permission);
+ EXPORT_SYMBOL(vfs_permission);
+@@ -358,6 +369,8 @@
+ EXPORT_SYMBOL(add_wait_queue_exclusive);
+ EXPORT_SYMBOL(remove_wait_queue);
+
++EXPORT_SYMBOL(flush_signal_handlers);
++
+ /* The notion of irq probe/assignment is foreign to S/390 */
+
+ #if !defined(CONFIG_ARCH_S390)
+@@ -427,6 +440,7 @@
+ EXPORT_SYMBOL(interruptible_sleep_on_timeout);
+ EXPORT_SYMBOL(schedule);
+ EXPORT_SYMBOL(schedule_timeout);
++EXPORT_SYMBOL(sys_wait4);
+ EXPORT_SYMBOL(jiffies);
+ EXPORT_SYMBOL(xtime);
+ EXPORT_SYMBOL(do_gettimeofday);
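The exports above make sys_read(), sys_write(), sys_dup(), sys_chroot(), sys_chdir(), sys_fcntl(), do_pipe() and sys_wait4() callable from modules, which is how the TUX module does file and process work from kernel threads. A hedged sketch of the usual 2.4-era calling convention (assumed context; the syscalls expect user addresses, hence the set_fs() dance):

#include <linux/fs.h>
#include <asm/uaccess.h>

/* Illustrative only: write a kernel buffer through the exported sys_write(). */
static ssize_t kernel_write_fd(unsigned int fd, const char *buf, size_t count)
{
    mm_segment_t old_fs = get_fs();
    ssize_t ret;

    set_fs(KERNEL_DS);
    ret = sys_write(fd, buf, count);
    set_fs(old_fs);
    return ret;
}
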
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2
new file mode 100644
index 000000000000..e4edc9d743e6
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2
@@ -0,0 +1,136 @@
+diff -urN 2.4.5pre5/fs/proc/proc_misc.c tux-kstat/fs/proc/proc_misc.c
+--- 2.4.5pre5/fs/proc/proc_misc.c Tue May 1 19:35:29 2001
++++ tux-kstat/fs/proc/proc_misc.c Wed May 23 19:07:26 2001
+@@ -259,6 +259,66 @@
+ }
+ #endif
+
++
++/*
++ * print out TUX internal statistics into /proc/stat.
++ * (Most of them are not maintained if CONFIG_TUX_DEBUG is off.)
++ */
++
++static int print_tux_procinfo (char *page)
++{
++ unsigned int len = 0, i;
++
++#define P(x) \
++ do { len += sprintf(page + len, #x ": %u\n", x); } while(0)
++
++ P(kstat.input_fastpath);
++ P(kstat.input_slowpath);
++ P(kstat.inputqueue_got_packet);
++ P(kstat.inputqueue_no_packet);
++ P(kstat.nr_keepalive_optimized);
++ P(kstat.parse_static_incomplete);
++ P(kstat.parse_static_redirect);
++ P(kstat.parse_static_cachemiss);
++ P(kstat.parse_static_nooutput);
++ P(kstat.parse_static_normal);
++ P(kstat.parse_dynamic_incomplete);
++ P(kstat.parse_dynamic_redirect);
++ P(kstat.parse_dynamic_cachemiss);
++ P(kstat.parse_dynamic_nooutput);
++ P(kstat.parse_dynamic_normal);
++ P(kstat.complete_parsing);
++ P(kstat.nr_free_pending);
++ P(kstat.nr_allocated);
++ P(kstat.nr_idle_input_pending);
++ P(kstat.nr_output_space_pending);
++ P(kstat.nr_input_pending);
++ P(kstat.nr_cachemiss_pending);
++ P(kstat.nr_secondary_pending);
++ P(kstat.nr_output_pending);
++ P(kstat.nr_redirect_pending);
++ P(kstat.nr_finish_pending);
++ P(kstat.nr_userspace_pending);
++ P(kstat.nr_postpone_pending);
++ P(kstat.static_lookup_cachemisses);
++ P(kstat.static_sendfile_cachemisses);
++ P(kstat.user_lookup_cachemisses);
++ P(kstat.user_fetch_cachemisses);
++ P(kstat.user_sendobject_cachemisses);
++ P(kstat.user_sendobject_write_misses);
++ P(kstat.nr_keepalive_reqs);
++ P(kstat.nr_nonkeepalive_reqs);
++
++ len += sprintf(page + len, "keephist: ");
++ for (i = 0; i < KEEPALIVE_HIST_SIZE; i++)
++ if (kstat.keepalive_hist[i])
++ len += sprintf(page + len, "%d(%d) ",
++ i, kstat.keepalive_hist[i]);
++ len += sprintf(page + len, "\n");
++#undef P
++
++ return len;
++}
+ static int kstat_read_proc(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+ {
+@@ -333,6 +393,8 @@
+ kstat.context_swtch,
+ xtime.tv_sec - jif / HZ,
+ total_forks);
++
++ len += print_tux_procinfo(page+len);
+
+ return proc_calc_metrics(page, start, off, count, eof, len);
+ }
+diff -urN 2.4.5pre5/include/linux/kernel_stat.h tux-kstat/include/linux/kernel_stat.h
+--- 2.4.5pre5/include/linux/kernel_stat.h Tue May 15 21:40:17 2001
++++ tux-kstat/include/linux/kernel_stat.h Wed May 23 19:06:38 2001
+@@ -33,6 +33,53 @@
+ unsigned int ierrors, oerrors;
+ unsigned int collisions;
+ unsigned int context_swtch;
++ unsigned int context_swtch_cross;
++ unsigned int nr_free_pending;
++ unsigned int nr_allocated;
++ unsigned int nr_idle_input_pending;
++ unsigned int nr_output_space_pending;
++ unsigned int nr_work_pending;
++ unsigned int nr_input_pending;
++ unsigned int nr_cachemiss_pending;
++ unsigned int nr_secondary_pending;
++ unsigned int nr_output_pending;
++ unsigned int nr_redirect_pending;
++ unsigned int nr_postpone_pending;
++ unsigned int nr_finish_pending;
++ unsigned int nr_userspace_pending;
++ unsigned int static_lookup_cachemisses;
++ unsigned int static_sendfile_cachemisses;
++ unsigned int user_lookup_cachemisses;
++ unsigned int user_fetch_cachemisses;
++ unsigned int user_sendobject_cachemisses;
++ unsigned int user_sendobject_write_misses;
++ unsigned int user_sendbuf_cachemisses;
++ unsigned int user_sendbuf_write_misses;
++#define URL_HIST_SIZE 1000
++ unsigned int url_hist_hits[URL_HIST_SIZE];
++ unsigned int url_hist_misses[URL_HIST_SIZE];
++ unsigned int input_fastpath;
++ unsigned int input_slowpath;
++ unsigned int inputqueue_got_packet;
++ unsigned int inputqueue_no_packet;
++ unsigned int nr_keepalive_optimized;
++
++ unsigned int parse_static_incomplete;
++ unsigned int parse_static_redirect;
++ unsigned int parse_static_cachemiss;
++ unsigned int parse_static_nooutput;
++ unsigned int parse_static_normal;
++ unsigned int parse_dynamic_incomplete;
++ unsigned int parse_dynamic_redirect;
++ unsigned int parse_dynamic_cachemiss;
++ unsigned int parse_dynamic_nooutput;
++ unsigned int parse_dynamic_normal;
++ unsigned int complete_parsing;
++
++ unsigned int nr_keepalive_reqs;
++ unsigned int nr_nonkeepalive_reqs;
++#define KEEPALIVE_HIST_SIZE 100
++ unsigned int keepalive_hist[KEEPALIVE_HIST_SIZE];
+ };
+
+ extern struct kernel_stat kstat;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1
new file mode 100644
index 000000000000..2284d3d4f5b9
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1
@@ -0,0 +1,42 @@
+diff -urN ref/include/linux/sched.h tuxsched/include/linux/sched.h
+--- ref/include/linux/sched.h Sat Apr 28 20:29:48 2001
++++ tuxsched/include/linux/sched.h Sun Apr 29 17:31:18 2001
+@@ -396,6 +396,10 @@
+ int (*notifier)(void *priv);
+ void *notifier_data;
+ sigset_t *notifier_mask;
++
++ /* TUX state */
++ void *tux_info;
++ void (*tux_exit)(void);
+
+ /* Thread group tracking */
+ u32 parent_exec_id;
+diff -urN ref/kernel/exit.c tuxsched/kernel/exit.c
+--- ref/kernel/exit.c Sat Apr 28 18:37:45 2001
++++ tuxsched/kernel/exit.c Sun Apr 29 17:30:54 2001
+@@ -439,6 +439,13 @@
+ #ifdef CONFIG_BSD_PROCESS_ACCT
+ acct_process(code);
+ #endif
++ if (current->tux_info) {
++#ifdef CONFIG_TUX_DEBUG
++ printk("Possibly unexpected TUX-thread exit(%ld) at %p?\n",
++ code, __builtin_return_address(0));
++#endif
++ current->tux_exit();
++ }
+ __exit_mm(tsk);
+
+ lock_kernel();
+diff -urN ref/kernel/fork.c tuxsched/kernel/fork.c
+--- ref/kernel/fork.c Sat Apr 28 18:37:45 2001
++++ tuxsched/kernel/fork.c Sun Apr 29 17:30:54 2001
+@@ -574,6 +574,7 @@
+ goto fork_out;
+
+ *p = *current;
++ p->tux_info = NULL;
+
+ retval = -EAGAIN;
+ if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur)
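task_struct gains a tux_info pointer and a tux_exit() callback; do_exit() invokes the hook so request state is torn down even if a TUX thread exits unexpectedly, and fork() clears the pointer so children never inherit it. A hedged sketch of a worker registering itself (assumed context, not from the patch; assumes <linux/sched.h> and <linux/slab.h>):

/* Illustrative only: mark the current task as a TUX worker so an
 * unexpected exit() still releases its per-request state. */
static void my_tux_exit(void)
{
    kfree(current->tux_info);
    current->tux_info = NULL;
}

static void become_tux_thread(void *state)
{
    current->tux_info = state;
    current->tux_exit = my_tux_exit;
}
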
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2
new file mode 100644
index 000000000000..040043331c7b
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2
@@ -0,0 +1,113 @@
+diff -urN 2.4.11pre2/arch/alpha/kernel/entry.S tux-syscall/arch/alpha/kernel/entry.S
+--- 2.4.11pre2/arch/alpha/kernel/entry.S Sat Aug 11 08:03:53 2001
++++ tux-syscall/arch/alpha/kernel/entry.S Tue Oct 2 23:58:24 2001
+@@ -988,7 +988,15 @@
+ .quad alpha_ni_syscall
+ .quad alpha_ni_syscall /* 220 */
+ .quad alpha_ni_syscall
++#ifdef CONFIG_TUX
++ .quad __sys_tux
++#else
++# ifdef CONFIG_TUX_MODULE
++ .quad sys_tux
++# else
+ .quad alpha_ni_syscall
++# endif
++#endif
+ .quad alpha_ni_syscall
+ .quad alpha_ni_syscall
+ .quad alpha_ni_syscall /* 225 */
+diff -urN 2.4.11pre2/arch/i386/kernel/entry.S tux-syscall/arch/i386/kernel/entry.S
+--- 2.4.11pre2/arch/i386/kernel/entry.S Tue Oct 2 00:08:30 2001
++++ tux-syscall/arch/i386/kernel/entry.S Tue Oct 2 23:58:56 2001
+@@ -619,7 +619,15 @@
+ .long SYMBOL_NAME(sys_madvise)
+ .long SYMBOL_NAME(sys_getdents64) /* 220 */
+ .long SYMBOL_NAME(sys_fcntl64)
++#ifdef CONFIG_TUX
++ .long SYMBOL_NAME(__sys_tux)
++#else
++# ifdef CONFIG_TUX_MODULE
++ .long SYMBOL_NAME(sys_tux)
++# else
+ .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */
++# endif
++#endif
+ .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */
+
+ .rept NR_syscalls-(.-sys_call_table)/4
+diff -urN 2.4.11pre2/net/netsyms.c tux-syscall/net/netsyms.c
+--- 2.4.11pre2/net/netsyms.c Sun Sep 23 21:11:43 2001
++++ tux-syscall/net/netsyms.c Tue Oct 2 23:58:24 2001
+@@ -55,7 +55,7 @@
+
+ extern struct net_proto_family inet_family_ops;
+
+-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE)
++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_TUX) || defined (CONFIG_TUX_MODULE)
+ #include <linux/in6.h>
+ #include <linux/icmpv6.h>
+ #include <net/ipv6.h>
+@@ -285,7 +285,7 @@
+ EXPORT_SYMBOL(register_inet6addr_notifier);
+ EXPORT_SYMBOL(unregister_inet6addr_notifier);
+ #endif
+-#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE)
++#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_TUX) || defined (CONFIG_TUX_MODULE)
+ /* inet functions common to v4 and v6 */
+ EXPORT_SYMBOL(inet_release);
+ EXPORT_SYMBOL(inet_stream_connect);
+@@ -572,5 +572,11 @@
+
+ EXPORT_SYMBOL(net_call_rx_atomic);
+ EXPORT_SYMBOL(softnet_data);
++
++#ifdef CONFIG_TUX_MODULE
++EXPORT_SYMBOL(tux_module_lock);
++EXPORT_SYMBOL(tux_module);
++EXPORT_SYMBOL(sys_tux_ptr);
++#endif
+
+ #endif /* CONFIG_NET */
+diff -urN 2.4.11pre2/net/socket.c tux-syscall/net/socket.c
+--- 2.4.11pre2/net/socket.c Tue Oct 2 00:08:46 2001
++++ tux-syscall/net/socket.c Tue Oct 2 23:58:24 2001
+@@ -1764,3 +1764,38 @@
+ len = 0;
+ return len;
+ }
++
++#ifdef CONFIG_TUX_MODULE
++
++int (*sys_tux_ptr) (unsigned int action, user_req_t *u_info) = NULL;
++
++struct module *tux_module = NULL;
++spinlock_t tux_module_lock = SPIN_LOCK_UNLOCKED;
++
++asmlinkage int sys_tux (unsigned int action, user_req_t *u_info)
++{
++ int ret;
++
++ if (current->tux_info)
++ return sys_tux_ptr(action, u_info);
++
++ ret = -ENOSYS;
++ spin_lock(&tux_module_lock);
++ if (!tux_module)
++ goto out_unlock;
++ __MOD_INC_USE_COUNT(tux_module);
++ spin_unlock(&tux_module_lock);
++
++ if (!sys_tux_ptr)
++ TUX_BUG();
++ ret = sys_tux_ptr(action, u_info);
++
++ spin_lock(&tux_module_lock);
++ __MOD_DEC_USE_COUNT(tux_module);
++out_unlock:
++ spin_unlock(&tux_module_lock);
++
++ return ret;
++}
++
++#endif
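The sys_tux() stub above dispatches either straight into a built-in TUX (__sys_tux) or, with CONFIG_TUX_MODULE, through the sys_tux_ptr function pointer, guarded by tux_module_lock and a module use count so the module cannot unload mid-call. A hedged sketch of how the module side might register against that stub (assumed context; user_req_t and net/tux.h come from the TUX patch proper, present here only as a bzip2 blob):

#include <linux/module.h>
#include <linux/spinlock.h>
#include <net/tux.h>

/* Illustrative only: plug a modular TUX implementation into the stub. */
extern int (*sys_tux_ptr) (unsigned int action, user_req_t *u_info);
extern struct module *tux_module;
extern spinlock_t tux_module_lock;

static int my_sys_tux(unsigned int action, user_req_t *u_info)
{
    return -ENOSYS;             /* the real work lives in the module */
}

int init_module(void)
{
    spin_lock(&tux_module_lock);
    sys_tux_ptr = my_sys_tux;
    tux_module  = THIS_MODULE;  /* lets sys_tux() pin us while in use */
    spin_unlock(&tux_module_lock);
    return 0;
}

void cleanup_module(void)
{
    spin_lock(&tux_module_lock);
    tux_module  = NULL;
    sys_tux_ptr = NULL;
    spin_unlock(&tux_module_lock);
}
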
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2
new file mode 100644
index 000000000000..26b98e71b68a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2
@@ -0,0 +1,69 @@
+diff -urN 2.4.5pre5/include/linux/sysctl.h tux-sysctl/include/linux/sysctl.h
+--- 2.4.5pre5/include/linux/sysctl.h Tue May 22 22:04:27 2001
++++ tux-sysctl/include/linux/sysctl.h Wed May 23 19:20:48 2001
+@@ -157,7 +157,8 @@
+ NET_TR=14,
+ NET_DECNET=15,
+ NET_ECONET=16,
+- NET_KHTTPD=17
++ NET_KHTTPD=17,
++ NET_TUX=18
+ };
+
+ /* /proc/sys/kernel/random */
+@@ -471,6 +472,55 @@
+ NET_DECNET_DST_GC_INTERVAL = 9,
+ NET_DECNET_CONF = 10,
+ NET_DECNET_DEBUG_LEVEL = 255
++};
++
++/* /proc/sys/net/tux/ */
++enum {
++ NET_TUX_DOCROOT = 1,
++ NET_TUX_LOGFILE = 2,
++ NET_TUX_EXTCGI = 3,
++ NET_TUX_STOP = 4,
++ NET_TUX_CLIENTPORT = 5,
++ NET_TUX_LOGGING = 6,
++ NET_TUX_SERVERPORT = 7,
++ NET_TUX_THREADS = 8,
++ NET_TUX_KEEPALIVE_TIMEOUT = 9,
++ NET_TUX_MAX_KEEPALIVE_BW = 10,
++ NET_TUX_DEFER_ACCEPT = 11,
++ NET_TUX_MAX_FREE_REQUESTS = 12,
++ NET_TUX_MAX_CONNECT = 13,
++ NET_TUX_MAX_BACKLOG = 14,
++ NET_TUX_MODE_FORBIDDEN = 15,
++ NET_TUX_MODE_ALLOWED = 16,
++ NET_TUX_MODE_USERSPACE = 17,
++ NET_TUX_MODE_CGI = 18,
++ NET_TUX_CGI_UID = 19,
++ NET_TUX_CGI_GID = 20,
++ NET_TUX_CGIROOT = 21,
++ NET_TUX_LOGENTRY_ALIGN_ORDER = 22,
++ NET_TUX_NONAGLE = 23,
++ NET_TUX_ACK_PINGPONG = 24,
++ NET_TUX_PUSH_ALL = 25,
++ NET_TUX_ZEROCOPY_PARSE = 26,
++ NET_CONFIG_TUX_DEBUG_BLOCKING = 27,
++ NET_TUX_PAGE_AGE_START = 28,
++ NET_TUX_PAGE_AGE_ADV = 29,
++ NET_TUX_PAGE_AGE_MAX = 30,
++ NET_TUX_VIRTUAL_SERVER = 31,
++ NET_TUX_MAX_OBJECT_SIZE = 32,
++ NET_TUX_COMPRESSION = 33,
++ NET_TUX_NOID = 34,
++ NET_TUX_CGI_INHERIT_CPU = 35,
++ NET_TUX_CGI_CPU_MASK = 36,
++ NET_TUX_ZEROCOPY_HEADER = 37,
++ NET_TUX_ZEROCOPY_SENDFILE = 38,
++ NET_TUX_ALL_USERSPACE = 39,
++ NET_TUX_REDIRECT_LOGGING = 40,
++ NET_TUX_REFERER_LOGGING = 41,
++ NET_TUX_MAX_HEADER_LEN = 42,
++ NET_TUX_404_PAGE = 43,
++ NET_TUX_APPLICATION_PROTOCOL = 44,
++ NET_TUX_MAX_KEEPALIVES = 45,
+ };
+
+ /* /proc/sys/net/khttpd/ */
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1
new file mode 100644
index 000000000000..df7850171025
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1
@@ -0,0 +1,11 @@
+--- 2.4.11pre2aa1/include/linux/timer.h.~1~ Wed Oct 3 00:35:01 2001
++++ 2.4.11pre2aa1/include/linux/timer.h Wed Oct 3 00:44:55 2001
+@@ -20,6 +20,8 @@
+ void (*function)(unsigned long);
+ };
+
++typedef struct timer_list timer_t;
++
+ extern void add_timer(struct timer_list * timer);
+ extern int del_timer(struct timer_list * timer);
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2
new file mode 100644
index 000000000000..da98d2536a4a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2
@@ -0,0 +1,79 @@
+diff -urN ref/fs/dcache.c new/fs/dcache.c
+--- ref/fs/dcache.c Sat Jun 9 00:04:48 2001
++++ new/fs/dcache.c Tue Jun 12 15:44:51 2001
+@@ -62,6 +62,10 @@
+ dentry->d_op->d_release(dentry);
+ if (dname_external(dentry))
+ kfree(dentry->d_name.name);
++ if (dentry->d_tux_data) {
++ kfree(dentry->d_tux_data);
++ dentry->d_tux_data = NULL;
++ }
+ kmem_cache_free(dentry_cache, dentry);
+ dentry_stat.nr_dentry--;
+ }
+@@ -616,6 +620,7 @@
+ dentry->d_name.hash = name->hash;
+ dentry->d_op = NULL;
+ dentry->d_fsdata = NULL;
++ dentry->d_tux_data = NULL;
+ dentry->d_mounted = 0;
+ INIT_LIST_HEAD(&dentry->d_hash);
+ INIT_LIST_HEAD(&dentry->d_lru);
+@@ -1162,6 +1167,26 @@
+ }
+ out:
+ return ino;
++}
++
++void flush_dentry_tuxinfo (void)
++{
++ struct list_head *chain, *tmp;
++ struct dentry *dentry;
++ int i;
++
++ spin_lock(&dcache_lock);
++ for (i = 0; i <= d_hash_mask; i++) {
++ chain = dentry_hashtable + i;
++ tmp = chain->next;
++ while (tmp != chain) {
++ dentry = list_entry(tmp, struct dentry, d_hash);
++ kfree(dentry->d_tux_data);
++ dentry->d_tux_data = NULL;
++ tmp = tmp->next;
++ }
++ }
++ spin_unlock(&dcache_lock);
+ }
+
+ static void __init dcache_init(unsigned long mempages)
+diff -urN ref/include/linux/dcache.h new/include/linux/dcache.h
+--- ref/include/linux/dcache.h Tue Jun 12 05:30:15 2001
++++ new/include/linux/dcache.h Tue Jun 12 15:44:07 2001
+@@ -80,6 +80,7 @@
+ struct super_block * d_sb; /* The root of the dentry tree */
+ unsigned long d_vfs_flags;
+ void * d_fsdata; /* fs-specific data */
++ void *d_tux_data; /* TUX-specific data */
+ unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */
+ };
+
+@@ -167,6 +168,7 @@
+ extern void shrink_dcache_sb(struct super_block *);
+ extern void shrink_dcache_parent(struct dentry *);
+ extern int d_invalidate(struct dentry *);
++extern void flush_dentry_tuxinfo (void);
+
+ #define shrink_dcache() prune_dcache(0)
+ struct zone_struct;
+diff -urN ref/kernel/ksyms.c new/kernel/ksyms.c
+--- ref/kernel/ksyms.c Tue Jun 12 15:43:46 2001
++++ new/kernel/ksyms.c Tue Jun 12 15:44:07 2001
+@@ -229,6 +229,7 @@
+ EXPORT_SYMBOL(prune_dcache);
+ EXPORT_SYMBOL(shrink_dcache_sb);
+ EXPORT_SYMBOL(shrink_dcache_parent);
++EXPORT_SYMBOL(flush_dentry_tuxinfo);
+ EXPORT_SYMBOL(find_inode_number);
+ EXPORT_SYMBOL(is_subdir);
+ EXPORT_SYMBOL(get_unused_fd);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2
new file mode 100644
index 000000000000..033be85d71d6
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2
@@ -0,0 +1,21 @@
+diff -urN tux-gen-read-ref/net/tux/output.c tux-gen-read/net/tux/output.c
+--- tux-gen-read-ref/net/tux/output.c Fri Oct 12 08:53:29 2001
++++ tux-gen-read/net/tux/output.c Fri Oct 12 08:54:31 2001
+@@ -191,7 +191,7 @@
+ req->desc.buf = (char *) &sock_desc;
+ req->desc.error = 0;
+ Dprintk("sendfile(), desc.count: %d.\n", req->desc.count);
+- do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, sock_send_actor, nonblock);
++ __do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, sock_send_actor, nonblock);
+ if (req->desc.written > 0) {
+ req->bytes_sent += req->desc.written;
+ req->output_len -= req->desc.written;
+@@ -259,7 +259,7 @@
+ req->desc.buf = NULL;
+ req->desc.error = 0;
+
+- do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc,
++ __do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc,
+ file_fetch_actor, nonblock);
+ if (nonblock && (req->desc.error == -EWOULDBLOCKIO))
+ return 1;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1
new file mode 100644
index 000000000000..fda52175a473
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1
@@ -0,0 +1,11 @@
+--- 2.4.10pre2aa2/net/tux/logger.c.~1~ Fri Aug 31 17:52:05 2001
++++ 2.4.10pre2aa2/net/tux/logger.c Fri Aug 31 17:53:41 2001
+@@ -613,7 +613,7 @@
+ * Reduce the cache footprint of the logger file - it's
+ * typically write-once.
+ */
+- flush_inode_pages(log_filp->f_dentry->d_inode);
++ invalidate_inode_pages2(log_filp->f_dentry->d_inode->i_mapping);
+
+ out_lock:
+ spin_lock(&log_lock);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1
new file mode 100644
index 000000000000..a7a49c64f04c
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1
@@ -0,0 +1,32 @@
+diff -urN uml-ref/arch/um/kernel/sys_call_table.c uml/arch/um/kernel/sys_call_table.c
+--- uml-ref/arch/um/kernel/sys_call_table.c Tue Jul 10 17:58:59 2001
++++ uml/arch/um/kernel/sys_call_table.c Tue Jul 10 17:59:15 2001
+@@ -12,12 +12,9 @@
+ extern syscall_handler_t sys_ni_syscall;
+ extern syscall_handler_t sys_exit;
+ extern syscall_handler_t sys_fork;
+-extern syscall_handler_t sys_read;
+-extern syscall_handler_t sys_write;
+ extern syscall_handler_t sys_creat;
+ extern syscall_handler_t sys_link;
+ extern syscall_handler_t sys_unlink;
+-extern syscall_handler_t sys_chdir;
+ extern syscall_handler_t sys_mknod;
+ extern syscall_handler_t sys_chmod;
+ extern syscall_handler_t sys_lchown16;
+@@ -56,15 +53,12 @@
+ extern syscall_handler_t sys_umount;
+ extern syscall_handler_t sys_ni_syscall;
+ extern syscall_handler_t sys_ioctl;
+-extern syscall_handler_t sys_fcntl;
+ extern syscall_handler_t sys_ni_syscall;
+ extern syscall_handler_t sys_setpgid;
+ extern syscall_handler_t sys_ni_syscall;
+ extern syscall_handler_t sys_olduname;
+ extern syscall_handler_t sys_umask;
+-extern syscall_handler_t sys_chroot;
+ extern syscall_handler_t sys_ustat;
+-extern syscall_handler_t sys_dup2;
+ extern syscall_handler_t sys_getppid;
+ extern syscall_handler_t sys_getpgrp;
+ extern syscall_handler_t sys_sigaction;