From 715b351ca0263d384c9491eab685e69f13a333ad Mon Sep 17 00:00:00 2001
From: Daniel Robbins
Date: Fri, 16 Nov 2001 19:05:45 +0000
Subject: andrea patches...

---
 .../files/2.4.15pre1aa1/00_3.5G-address-space-2    |  160 ++
 .../files/2.4.15pre1aa1/00_alpha-fp-disabled-2     |   92 ++
 .../files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1 |   12 +
 .../files/2.4.15pre1aa1/00_binfmt-elf-checks-1     |  125 ++
 .../2.4.15pre1aa1/00_flush-inode-reschedule-2      |   24 +
 .../files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1 |   11 +
 .../files/2.4.15pre1aa1/00_loop-sem-1              |   28 +
 .../files/2.4.15pre1aa1/00_lowlatency-fixes-2      |  105 ++
 .../files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2     |  Bin 0 -> 30928 bytes
 .../files/2.4.15pre1aa1/00_mmap-enomem-1           |   10 +
 .../files/2.4.15pre1aa1/00_module-gfp-5            |  119 ++
 .../files/2.4.15pre1aa1/00_nanosleep-5             |   92 ++
 .../2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2    |  Bin 0 -> 5274 bytes
 .../files/2.4.15pre1aa1/00_o_direct-4              |  304 ++++
 .../files/2.4.15pre1aa1/00_ordered-freeing-1       |   45 +
 .../files/2.4.15pre1aa1/00_poll-nfds-2             |   12 +
 .../files/2.4.15pre1aa1/00_rb-export-1             |   52 +
 .../files/2.4.15pre1aa1/00_rcu-poll-2              |  457 ++++++
 .../files/2.4.15pre1aa1/00_rwsem-fair-23           | 1400 +++++++++++++++++
 .../2.4.15pre1aa1/00_rwsem-fair-23-recursive-4     |  271 ++++
 .../2.4.15pre1aa1/00_silent-stack-overflow-10      |  394 +++++
 .../files/2.4.15pre1aa1/00_spinlock-cacheline-2    |  136 ++
 .../files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1 |   20 +
 .../files/2.4.15pre1aa1/00_vm_raend-race-1         |   35 +
 .../files/2.4.15pre1aa1/00_vmalloc-cache-flush-1   |   10 +
 .../files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1     |   12 +
 .../files/2.4.15pre1aa1/00_x86-sa_interrupt-1      |   18 +
 .../files/2.4.15pre1aa1/00_xtime-lock-1            |   22 +
 .../files/2.4.15pre1aa1/10_compiler.h-2            |   77 +
 .../files/2.4.15pre1aa1/10_lvm-deadlock-fix-1      |   10 +
 .../files/2.4.15pre1aa1/10_lvm-incremental-1       |   80 +
 .../files/2.4.15pre1aa1/10_lvm-snapshot-check-1    |   39 +
 .../2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2   |   64 +
 .../files/2.4.15pre1aa1/10_no-virtual-2            |   56 +
 .../files/2.4.15pre1aa1/10_numa-sched-13           |  800 ++++++++++
 .../files/2.4.15pre1aa1/10_parent-timeslice-8      |   61 +
 .../linux-sources/files/2.4.15pre1aa1/10_vm-13     | 1595 ++++++++++++++++++++
 .../files/2.4.15pre1aa1/20_highmem-debug-7         |   37 +
 .../linux-sources/files/2.4.15pre1aa1/20_numa-mm-1 |  327 ++++
 .../files/2.4.15pre1aa1/20_share-timeslice-2       |   41 +
 .../files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2  |  Bin 0 -> 105675 bytes
 .../files/2.4.15pre1aa1/51_uml-ac-to-aa-5          |   60 +
 .../files/2.4.15pre1aa1/54_uml-sa_interrupt-1      |   18 +
 .../files/2.4.15pre1aa1/60_atomic-alloc-6          |   55 +
 .../files/2.4.15pre1aa1/60_atomic-lookup-5         |  137 ++
 .../files/2.4.15pre1aa1/60_net-exports-1           |  183 +++
 .../files/2.4.15pre1aa1/60_pagecache-atomic-3      |   69 +
 .../files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2   |  Bin 0 -> 85178 bytes
 .../files/2.4.15pre1aa1/60_tux-config-stuff-1      |   22 +
 .../files/2.4.15pre1aa1/60_tux-create_child-1      |   23 +
 .../files/2.4.15pre1aa1/60_tux-data-1              |   12 +
 .../files/2.4.15pre1aa1/60_tux-dprintk-2           |   40 +
 .../files/2.4.15pre1aa1/60_tux-exports-1           |  122 ++
 .../files/2.4.15pre1aa1/60_tux-kstat-2             |  136 ++
 .../files/2.4.15pre1aa1/60_tux-process-1           |   42 +
 .../files/2.4.15pre1aa1/60_tux-syscall-2           |  113 ++
 .../files/2.4.15pre1aa1/60_tux-sysctl-2            |   69 +
 .../files/2.4.15pre1aa1/60_tux-timer_t-1           |   11 +
 .../linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2 |   79 +
 .../files/2.4.15pre1aa1/62_tux-generic-file-read-2 |   21 +
 .../2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1 |   11 +
 .../linux-sources/files/2.4.15pre1aa1/62_tux-uml-1 |   32 +
 62 files changed, 8408 insertions(+)
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1
 create mode 100644 sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1

diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2
new file mode 100644
index 000000000000..f27b0347208d
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_3.5G-address-space-2
@@ -0,0 +1,160 @@
+diff -urN 2.4.11pre6/arch/i386/Makefile 3.5G/arch/i386/Makefile
+--- 2.4.11pre6/arch/i386/Makefile Tue May 1 19:35:18 2001
++++ 3.5G/arch/i386/Makefile Tue Oct 9 04:45:22 2001
+@@ -106,6 +106,9 @@
+
+ MAKEBOOT = $(MAKE) -C arch/$(ARCH)/boot
+
++arch/i386/vmlinux.lds: arch/i386/vmlinux.lds.S FORCE
++	$(CPP) -C -P -I$(HPATH) -imacros $(HPATH)/asm-i386/page_offset.h -Ui386 arch/i386/vmlinux.lds.S >arch/i386/vmlinux.lds
++
+ vmlinux: arch/i386/vmlinux.lds
+
+ FORCE: ;
+diff -urN 2.4.11pre6/arch/i386/config.in 3.5G/arch/i386/config.in
+--- 2.4.11pre6/arch/i386/config.in Tue Oct 9 00:10:11 2001
++++ 3.5G/arch/i386/config.in Tue Oct 9 04:45:36 2001
+@@ -158,12 +158,15 @@
+ 	"off CONFIG_NOHIGHMEM \
+ 	4GB CONFIG_HIGHMEM4G \
+ 	64GB CONFIG_HIGHMEM64G" off
+-if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
++if [ "$CONFIG_HIGHMEM4G" = "y" -o "$CONFIG_HIGHMEM64G" = "y" ]; then
+ 	define_bool CONFIG_HIGHMEM y
++else
++	define_bool CONFIG_HIGHMEM n
+ fi
+ if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
+-	define_bool CONFIG_HIGHMEM y
+ 	define_bool CONFIG_X86_PAE y
++else
++	bool '3.5GB user address space' CONFIG_05GB
+ fi
+
+ bool 'Math emulation' CONFIG_MATH_EMULATION
+diff -urN 2.4.11pre6/arch/i386/vmlinux.lds.S 3.5G/arch/i386/vmlinux.lds.S
+--- 2.4.11pre6/arch/i386/vmlinux.lds.S Thu Jan 1 01:00:00 1970
++++ 3.5G/arch/i386/vmlinux.lds.S Tue Oct 9 04:45:22 2001
+@@ -0,0 +1,83 @@
++/* ld script to make i386 Linux kernel
++ * Written by Martin Mares ;
++ */
++OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
++OUTPUT_ARCH(i386)
++ENTRY(_start)
++SECTIONS
++{
++  . = PAGE_OFFSET_RAW + 0x100000;
++  _text = .;			/* Text and read-only data */
++  .text : {
++	*(.text)
++	*(.fixup)
++	*(.gnu.warning)
++	} = 0x9090
++  .text.lock : { *(.text.lock) }	/* out-of-line lock text */
++
++  _etext = .;			/* End of text section */
++
++  .rodata : { *(.rodata) *(.rodata.*) }
++  .kstrtab : { *(.kstrtab) }
++
++  . = ALIGN(16);		/* Exception table */
++  __start___ex_table = .;
++  __ex_table : { *(__ex_table) }
++  __stop___ex_table = .;
++
++  __start___ksymtab = .;	/* Kernel symbol table */
++  __ksymtab : { *(__ksymtab) }
++  __stop___ksymtab = .;
++
++  .data : {			/* Data */
++	*(.data)
++	CONSTRUCTORS
++	}
++
++  _edata = .;			/* End of data section */
++
++  . = ALIGN(8192);		/* init_task */
++  .data.init_task : { *(.data.init_task) }
++
++  . = ALIGN(4096);		/* Init code and data */
++  __init_begin = .;
++  .text.init : { *(.text.init) }
++  .data.init : { *(.data.init) }
++  . = ALIGN(16);
++  __setup_start = .;
++  .setup.init : { *(.setup.init) }
++  __setup_end = .;
++  __initcall_start = .;
++  .initcall.init : { *(.initcall.init) }
++  __initcall_end = .;
++  . = ALIGN(4096);
++  __init_end = .;
++
++  . = ALIGN(4096);
++  .data.page_aligned : { *(.data.idt) }
++
++  . = ALIGN(32);
++  .data.cacheline_aligned : { *(.data.cacheline_aligned) }
++
++  __bss_start = .;		/* BSS */
++  .bss : {
++	*(.bss)
++	}
++  _end = . ;
++
++  /* Sections to be discarded */
++  /DISCARD/ : {
++	*(.text.exit)
++	*(.data.exit)
++	*(.exitcall.exit)
++	}
++
++  /* Stabs debugging sections.  */
++  .stab 0 : { *(.stab) }
++  .stabstr 0 : { *(.stabstr) }
++  .stab.excl 0 : { *(.stab.excl) }
++  .stab.exclstr 0 : { *(.stab.exclstr) }
++  .stab.index 0 : { *(.stab.index) }
++  .stab.indexstr 0 : { *(.stab.indexstr) }
++  .comment 0 : { *(.comment) }
++}
+diff -urN 2.4.11pre6/include/asm-i386/page.h 3.5G/include/asm-i386/page.h
+--- 2.4.11pre6/include/asm-i386/page.h Sun Sep 23 21:11:40 2001
++++ 3.5G/include/asm-i386/page.h Tue Oct 9 04:45:22 2001
+@@ -78,7 +78,9 @@
+  * and CONFIG_HIGHMEM64G options in the kernel configuration.
+  */
+
+-#define __PAGE_OFFSET		(0xC0000000)
++#include
++
++#define __PAGE_OFFSET		(PAGE_OFFSET_RAW)
+
+ #ifndef __ASSEMBLY__
+
+diff -urN 2.4.11pre6/include/asm-i386/page_offset.h 3.5G/include/asm-i386/page_offset.h
+--- 2.4.11pre6/include/asm-i386/page_offset.h Thu Jan 1 01:00:00 1970
++++ 3.5G/include/asm-i386/page_offset.h Tue Oct 9 04:45:22 2001
+@@ -0,0 +1,6 @@
++#include
++#ifndef CONFIG_05GB
++#define PAGE_OFFSET_RAW 0xC0000000
++#else
++#define PAGE_OFFSET_RAW 0xE0000000
++#endif
+diff -urN 2.4.11pre6/include/asm-i386/processor.h 3.5G/include/asm-i386/processor.h
+--- 2.4.11pre6/include/asm-i386/processor.h Tue Oct 9 00:11:19 2001
++++ 3.5G/include/asm-i386/processor.h Tue Oct 9 04:45:22 2001
+@@ -270,7 +270,11 @@
+ /* This decides where the kernel will search for a free chunk of vm
+  * space during mmap's.
+  */
++#ifndef CONFIG_05GB
+ #define TASK_UNMAPPED_BASE	(TASK_SIZE / 3)
++#else
++#define TASK_UNMAPPED_BASE	(TASK_SIZE / 16)
++#endif
+
+ /*
+  * Size of io_bitmap in longwords: 32 is ports 0-0x3ff.
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2
new file mode 100644
index 000000000000..4925f0442f08
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_alpha-fp-disabled-2
@@ -0,0 +1,92 @@
+diff -urN 2.4.10pre9/arch/alpha/kernel/entry.S alpha-fp/arch/alpha/kernel/entry.S
+--- 2.4.10pre9/arch/alpha/kernel/entry.S Sat Aug 11 08:03:53 2001
++++ alpha-fp/arch/alpha/kernel/entry.S Fri Sep 14 06:30:18 2001
+@@ -290,6 +290,8 @@
+ 	.end __kernel_execve
+
+ 	.align 3
++.globl do_switch_fp_start
++.globl do_switch_fp_end
+ 	.ent do_switch_stack
+ do_switch_stack:
+ 	lda $30,-SWITCH_STACK_SIZE($30)
+@@ -301,6 +303,7 @@
+ 	stq $14,40($30)
+ 	stq $15,48($30)
+ 	stq $26,56($30)
++do_switch_fp_start:
+ 	stt $f0,64($30)
+ 	stt $f1,72($30)
+ 	stt $f2,80($30)
+@@ -335,10 +338,13 @@
+ 	stt $f30,304($30)
+ 	stt $f0,312($30)	# save fpcr in slot of $f31
+ 	ldt $f0,64($30)	# dont let "do_switch_stack" change fp state.
++do_switch_fp_end:
+ 	ret $31,($1),1
+ 	.end do_switch_stack
+
+ 	.align 3
++.globl undo_switch_fp_start
++.globl undo_switch_fp_end
+ 	.ent undo_switch_stack
+ undo_switch_stack:
+ 	ldq $9,0($30)
+@@ -349,6 +355,7 @@
+ 	ldq $14,40($30)
+ 	ldq $15,48($30)
+ 	ldq $26,56($30)
++undo_switch_fp_start:
+ 	ldt $f30,312($30)	# get saved fpcr
+ 	ldt $f0,64($30)
+ 	ldt $f1,72($30)
+@@ -382,6 +389,7 @@
+ 	ldt $f28,288($30)
+ 	ldt $f29,296($30)
+ 	ldt $f30,304($30)
++undo_switch_fp_end:
+ 	lda $30,SWITCH_STACK_SIZE($30)
+ 	ret $31,($1),1
+ 	.end undo_switch_stack
+diff -urN 2.4.10pre9/arch/alpha/kernel/proto.h alpha-fp/arch/alpha/kernel/proto.h
+--- 2.4.10pre9/arch/alpha/kernel/proto.h Sun Apr 1 20:36:06 2001
++++ alpha-fp/arch/alpha/kernel/proto.h Fri Sep 14 06:30:18 2001
+@@ -134,6 +134,11 @@
+ extern void entUna(void);
+ extern void entDbg(void);
+
++extern void do_switch_fp_start(void);
++extern void do_switch_fp_end(void);
++extern void undo_switch_fp_start(void);
++extern void undo_switch_fp_end(void);
++
+ /* process.c */
+ extern void cpu_idle(void) __attribute__((noreturn));
+
+diff -urN 2.4.10pre9/arch/alpha/kernel/traps.c alpha-fp/arch/alpha/kernel/traps.c
+--- 2.4.10pre9/arch/alpha/kernel/traps.c Fri Sep 14 04:05:38 2001
++++ alpha-fp/arch/alpha/kernel/traps.c Fri Sep 14 06:32:19 2001
+@@ -218,6 +218,23 @@
+ 	  unsigned long a2, unsigned long a3, unsigned long a4,
+ 	  unsigned long a5, struct pt_regs regs)
+ {
++	if (type == 3 && !(regs.ps & 8) ) {
++		/*
++		 * Handle a rare case where the user has disabled floating
++		 * point using the clrfen PALcall and the kernel is attempting
++		 * to view floating point state. This happens in two asm stubs:
++		 * do_switch_stack and undo_switch_stack.
++		 * If this is the case, we modify the return value to pass
++		 * over this section and resume from there.
++		 */
++		if (regs.pc == (unsigned long) do_switch_fp_start) {
++			regs.pc = (unsigned long) do_switch_fp_end;
++			return;
++		} else if (regs.pc == (unsigned long) undo_switch_fp_start) {
++			regs.pc = (unsigned long) undo_switch_fp_end;
++			return;
++		}
++	}
+ 	if (!opDEC_testing || type != 4) {
+ 		die_if_kernel((type == 1 ? "Kernel Bug" : "Instruction fault"),
+ 			      &regs, type, 0);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1
new file mode 100644
index 000000000000..b7c7f999d36a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_backout-gcc-3_0-patch-1
@@ -0,0 +1,12 @@
+diff -urN 2.4.6pre3/kernel/timer.c backoutgcc/kernel/timer.c
+--- 2.4.6pre3/kernel/timer.c Wed Jun 13 04:02:52 2001
++++ backoutgcc/kernel/timer.c Wed Jun 13 15:49:13 2001
+@@ -32,7 +32,7 @@
+ long tick = (1000000 + HZ/2) / HZ;	/* timer interrupt period */
+
+ /* The current time */
+-struct timeval xtime __attribute__ ((aligned (16)));
++volatile struct timeval xtime __attribute__ ((aligned (16)));
+
+ /* Don't completely fail for HZ > 500.  */
+ int tickadj = 500/HZ ? : 1;		/* microsecs */
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1
new file mode 100644
index 000000000000..a37218b4ad2e
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_binfmt-elf-checks-1
@@ -0,0 +1,125 @@
+diff -urN 2.4.11pre3/fs/binfmt_elf.c elf/fs/binfmt_elf.c
+--- 2.4.11pre3/fs/binfmt_elf.c Thu Oct 4 10:06:57 2001
++++ elf/fs/binfmt_elf.c Thu Oct 4 18:23:34 2001
+@@ -78,13 +78,13 @@
+
+ #define BAD_ADDR(x)	((unsigned long)(x) > TASK_SIZE)
+
+-static void set_brk(unsigned long start, unsigned long end)
++static unsigned long set_brk(unsigned long start, unsigned long end)
+ {
+ 	start = ELF_PAGEALIGN(start);
+ 	end = ELF_PAGEALIGN(end);
+ 	if (end <= start)
+-		return;
+-	do_brk(start, end - start);
++		return 0;
++	return do_brk(start, end - start);
+ }
+
+
+@@ -300,6 +300,7 @@
+ 			elf_type |= MAP_FIXED;
+
+ 		map_addr = elf_map(interpreter, load_addr + vaddr, eppnt, elf_prot, elf_type);
++		error = map_addr;
+ 		if (BAD_ADDR(map_addr))
+ 			goto out_close;
+
+@@ -338,8 +339,11 @@
+ 	elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);	/* What we have mapped so far */
+
+ 	/* Map the last of the bss segment */
+-	if (last_bss > elf_bss)
+-		do_brk(elf_bss, last_bss - elf_bss);
++	if (last_bss > elf_bss) {
++		error = do_brk(elf_bss, last_bss - elf_bss);
++		if (BAD_ADDR(error))
++			goto out_close;
++	}
+
+ 	*interp_load_addr = load_addr;
+ 	error = ((unsigned long) interp_elf_ex->e_entry) + load_addr;
+@@ -626,7 +630,11 @@
+ 			/* There was a PT_LOAD segment with p_memsz > p_filesz
+ 			   before this one.  Map anonymous pages, if needed,
+ 			   and clear the area.  */
+-			set_brk (elf_bss + load_bias, elf_brk + load_bias);
++			error = set_brk (elf_bss + load_bias, elf_brk + load_bias);
++			/* here retval is zero */
++			if (BAD_ADDR(error))
++				goto out_free_dentry;
++
+ 			nbyte = ELF_PAGEOFFSET(elf_bss);
+ 			if (nbyte) {
+ 				nbyte = ELF_MIN_ALIGN - nbyte;
+@@ -653,8 +661,9 @@
+ 		}
+
+ 		error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt, elf_prot, elf_flags);
++		/* here retval is zero */
+ 		if (BAD_ADDR(error))
+-			continue;
++			goto out_free_dentry;
+
+ 		if (!load_addr_set) {
+ 			load_addr_set = 1;
+@@ -703,11 +712,10 @@
+ 		fput(interpreter);
+ 		kfree(elf_interpreter);
+
++		/* here retval is zero */
+ 		if (BAD_ADDR(elf_entry)) {
+-			printk(KERN_ERR "Unable to load interpreter\n");
+-			kfree(elf_phdata);
+-			send_sig(SIGSEGV, current, 0);
+-			return 0;
++			printk(KERN_WARNING "Unable to load interpreter\n");
++			goto out_free_ph;
+ 		}
+ 	}
+
+@@ -741,7 +749,10 @@
+ 	/* Calling set_brk effectively mmaps the pages that we need
+ 	 * for the bss and break sections
+ 	 */
+-	set_brk(elf_bss, elf_brk);
++	error = set_brk(elf_bss, elf_brk);
++	/* here retval is zero */
++	if (BAD_ADDR(error))
++		goto out;
+
+ 	padzero(elf_bss);
+
+@@ -781,14 +792,15 @@
+ 	start_thread(regs, elf_entry, bprm->p);
+ 	if (current->ptrace & PT_PTRACED)
+ 		send_sig(SIGTRAP, current, 0);
+-	retval = 0;
++	/* here retval is zero */
+ out:
+ 	return retval;
+
+ 	/* error cleanup */
+ out_free_dentry:
+ 	allow_write_access(interpreter);
+-	fput(interpreter);
++	if (interpreter)
++		fput(interpreter);
+ out_free_interp:
+ 	if (elf_interpreter)
+ 		kfree(elf_interpreter);
+@@ -866,8 +878,11 @@
+
+ 	len = ELF_PAGESTART(elf_phdata->p_filesz + elf_phdata->p_vaddr + ELF_MIN_ALIGN - 1);
+ 	bss = elf_phdata->p_memsz + elf_phdata->p_vaddr;
+-	if (bss > len)
+-		do_brk(len, bss - len);
++	if (bss > len) {
++		error = do_brk(len, bss - len);
++		if (BAD_ADDR(error))
++			goto out_free_ph;
++	}
+ 	error = 0;
+
+ out_free_ph:
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2
new file mode 100644
index 000000000000..aeaa1e95788c
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_flush-inode-reschedule-2
@@ -0,0 +1,24 @@
+diff -urN 2.4.10pre12/fs/inode.c inode-resched/fs/inode.c
+--- 2.4.10pre12/fs/inode.c Thu Sep 20 01:44:07 2001
++++ inode-resched/fs/inode.c Thu Sep 20 20:02:35 2001
+@@ -17,6 +17,7 @@
+ #include
+ #include
+ #include
++#include
+
+ /*
+  * New inode.c implementation.
+@@ -295,6 +296,12 @@
+ 		 * so we have to start looking from the list head.
+ 		 */
+ 		tmp = head;
++
++		if (unlikely(current->need_resched)) {
++			spin_unlock(&inode_lock);
++			schedule();
++			spin_lock(&inode_lock);
++		}
+ 	}
+ }
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1
new file mode 100644
index 000000000000..474ad52ee803
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_gcc-30-volatile-xtime-1
@@ -0,0 +1,11 @@
+--- 2.4.6pre2aa1/include/linux/sched.h.~1~ Wed Jun 13 00:44:45 2001
++++ 2.4.6pre2aa1/include/linux/sched.h Wed Jun 13 00:47:23 2001
+@@ -541,7 +541,7 @@
+ extern unsigned long volatile jiffies;
+ extern unsigned long itimer_ticks;
+ extern unsigned long itimer_next;
+-extern struct timeval xtime;
++extern volatile struct timeval xtime;
+ extern void do_timer(struct pt_regs *);
+
+ extern unsigned int * prof_buffer;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1
new file mode 100644
index 000000000000..b9da3b6ff726
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_loop-sem-1
@@ -0,0 +1,28 @@
+diff -urN 2.4.9ac13/drivers/block/loop.c 2.4.9ac14/drivers/block/loop.c
+--- 2.4.9ac13/drivers/block/loop.c Fri Sep 21 22:09:16 2001
++++ 2.4.9ac14/drivers/block/loop.c Sat Sep 22 06:09:39 2001
+@@ -177,6 +177,8 @@
+ 	unsigned size, offset;
+ 	int len;
+
++	down(&(file->f_dentry->d_inode->i_sem));
++
+ 	index = pos >> PAGE_CACHE_SHIFT;
+ 	offset = pos & (PAGE_CACHE_SIZE - 1);
+ 	len = bh->b_size;
+@@ -210,6 +212,7 @@
+ 		deactivate_page(page);
+ 		page_cache_release(page);
+ 	}
++	up(&(file->f_dentry->d_inode->i_sem));
+ 	return 0;
+
+ write_fail:
+@@ -221,6 +224,7 @@
+ 	deactivate_page(page);
+ 	page_cache_release(page);
+ fail:
++	up(&(file->f_dentry->d_inode->i_sem));
+ 	return -1;
+ }
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2
new file mode 100644
index 000000000000..6546915bc284
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lowlatency-fixes-2
@@ -0,0 +1,105 @@
+diff -urN 2.4.13pre3/fs/buffer.c sched/fs/buffer.c
+--- 2.4.13pre3/fs/buffer.c Tue Oct 16 02:03:44 2001
++++ sched/fs/buffer.c Wed Oct 17 23:40:56 2001
+@@ -231,6 +231,7 @@
+ static void write_unlocked_buffers(kdev_t dev)
+ {
+ 	do {
++		conditional_schedule();
+ 		spin_lock(&lru_list_lock);
+ 	} while (write_some_buffers(dev));
+ 	run_task_queue(&tq_disk);
+@@ -280,6 +281,7 @@
+ static int wait_for_locked_buffers(kdev_t dev, int index, int refile)
+ {
+ 	do {
++		conditional_schedule();
+ 		spin_lock(&lru_list_lock);
+ 	} while (wait_for_buffers(dev, index, refile));
+ 	return 0;
+diff -urN 2.4.13pre3/fs/proc/array.c sched/fs/proc/array.c
+--- 2.4.13pre3/fs/proc/array.c Tue Oct 16 02:03:45 2001
++++ sched/fs/proc/array.c Wed Oct 17 23:40:56 2001
+@@ -415,6 +415,8 @@
+ 		pte_t page = *pte;
+ 		struct page *ptpage;
+
++		conditional_schedule();
++
+ 		address += PAGE_SIZE;
+ 		pte++;
+ 		if (pte_none(page))
+diff -urN 2.4.13pre3/fs/proc/generic.c sched/fs/proc/generic.c
+--- 2.4.13pre3/fs/proc/generic.c Sun Sep 23 21:11:40 2001
++++ sched/fs/proc/generic.c Wed Oct 17 23:40:56 2001
+@@ -98,7 +98,9 @@
+ 			retval = n;
+ 			break;
+ 		}
+-
++
++		conditional_schedule();
++
+ 		/* This is a hack to allow mangling of file pos independent
+ 		 * of actual bytes read.  Simply place the data at page,
+ 		 * return the bytes, and set `start' to the desired offset
+diff -urN 2.4.13pre3/include/linux/condsched.h sched/include/linux/condsched.h
+--- 2.4.13pre3/include/linux/condsched.h Thu Jan 1 01:00:00 1970
++++ sched/include/linux/condsched.h Wed Oct 17 23:40:56 2001
+@@ -0,0 +1,14 @@
++#ifndef _LINUX_CONDSCHED_H
++#define _LINUX_CONDSCHED_H
++
++#ifndef __ASSEMBLY__
++#define conditional_schedule() \
++do { \
++	if (unlikely(current->need_resched)) { \
++		__set_current_state(TASK_RUNNING); \
++		schedule(); \
++	} \
++} while(0)
++#endif
++
++#endif
+diff -urN 2.4.13pre3/include/linux/sched.h sched/include/linux/sched.h
+--- 2.4.13pre3/include/linux/sched.h Thu Oct 11 10:41:52 2001
++++ sched/include/linux/sched.h Wed Oct 17 23:40:56 2001
+@@ -13,6 +13,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+diff -urN 2.4.13pre3/mm/filemap.c sched/mm/filemap.c
+--- 2.4.13pre3/mm/filemap.c Tue Oct 16 02:03:47 2001
++++ sched/mm/filemap.c Wed Oct 17 23:55:51 2001
+@@ -671,6 +671,8 @@
+ 	struct page **hash = page_hash(mapping, offset);
+ 	struct page *page;
+
++	conditional_schedule();
++
+ 	spin_lock(&pagecache_lock);
+ 	page = __find_page_nolock(mapping, offset, *hash);
+ 	spin_unlock(&pagecache_lock);
+@@ -1263,6 +1265,9 @@
+ 		offset &= ~PAGE_CACHE_MASK;
+
+ 		page_cache_release(page);
++
++		conditional_schedule();
++
+ 		if (ret == nr && desc->count)
+ 			continue;
+ 		break;
+@@ -2753,6 +2758,8 @@
+ 		SetPageReferenced(page);
+ 		UnlockPage(page);
+ 		page_cache_release(page);
++
++		conditional_schedule();
+
+ 		if (status < 0)
+ 			break;
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2
new file mode 100644
index 000000000000..d4c7923c5211
Binary files /dev/null and b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_lvm-1.0.1-rc4-3.bz2 differ
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1
new file mode 100644
index 000000000000..5bc437c15fe1
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_mmap-enomem-1
@@ -0,0 +1,10 @@
+--- 2.4.10pre11aa1/mm/mmap.c.~1~ Tue Sep 18 06:01:02 2001
++++ 2.4.10pre11aa1/mm/mmap.c Tue Sep 18 06:02:45 2001
+@@ -479,7 +479,6 @@
+ 	}
+
+ 	/* Clear old maps */
+-	error = -ENOMEM;
+ munmap_back:
+ 	vma = find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
+ 	if (vma && vma->vm_start < addr + len) {
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5
new file mode 100644
index 000000000000..391123e25175
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_module-gfp-5
@@ -0,0 +1,119 @@
+diff -urN 2.4.10pre12/include/asm-alpha/module.h module-gfp/include/asm-alpha/module.h
+--- 2.4.10pre12/include/asm-alpha/module.h Thu Sep 20 01:44:11 2001
++++ module-gfp/include/asm-alpha/module.h Thu Sep 20 06:40:25 2001
+@@ -4,8 +4,8 @@
+  * This file contains the alpha architecture specific module code.
+  */
+
+-#define module_map(x)		vmalloc(x)
+-#define module_unmap(x)		vfree(x)
++#define module_map(x)		alloc_exact(x)
++#define module_unmap(x)		free_exact((x), (x)->size)
+ #define module_arch_init(x)	alpha_module_init(x)
+ #define arch_init_modules(x)	alpha_init_modules(x)
+
+diff -urN 2.4.10pre12/include/asm-i386/module.h module-gfp/include/asm-i386/module.h
+--- 2.4.10pre12/include/asm-i386/module.h Thu Sep 20 01:44:11 2001
++++ module-gfp/include/asm-i386/module.h Thu Sep 20 06:40:25 2001
+@@ -4,8 +4,8 @@
+  * This file contains the i386 architecture specific module code.
+  */
+
+-#define module_map(x)		vmalloc(x)
+-#define module_unmap(x)		vfree(x)
++#define module_map(x)		alloc_exact(x)
++#define module_unmap(x)		free_exact((x), (x)->size)
+ #define module_arch_init(x)	(0)
+ #define arch_init_modules(x)	do { } while (0)
+
+diff -urN 2.4.10pre12/include/linux/mm.h module-gfp/include/linux/mm.h
+--- 2.4.10pre12/include/linux/mm.h Thu Sep 20 05:09:07 2001
++++ module-gfp/include/linux/mm.h Thu Sep 20 06:40:25 2001
+@@ -411,6 +411,9 @@
+ extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
+ extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
+
++extern void * FASTCALL(alloc_exact(unsigned int size));
++extern void FASTCALL(free_exact(void * addr, unsigned int size));
++
+ #define __free_page(page) __free_pages((page), 0)
+ #define free_page(addr) free_pages((addr),0)
+
+diff -urN 2.4.10pre12/kernel/ksyms.c module-gfp/kernel/ksyms.c
+--- 2.4.10pre12/kernel/ksyms.c Thu Sep 20 01:44:19 2001
++++ module-gfp/kernel/ksyms.c Thu Sep 20 06:40:25 2001
+@@ -96,6 +96,8 @@
+ EXPORT_SYMBOL(get_zeroed_page);
+ EXPORT_SYMBOL(__free_pages);
+ EXPORT_SYMBOL(free_pages);
++EXPORT_SYMBOL(free_exact);
++EXPORT_SYMBOL(alloc_exact);
+ EXPORT_SYMBOL(num_physpages);
+ EXPORT_SYMBOL(kmem_find_general_cachep);
+ EXPORT_SYMBOL(kmem_cache_create);
+diff -urN 2.4.10pre12/mm/page_alloc.c module-gfp/mm/page_alloc.c
+--- 2.4.10pre12/mm/page_alloc.c Thu Sep 20 01:44:20 2001
++++ module-gfp/mm/page_alloc.c Thu Sep 20 06:40:42 2001
+@@ -18,6 +18,7 @@
+ #include
+ #include
+ #include
++#include
+
+ int nr_swap_pages;
+ int nr_active_pages;
+@@ -443,6 +444,54 @@
+ 	if (addr != 0)
+ 		__free_pages(virt_to_page(addr), order);
+ }
++
++static inline int nextorder(unsigned int x)
++{
++	int c = -PAGE_SHIFT;
++	while (x) {
++		x >>= 1;
++		c++;
++	}
++	if (c < 0)
++		c = 0;
++	return c;
++}
++
++void * alloc_exact(unsigned int size)
++{
++	struct page *p, *w;
++	int order = nextorder(size);
++
++	p = alloc_pages(GFP_KERNEL, order);
++	if (p) {
++		struct page *end = p + (1UL << order);
++		for (w = p+1; w < end; ++w)
++			set_page_count(w, 1);
++		for (w = p + (size>>PAGE_SHIFT)+1; w < end; ++w)
++			__free_pages(w, 0);
++		return (void *) page_address(p);
++	}
++
++	return vmalloc(size);
++}
++
++void free_exact(void * addr, unsigned int size)
++{
++	struct page * w;
++	unsigned long mptr = (unsigned long) addr;
++	int sz;
++
++	if (mptr >= VMALLOC_START && mptr + size <= VMALLOC_END) {
++		vfree(addr);
++		return;
++	}
++	w = virt_to_page(addr);
++	for (sz = size; sz > 0; sz -= PAGE_SIZE, ++w) {
++		if (atomic_read(&w->count) != 1)
++			BUG();
++		__free_pages(w, 0);
++	}
++}
+
+ /*
+  * Total amount of free (allocatable) RAM:
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5
new file mode 100644
index 000000000000..6a9f637daaf1
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_nanosleep-5
@@ -0,0 +1,92 @@
+diff -urN 2.4.6pre6/include/linux/time.h nanosleep/include/linux/time.h
+--- 2.4.6pre6/include/linux/time.h Thu Jun 14 18:07:48 2001
++++ nanosleep/include/linux/time.h Thu Jun 28 11:47:14 2001
+@@ -48,6 +48,27 @@
+ 	value->tv_sec = jiffies / HZ;
+ }
+
++static __inline__ int
++timespec_before(struct timespec a, struct timespec b)
++{
++	if (a.tv_sec == b.tv_sec)
++		return a.tv_nsec < b.tv_nsec;
++	return a.tv_sec < b.tv_sec;
++}
++
++/* computes `a - b' and write the result in `result', assumes `a >= b' */
++static inline void
++timespec_less(struct timespec a, struct timespec b, struct timespec * result)
++{
++	if (a.tv_nsec < b.tv_nsec)
++	{
++		a.tv_sec--;
++		a.tv_nsec += 1000000000;
++	}
++
++	result->tv_sec = a.tv_sec - b.tv_sec;
++	result->tv_nsec = a.tv_nsec - b.tv_nsec;
++}
+
+ /* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
+  * Assumes input in normal date format, i.e. 1980-12-31 23:59:59
+@@ -89,6 +110,27 @@
+ 	time_t		tv_sec;		/* seconds */
+ 	suseconds_t	tv_usec;	/* microseconds */
+ };
++
++/* computes `a - b' and write the result in `result', assumes `a >= b' */
++static inline void
++timeval_less(struct timeval a, struct timeval b, struct timeval * result)
++{
++	if (a.tv_usec < b.tv_usec)
++	{
++		a.tv_sec--;
++		a.tv_usec += 1000000;
++	}
++
++	result->tv_sec = a.tv_sec - b.tv_sec;
++	result->tv_usec = a.tv_usec - b.tv_usec;
++}
++
++static __inline__ void
++timeval_to_timespec(struct timeval tv, struct timespec * ts)
++{
++	ts->tv_sec = tv.tv_sec;
++	ts->tv_nsec = (long) tv.tv_usec * 1000;
++}
+
+ struct timezone {
+ 	int	tz_minuteswest;	/* minutes west of Greenwich */
+diff -urN 2.4.6pre6/kernel/timer.c nanosleep/kernel/timer.c
+--- 2.4.6pre6/kernel/timer.c Thu Jun 28 11:38:09 2001
++++ nanosleep/kernel/timer.c Thu Jun 28 11:48:47 2001
+@@ -798,6 +798,7 @@
+ {
+ 	struct timespec t;
+ 	unsigned long expire;
++	struct timeval before, after;
+
+ 	if(copy_from_user(&t, rqtp, sizeof(struct timespec)))
+ 		return -EFAULT;
+@@ -822,11 +823,20 @@
+ 	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+
+ 	current->state = TASK_INTERRUPTIBLE;
++	get_fast_time(&before);
+ 	expire = schedule_timeout(expire);
++	get_fast_time(&after);
+
+ 	if (expire) {
+ 		if (rmtp) {
+-			jiffies_to_timespec(expire, &t);
++			struct timespec elapsed;
++
++			timeval_less(after, before, &after);
++			timeval_to_timespec(after, &elapsed);
++			if (timespec_before(elapsed, t))
++				timespec_less(t, elapsed, &t);
++			else
++				t.tv_nsec = t.tv_sec = 0;
+ 			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
+ 				return -EFAULT;
+ 		}
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2
new file mode 100644
index 000000000000..0ca831610811
Binary files /dev/null and b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_netconsole-2.4.10-C2-2.bz2 differ
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4
new file mode 100644
index 000000000000..ad3eecfcf6a9
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_o_direct-4
@@ -0,0 +1,304 @@
+diff -urN 2.4.13pre6/fs/block_dev.c o_direct/fs/block_dev.c
+--- 2.4.13pre6/fs/block_dev.c Sun Oct 21 20:03:47 2001
++++ o_direct/fs/block_dev.c Tue Oct 23 14:18:35 2001
+@@ -113,6 +113,11 @@
+ 	return 0;
+ }
+
++static int blkdev_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
++{
++	return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, blkdev_get_block);
++}
++
+ static int blkdev_writepage(struct page * page)
+ {
+ 	return block_write_full_page(page, blkdev_get_block);
+@@ -632,6 +637,7 @@
+ 	sync_page: block_sync_page,
+ 	prepare_write: blkdev_prepare_write,
+ 	commit_write: blkdev_commit_write,
++	direct_IO: blkdev_direct_IO,
+ };
+
+ struct file_operations def_blk_fops = {
+diff -urN 2.4.13pre6/fs/buffer.c o_direct/fs/buffer.c
+--- 2.4.13pre6/fs/buffer.c Sun Oct 21 20:03:47 2001
++++ o_direct/fs/buffer.c Tue Oct 23 14:18:35 2001
+@@ -1942,6 +1942,47 @@
+ 	return tmp.b_blocknr;
+ }
+
++int generic_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize, get_block_t * get_block)
++{
++	int i, nr_blocks, retval;
++	unsigned long * blocks = iobuf->blocks;
++
++	nr_blocks = iobuf->length / blocksize;
++	/* build the blocklist */
++	for (i = 0; i < nr_blocks; i++, blocknr++) {
++		struct buffer_head bh;
++
++		bh.b_state = 0;
++		bh.b_dev = inode->i_dev;
++		bh.b_size = blocksize;
++
++		retval = get_block(inode, blocknr, &bh, rw == READ ? 0 : 1);
++		if (retval)
++			goto out;
++
++		if (rw == READ) {
++			if (buffer_new(&bh))
++				BUG();
++			if (!buffer_mapped(&bh)) {
++				/* there was an hole in the filesystem */
++				blocks[i] = -1UL;
++				continue;
++			}
++		} else {
++			if (buffer_new(&bh))
++				unmap_underlying_metadata(&bh);
++			if (!buffer_mapped(&bh))
++				BUG();
++		}
++		blocks[i] = bh.b_blocknr;
++	}
++
++	retval = brw_kiovec(rw, 1, &iobuf, inode->i_dev, iobuf->blocks, blocksize);
++
++ out:
++	return retval;
++}
++
+ /*
+  * IO completion routine for a buffer_head being used for kiobuf IO: we
+  * can't dispatch the kiobuf callback until io_count reaches 0.
+diff -urN 2.4.13pre6/fs/ext2/inode.c o_direct/fs/ext2/inode.c
+--- 2.4.13pre6/fs/ext2/inode.c Sun Oct 21 20:03:47 2001
++++ o_direct/fs/ext2/inode.c Tue Oct 23 14:18:35 2001
+@@ -592,13 +592,18 @@
+ {
+ 	return generic_block_bmap(mapping,block,ext2_get_block);
+ }
++static int ext2_direct_IO(int rw, struct inode * inode, struct kiobuf * iobuf, unsigned long blocknr, int blocksize)
++{
++	return generic_direct_IO(rw, inode, iobuf, blocknr, blocksize, ext2_get_block);
++}
+ struct address_space_operations ext2_aops = {
+ 	readpage: ext2_readpage,
+ 	writepage: ext2_writepage,
+ 	sync_page: block_sync_page,
+ 	prepare_write: ext2_prepare_write,
+ 	commit_write: generic_commit_write,
+-	bmap: ext2_bmap
++	bmap: ext2_bmap,
++	direct_IO: ext2_direct_IO,
+ };
+
+ /*
+diff -urN 2.4.13pre6/include/linux/fs.h o_direct/include/linux/fs.h
+--- 2.4.13pre6/include/linux/fs.h Sun Oct 21 20:03:51 2001
++++ o_direct/include/linux/fs.h Tue Oct 23 14:18:35 2001
+@@ -1368,6 +1368,7 @@
+ int generic_block_bmap(struct address_space *, long, get_block_t *);
+ int generic_commit_write(struct file *, struct page *, unsigned, unsigned);
+ int block_truncate_page(struct address_space *, loff_t, get_block_t *);
++extern int generic_direct_IO(int, struct inode *, struct kiobuf *, unsigned long, int, get_block_t *);
+ extern void create_empty_buffers(struct page *, kdev_t, unsigned long);
+
+ extern int waitfor_one_page(struct page*);
+diff -urN 2.4.13pre6/kernel/ksyms.c o_direct/kernel/ksyms.c
+--- 2.4.13pre6/kernel/ksyms.c Sun Oct 21 20:03:52 2001
++++ o_direct/kernel/ksyms.c Tue Oct 23 14:18:47 2001
+@@ -199,6 +199,7 @@
+ EXPORT_SYMBOL(unlock_buffer);
+ EXPORT_SYMBOL(__wait_on_buffer);
+ EXPORT_SYMBOL(___wait_on_page);
++EXPORT_SYMBOL(generic_direct_IO);
+ EXPORT_SYMBOL(block_write_full_page);
+ EXPORT_SYMBOL(block_read_full_page);
+ EXPORT_SYMBOL(block_prepare_write);
+diff -urN 2.4.13pre6/mm/filemap.c o_direct/mm/filemap.c
+--- 2.4.13pre6/mm/filemap.c Sun Oct 21 20:03:52 2001
++++ o_direct/mm/filemap.c Tue Oct 23 14:18:35 2001
+@@ -1356,6 +1356,87 @@
+ 	UPDATE_ATIME(inode);
+ }
+
++static ssize_t generic_file_direct_IO(int rw, struct file * filp, char * buf, size_t count, loff_t offset)
++{
++	ssize_t retval;
++	int new_iobuf, chunk_size, blocksize_mask, blocksize, blocksize_bits, iosize, progress;
++	struct kiobuf * iobuf;
++	struct inode * inode = filp->f_dentry->d_inode;
++	struct address_space * mapping = inode->i_mapping;
++
++	new_iobuf = 0;
++	iobuf = filp->f_iobuf;
++	if (test_and_set_bit(0, &filp->f_iobuf_lock)) {
++		/*
++		 * A parallel read/write is using the preallocated iobuf
++		 * so just run slow and allocate a new one.
++		 */
++		retval = alloc_kiovec(1, &iobuf);
++		if (retval)
++			goto out;
++		new_iobuf = 1;
++	}
++
++	blocksize = 1 << inode->i_blkbits;
++	blocksize_bits = inode->i_blkbits;
++	blocksize_mask = blocksize - 1;
++	chunk_size = KIO_MAX_ATOMIC_IO << 10;
++
++	retval = -EINVAL;
++	if ((offset & blocksize_mask) || (count & blocksize_mask))
++		goto out_free;
++	if (!mapping->a_ops->direct_IO)
++		goto out_free;
++
++	/*
++	 * Flush to disk exlusively the _data_, metadata must remains
++	 * completly asynchronous or performance will go to /dev/null.
++	 */
++	filemap_fdatasync(mapping);
++	retval = fsync_inode_data_buffers(inode);
++	filemap_fdatawait(mapping);
++	if (retval < 0)
++		goto out_free;
++
++	progress = retval = 0;
++	while (count > 0) {
++		iosize = count;
++		if (iosize > chunk_size)
++			iosize = chunk_size;
++
++		retval = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
++		if (retval)
++			break;
++
++		retval = mapping->a_ops->direct_IO(rw, inode, iobuf, (offset+progress) >> blocksize_bits, blocksize);
++
++		if (rw == READ && retval > 0)
++			mark_dirty_kiobuf(iobuf, retval);
++
++		if (retval >= 0) {
++			count -= retval;
++			buf += retval;
++			progress += retval;
++		}
++
++		unmap_kiobuf(iobuf);
++
++		if (retval != iosize)
++			break;
++	}
++
++	if (progress)
++		retval = progress;
++
++ out_free:
++	if (!new_iobuf)
++		clear_bit(0, &filp->f_iobuf_lock);
++	else
++		free_kiovec(1, &iobuf);
++ out:
++	return retval;
++}
++
+ int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+ {
+ 	char *kaddr;
+@@ -1389,6 +1470,9 @@
+ 	if ((ssize_t) count < 0)
+ 		return -EINVAL;
+
++	if (filp->f_flags & O_DIRECT)
++		goto o_direct;
++
+ 	retval = -EFAULT;
+ 	if (access_ok(VERIFY_WRITE, buf, count)) {
+ 		retval = 0;
+@@ -1407,7 +1491,29 @@
+ 			retval = desc.error;
+ 		}
+ 	}
++ out:
+ 	return retval;
++
++ o_direct:
++	{
++		loff_t pos = *ppos, size;
++		struct address_space *mapping = filp->f_dentry->d_inode->i_mapping;
++		struct inode *inode = mapping->host;
++
++		retval = 0;
++		if (!count)
++			goto out; /* skip atime */
++		size = inode->i_size;
++		if (pos < size) {
++			if (pos + count > size)
++				count = size - pos;
++			retval = generic_file_direct_IO(READ, filp, buf, count, pos);
++			if (retval > 0)
++				*ppos = pos + retval;
++		}
++		UPDATE_ATIME(filp->f_dentry->d_inode);
++		goto out;
++	}
+ }
+
+ static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
+@@ -2660,7 +2766,8 @@
+
+ 	written = 0;
+
+-	if (file->f_flags & O_APPEND)
++	/* FIXME: this is for backwards compatibility with 2.4 */
++	if (!S_ISBLK(inode->i_mode) && file->f_flags & O_APPEND)
+ 		pos = inode->i_size;
+
+ 	/*
+@@ -2740,6 +2847,9 @@
+ 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
+ 	mark_inode_dirty_sync(inode);
+
++	if (file->f_flags & O_DIRECT)
++		goto o_direct;
++
+ 	do {
+ 		unsigned long index, offset;
+ 		long page_fault;
+@@ -2814,6 +2924,7 @@
+ 	if ((status >= 0) && (file->f_flags & O_SYNC))
+ 		status = generic_osync_inode(inode, OSYNC_METADATA|OSYNC_DATA);
+
++out_status:
+ 	err = written ? written : status;
+ out:
+
+@@ -2822,6 +2933,25 @@
+ fail_write:
+ 	status = -EFAULT;
+ 	goto unlock;
++
++o_direct:
++	written = generic_file_direct_IO(WRITE, file, (char *) buf, count, pos);
++	if (written > 0) {
++		loff_t end = pos + written;
++		if (end > inode->i_size && !S_ISBLK(inode->i_mode)) {
++			inode->i_size = end;
++			mark_inode_dirty(inode);
++		}
++		*ppos = end;
++		invalidate_inode_pages2(mapping);
++	}
++	/*
++	 * Sync the fs metadata but not the minor inode changes and
++	 * of course not the data as we did direct DMA for the IO.
++	 */
++	if (written >= 0 && file->f_flags & O_SYNC)
++		status = generic_osync_inode(inode, OSYNC_METADATA);
++	goto out_status;
+ }
+
+ void __init page_cache_init(unsigned long mempages)
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1
new file mode 100644
index 000000000000..8f67e8b9f8f5
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_ordered-freeing-1
@@ -0,0 +1,45 @@
+diff -urN 2.4.13pre1/arch/i386/mm/init.c ordered/arch/i386/mm/init.c
+--- 2.4.13pre1/arch/i386/mm/init.c Sun Sep 23 21:11:28 2001
++++ ordered/arch/i386/mm/init.c Fri Oct 12 19:04:54 2001
+@@ -469,7 +469,7 @@
+ 		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
+ 			reservedpages++;
+ #ifdef CONFIG_HIGHMEM
+-	for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
++	for (tmp = highend_pfn-1; tmp >= highstart_pfn; tmp--) {
+ 		struct page *page = mem_map + tmp;
+
+ 		if (!page_is_ram(tmp)) {
+diff -urN 2.4.13pre1/mm/bootmem.c ordered/mm/bootmem.c
+--- 2.4.13pre1/mm/bootmem.c Sun Sep 23 21:11:43 2001
++++ ordered/mm/bootmem.c Fri Oct 12 19:04:56 2001
+@@ -237,14 +237,15 @@
+ {
+ 	struct page *page = pgdat->node_mem_map;
+ 	bootmem_data_t *bdata = pgdat->bdata;
+-	unsigned long i, count, total = 0;
+-	unsigned long idx;
++	unsigned long count, total = 0;
++	long idx, i;
+
+ 	if (!bdata->node_bootmem_map) BUG();
+
+ 	count = 0;
+ 	idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT);
+-	for (i = 0; i < idx; i++, page++) {
++	page += --idx;
++	for (i = idx; i >= 0; i--, page--) {
+ 		if (!test_bit(i, bdata->node_bootmem_map)) {
+ 			count++;
+ 			ClearPageReserved(page);
+@@ -260,7 +261,9 @@
+ 	 */
+ 	page = virt_to_page(bdata->node_bootmem_map);
+ 	count = 0;
+-	for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
++	idx = ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE;
++	page += --idx;
++	for (i = idx; i >= 0; i--, page--) {
+ 		count++;
+ 		ClearPageReserved(page);
+ 		set_page_count(page, 1);
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2
new file mode 100644
index 000000000000..388d9b600ee9
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_poll-nfds-2
@@ -0,0 +1,12 @@
+diff -urN 2.4.9/fs/select.c poll/fs/select.c
+--- 2.4.9/fs/select.c Thu Aug 16 22:03:38 2001
++++ poll/fs/select.c Sat Aug 18 03:09:11 2001
+@@ -416,7 +416,7 @@
+ 	int nchunks, nleft;
+
+ 	/* Do a sanity check on nfds ... */
+-	if (nfds > NR_OPEN)
++	if (nfds > current->rlim[RLIMIT_NOFILE].rlim_cur)
+ 		return -EINVAL;
+
+ 	if (timeout) {
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1
new file mode 100644
index 000000000000..64b2914b2816
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rb-export-1
@@ -0,0 +1,52 @@
+Date: Mon, 24 Sep 2001 02:17:58 -0500
+From: Mark J Roberts
+To: andrea@suse.de
+Subject: [PATCH] Export lib/rbtree.c symbols so modules can use it too.
+Message-ID: <20010924021758.A202@znex>
+Mime-Version: 1.0
+Content-Type: text/plain; charset=us-ascii
+Content-Disposition: inline
+
+Hi,
+
+I'd like to use your rbtree implementation in a module, but
+rb_insert_color and rb_erase aren't exported. This patch against
+2.4.10 exports them.
+
+diff -uX dontdiff linux-2.4.10/lib/Makefile linux/lib/Makefile
+--- linux-2.4.10/lib/Makefile Mon Sep 17 22:31:15 2001
++++ linux/lib/Makefile Sun Sep 23 23:21:56 2001
+@@ -8,7 +8,7 @@
+
+ L_TARGET := lib.a
+
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o
++export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
+
+ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o
+
+diff -uX dontdiff linux-2.4.10/lib/rbtree.c linux/lib/rbtree.c
+--- linux-2.4.10/lib/rbtree.c Mon Sep 17 22:30:23 2001
++++ linux/lib/rbtree.c Sun Sep 23 23:23:13 2001
+@@ -20,6 +20,7 @@
+  */
+
+ #include
++#include
+
+ static void __rb_rotate_left(rb_node_t * node, rb_root_t * root)
+ {
+@@ -125,6 +126,7 @@
+
+ 	root->rb_node->rb_color = RB_BLACK;
+ }
++EXPORT_SYMBOL(rb_insert_color);
+
+ static void __rb_erase_color(rb_node_t * node, rb_node_t * parent,
+ 			     rb_root_t * root)
+@@ -291,3 +293,4 @@
+ 	if (color == RB_BLACK)
+ 		__rb_erase_color(child, parent, root);
+ }
++EXPORT_SYMBOL(rb_erase);
+
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2
new file mode 100644
index 000000000000..030dab952cf4
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rcu-poll-2
@@ -0,0 +1,457 @@
+diff -urN 2.4.14pre3/include/linux/rcupdate.h rcu/include/linux/rcupdate.h
+--- 2.4.14pre3/include/linux/rcupdate.h Thu Jan 1 01:00:00 1970
++++ rcu/include/linux/rcupdate.h Sun Oct 28 15:24:02 2001
+@@ -0,0 +1,59 @@
++/*
++ * Read-Copy Update mechanism for mutual exclusion
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright (c) International Business Machines Corp., 2001
++ *
++ * Author: Dipankar Sarma
++ *
++ * Based on the original work by Paul McKenney
++ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
++ * Papers:
++ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
++ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
++ *
++ * For detailed explanation of Read-Copy Update mechanism see -
++ * http://lse.sourceforge.net/locking/rcupdate.html
++ *
++ */
++
++#ifndef __LINUX_RCUPDATE_H
++#define __LINUX_RCUPDATE_H
++
++#include
++
++/*
++ * Callback structure for use with call_rcu().
++ */
++struct rcu_head {
++	struct list_head list;
++	void (*func)(void *obj);
++	void *arg;
++};
++
++#define RCU_HEAD_INIT(head) { LIST_HEAD_INIT(head.list), NULL, NULL }
++#define RCU_HEAD(head) struct rcu_head head = RCU_HEAD_INIT(head)
++#define INIT_RCU_HEAD(ptr) do { \
++	INIT_LIST_HEAD(&(ptr)->list); (ptr)->func = NULL; (ptr)->arg = NULL; \
++} while (0)
++
++
++extern void FASTCALL(call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg));
++extern void synchronize_kernel(void);
++
++extern void rcu_init(void);
++
++#endif /* __LINUX_RCUPDATE_H */
+diff -urN 2.4.14pre3/include/linux/sched.h rcu/include/linux/sched.h
+--- 2.4.14pre3/include/linux/sched.h Thu Oct 11 10:41:52 2001
++++ rcu/include/linux/sched.h Sun Oct 28 15:24:37 2001
+@@ -159,6 +159,7 @@
+ extern void flush_scheduled_tasks(void);
+ extern int start_context_thread(void);
+ extern int current_is_keventd(void);
++extern void force_cpu_reschedule(int cpu);
+
+ /*
+  * The default fd array needs to be at least BITS_PER_LONG,
+@@ -547,6 +548,18 @@
+ extern unsigned long itimer_next;
+ extern struct timeval xtime;
+ extern void do_timer(struct pt_regs *);
++
++/* per-cpu schedule data */
++typedef struct schedule_data_s {
++	struct task_struct * curr;
++	cycles_t last_schedule;
++	long quiescent;
++} schedule_data_t ____cacheline_aligned;
++
++extern schedule_data_t schedule_data[NR_CPUS];
++#define cpu_curr(cpu) (schedule_data[(cpu)].curr)
++#define last_schedule(cpu) (schedule_data[(cpu)].last_schedule)
++#define RCU_quiescent(cpu) (schedule_data[(cpu)].quiescent)
+
+ extern unsigned int * prof_buffer;
+ extern unsigned long prof_len;
+diff -urN 2.4.14pre3/init/main.c rcu/init/main.c
+--- 2.4.14pre3/init/main.c Wed Oct 24 08:04:27 2001
++++ rcu/init/main.c Sun Oct 28 15:26:58 2001
+@@ -27,6 +27,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -554,6 +555,7 @@
+ 	printk("Kernel command line: %s\n", saved_command_line);
+ 	parse_options(command_line);
+ 	trap_init();
++	rcu_init();
+ 	init_IRQ();
+ 	sched_init();
+ 	softirq_init();
+diff -urN 2.4.14pre3/kernel/Makefile rcu/kernel/Makefile
+--- 2.4.14pre3/kernel/Makefile Sun Sep 23 21:11:42 2001
++++ rcu/kernel/Makefile Sun Oct 28 15:23:48 2001
+@@ -9,12 +9,12 @@
+
+ O_TARGET := kernel.o
+
+-export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o
++export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o exec_domain.o printk.o rcupdate.o
+
+ obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
+ 	module.o exit.o itimer.o info.o time.o softirq.o resource.o \
+ 	sysctl.o acct.o capability.o ptrace.o timer.o user.o \
+-	signal.o sys.o kmod.o context.o
++	signal.o sys.o kmod.o context.o rcupdate.o
+
+ obj-$(CONFIG_UID16) += uid16.o
+ obj-$(CONFIG_MODULES) += ksyms.o
+diff -urN 2.4.14pre3/kernel/rcupdate.c rcu/kernel/rcupdate.c
+--- 2.4.14pre3/kernel/rcupdate.c Thu Jan 1 01:00:00 1970
++++ rcu/kernel/rcupdate.c Sun Oct 28 15:26:37 2001
+@@ -0,0 +1,229 @@
++/*
++ * Read-Copy Update mechanism for mutual exclusion
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ * Copyright (c) International Business Machines Corp., 2001
++ * Copyright (C) Andrea Arcangeli SuSE, 2001
++ *
++ * Author: Dipankar Sarma ,
++ *	   Andrea Arcangeli
++ *
++ * Based on the original work by Paul McKenney
++ * and inputs from Andrea Arcangeli, Rusty Russell, Andi Kleen etc.
++ * Papers:
++ * http://www.rdrop.com/users/paulmck/paper/rclockpdcsproof.pdf
++ * http://lse.sourceforge.net/locking/rclock_OLS.2001.05.01c.sc.pdf (OLS2001)
++ *
++ * For detailed explanation of Read-Copy Update mechanism see -
++ * http://lse.sourceforge.net/locking/rcupdate.html
++ *
++ */
++
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++#include
++
++#define DEBUG
++
++#ifdef CONFIG_SMP
++/* Definition for rcupdate control block. */
++static spinlock_t rcu_lock;
++static struct list_head rcu_nxtlist;
++static struct list_head rcu_curlist;
++static struct tasklet_struct rcu_tasklet;
++static unsigned long rcu_qsmask;
++static int rcu_polling_in_progress;
++static long rcu_quiescent_checkpoint[NR_CPUS];
++#endif
++
++/*
++ * Register a new rcu callback. This will be invoked as soon
++ * as all CPUs have performed a context switch or been seen in the
++ * idle loop or in a user process.
++ */
++void call_rcu(struct rcu_head *head, void (*func)(void *arg), void *arg)
++{
++#ifdef CONFIG_SMP
++	head->func = func;
++	head->arg = arg;
++
++	spin_lock_bh(&rcu_lock);
++	list_add(&head->list, &rcu_nxtlist);
++	spin_unlock_bh(&rcu_lock);
++
++	tasklet_hi_schedule(&rcu_tasklet);
++#else
++	local_bh_disable();
++	func(arg);
++	local_bh_enable();
++#endif
++}
++
++#ifdef CONFIG_SMP
++static int rcu_prepare_polling(void)
++{
++	int stop;
++	int i;
++
++#ifdef DEBUG
++	if (!list_empty(&rcu_curlist))
++		BUG();
++#endif
++
++	stop = 1;
++	if (!list_empty(&rcu_nxtlist)) {
++		list_splice(&rcu_nxtlist, &rcu_curlist);
++		INIT_LIST_HEAD(&rcu_nxtlist);
++
++		rcu_polling_in_progress = 1;
++
++		for (i = 0; i < smp_num_cpus; i++) {
++			int cpu = cpu_logical_map(i);
++
++			if (cpu != smp_processor_id()) {
++				rcu_qsmask |= 1UL << cpu;
++				rcu_quiescent_checkpoint[cpu] = RCU_quiescent(cpu);
++				force_cpu_reschedule(cpu);
++			}
++		}
++		stop = 0;
++	}
++
++	return stop;
++}
++
++/*
++ * Invoke the completed RCU callbacks.
++ */
++static void rcu_invoke_callbacks(void)
++{
++	struct list_head *entry;
++	struct rcu_head *head;
++
++#ifdef DEBUG
++	if (list_empty(&rcu_curlist))
++		BUG();
++#endif
++
++	entry = rcu_curlist.prev;
++	do {
++		head = list_entry(entry, struct rcu_head, list);
++		entry = entry->prev;
++
++		head->func(head->arg);
++	} while (entry != &rcu_curlist);
++
++	INIT_LIST_HEAD(&rcu_curlist);
++}
++
++static int rcu_completion(void)
++{
++	int stop;
++
++	rcu_polling_in_progress = 0;
++	rcu_invoke_callbacks();
++
++	stop = rcu_prepare_polling();
++
++	return stop;
++}
++
++static int rcu_polling(void)
++{
++	int i;
++	int stop;
++
++	for (i = 0; i < smp_num_cpus; i++) {
++		int cpu = cpu_logical_map(i);
++
++		if (rcu_qsmask & (1UL << cpu))
++			if (rcu_quiescent_checkpoint[cpu] != RCU_quiescent(cpu))
++				rcu_qsmask &= ~(1UL << cpu);
++	}
++
++	stop = 0;
++	if (!rcu_qsmask)
++		stop = rcu_completion();
++
++	return stop;
++}
++
++/*
++ * Look into the per-cpu callback information to see if there is
++ * any processing necessary - if so do it.
++ */
++static void rcu_process_callbacks(unsigned long data)
++{
++	int stop;
++
++	spin_lock(&rcu_lock);
++	if (!rcu_polling_in_progress)
++		stop = rcu_prepare_polling();
++	else
++		stop = rcu_polling();
++	spin_unlock(&rcu_lock);
++
++	if (!stop)
++		tasklet_hi_schedule(&rcu_tasklet);
++}
++
++/* Because of FASTCALL declaration of complete, we use this wrapper */
++static void wakeme_after_rcu(void *completion)
++{
++	complete(completion);
++}
++
++#endif /* CONFIG_SMP */
++
++/*
++ * Initializes rcu mechanism.  Assumed to be called early.
++ * That is before local timer(SMP) or jiffie timer (uniproc) is setup.
++ */
++void __init rcu_init(void)
++{
++#ifdef CONFIG_SMP
++	tasklet_init(&rcu_tasklet, rcu_process_callbacks, 0UL);
++	INIT_LIST_HEAD(&rcu_nxtlist);
++	INIT_LIST_HEAD(&rcu_curlist);
++	spin_lock_init(&rcu_lock);
++#endif
++}
++
++/*
++ * Wait until all the CPUs have gone through a "quiescent" state.
++ */
++void synchronize_kernel(void)
++{
++#ifdef CONFIG_SMP
++	struct rcu_head rcu;
++	DECLARE_COMPLETION(completion);
++
++	/* Will wake me after RCU finished */
++	call_rcu(&rcu, wakeme_after_rcu, &completion);
++
++	/* Wait for it */
++	wait_for_completion(&completion);
++#endif
++}
++
++EXPORT_SYMBOL(call_rcu);
++EXPORT_SYMBOL(synchronize_kernel);
+diff -urN 2.4.14pre3/kernel/sched.c rcu/kernel/sched.c
+--- 2.4.14pre3/kernel/sched.c Wed Oct 24 08:04:27 2001
++++ rcu/kernel/sched.c Sun Oct 28 15:27:24 2001
+@@ -28,6 +28,7 @@
+ #include
+ #include
+ #include
++#include
+
+ #include
+ #include
+@@ -97,16 +98,7 @@
+  * We align per-CPU scheduling data on cacheline boundaries,
+  * to prevent cacheline ping-pong.
+  */
+-static union {
+-	struct schedule_data {
+-		struct task_struct * curr;
+-		cycles_t last_schedule;
+-	} schedule_data;
+-	char __pad [SMP_CACHE_BYTES];
+-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
+-
+-#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
+-#define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
++schedule_data_t schedule_data[NR_CPUS] __cacheline_aligned = {{&init_task,0}};
+
+ struct kernel_stat kstat;
+ extern struct task_struct *child_reaper;
+@@ -532,7 +524,7 @@
+  */
+ asmlinkage void schedule(void)
+ {
+-	struct schedule_data * sched_data;
++	schedule_data_t * sched_data;
+ 	struct task_struct *prev, *next, *p;
+ 	struct list_head *tmp;
+ 	int this_cpu, c;
+@@ -554,7 +546,7 @@
+ 	 * 'sched_data' is protected by the fact that we can run
+ 	 * only one process per CPU.
+ */ +- sched_data = & aligned_data[this_cpu].schedule_data; ++ sched_data = &schedule_data[this_cpu]; + + spin_lock_irq(&runqueue_lock); + +@@ -608,6 +600,8 @@ + */ + sched_data->curr = next; + #ifdef CONFIG_SMP ++ RCU_quiescent(this_cpu)++; ++ + next->has_cpu = 1; + next->processor = this_cpu; + #endif +@@ -861,6 +855,17 @@ + + void scheduling_functions_end_here(void) { } + ++void force_cpu_reschedule(int cpu) ++{ ++ spin_lock_irq(&runqueue_lock); ++ cpu_curr(cpu)->need_resched = 1; ++ spin_unlock_irq(&runqueue_lock); ++ ++#ifdef CONFIG_SMP ++ smp_send_reschedule(cpu); ++#endif ++} ++ + #ifndef __alpha__ + + /* +@@ -1057,7 +1062,7 @@ + // Subtract non-idle processes running on other CPUs. + for (i = 0; i < smp_num_cpus; i++) { + int cpu = cpu_logical_map(i); +- if (aligned_data[cpu].schedule_data.curr != idle_task(cpu)) ++ if (cpu_curr(cpu) != idle_task(cpu)) + nr_pending--; + } + #else +@@ -1314,8 +1319,8 @@ + + void __init init_idle(void) + { +- struct schedule_data * sched_data; +- sched_data = &aligned_data[smp_processor_id()].schedule_data; ++ schedule_data_t * sched_data; ++ sched_data = &schedule_data[smp_processor_id()]; + + if (current != &init_task && task_on_runqueue(current)) { + printk("UGH! (%d:%d) was on the runqueue, removing.\n", diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23 new file mode 100644 index 000000000000..3762a6cf1244 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23 @@ -0,0 +1,1400 @@ +diff -urN rwsem-ref/arch/alpha/config.in rwsem/arch/alpha/config.in +--- rwsem-ref/arch/alpha/config.in Wed Oct 10 02:14:52 2001 ++++ rwsem/arch/alpha/config.in Fri Oct 12 08:14:19 2001 +@@ -5,8 +5,6 @@ + + define_bool CONFIG_ALPHA y + define_bool CONFIG_UID16 n +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + + mainmenu_name "Kernel configuration of Linux for Alpha machines" + +diff -urN rwsem-ref/arch/arm/config.in rwsem/arch/arm/config.in +--- rwsem-ref/arch/arm/config.in Fri Oct 12 06:14:51 2001 ++++ rwsem/arch/arm/config.in Fri Oct 12 08:14:19 2001 +@@ -9,8 +9,6 @@ + define_bool CONFIG_SBUS n + define_bool CONFIG_MCA n + define_bool CONFIG_UID16 y +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + define_bool CONFIG_GENERIC_BUST_SPINLOCK n + + +diff -urN rwsem-ref/arch/cris/config.in rwsem/arch/cris/config.in +--- rwsem-ref/arch/cris/config.in Wed Oct 10 02:14:53 2001 ++++ rwsem/arch/cris/config.in Fri Oct 12 08:14:19 2001 +@@ -5,8 +5,6 @@ + mainmenu_name "Linux/CRIS Kernel Configuration" + + define_bool CONFIG_UID16 y +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + mainmenu_option next_comment + comment 'Code maturity level options' +diff -urN rwsem-ref/arch/i386/config.in rwsem/arch/i386/config.in +--- rwsem-ref/arch/i386/config.in Wed Oct 10 02:14:55 2001 ++++ rwsem/arch/i386/config.in Fri Oct 12 08:14:19 2001 +@@ -50,8 +50,6 @@ + define_bool CONFIG_X86_CMPXCHG n + define_bool CONFIG_X86_XADD n + define_int CONFIG_X86_L1_CACHE_SHIFT 4 +- define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +- define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + else + define_bool CONFIG_X86_WP_WORKS_OK y + define_bool CONFIG_X86_INVLPG y +@@ -59,8 +57,6 @@ + define_bool CONFIG_X86_XADD y + define_bool CONFIG_X86_BSWAP y + define_bool CONFIG_X86_POPAD_OK y +- define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n +- define_bool 
CONFIG_RWSEM_XCHGADD_ALGORITHM y + fi + if [ "$CONFIG_M486" = "y" ]; then + define_int CONFIG_X86_L1_CACHE_SHIFT 4 +diff -urN rwsem-ref/arch/ia64/config.in rwsem/arch/ia64/config.in +--- rwsem-ref/arch/ia64/config.in Sat Aug 11 08:03:54 2001 ++++ rwsem/arch/ia64/config.in Fri Oct 12 08:14:19 2001 +@@ -23,8 +23,6 @@ + define_bool CONFIG_EISA n + define_bool CONFIG_MCA n + define_bool CONFIG_SBUS n +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + if [ "$CONFIG_IA64_HP_SIM" = "n" ]; then + define_bool CONFIG_ACPI y +diff -urN rwsem-ref/arch/m68k/config.in rwsem/arch/m68k/config.in +--- rwsem-ref/arch/m68k/config.in Wed Jul 4 04:03:45 2001 ++++ rwsem/arch/m68k/config.in Fri Oct 12 08:14:19 2001 +@@ -4,8 +4,6 @@ + # + + define_bool CONFIG_UID16 y +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + mainmenu_name "Linux/68k Kernel Configuration" + +diff -urN rwsem-ref/arch/mips/config.in rwsem/arch/mips/config.in +--- rwsem-ref/arch/mips/config.in Sun Sep 23 21:11:28 2001 ++++ rwsem/arch/mips/config.in Fri Oct 12 08:14:19 2001 +@@ -68,8 +68,6 @@ + fi + bool 'Support for Alchemy Semi PB1000 board' CONFIG_MIPS_PB1000 + +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + # + # Select some configuration options automatically for certain systems. +diff -urN rwsem-ref/arch/mips64/config.in rwsem/arch/mips64/config.in +--- rwsem-ref/arch/mips64/config.in Sun Sep 23 21:11:29 2001 ++++ rwsem/arch/mips64/config.in Fri Oct 12 08:14:19 2001 +@@ -27,9 +27,6 @@ + fi + endmenu + +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n +- + # + # Select some configuration options automatically based on user selections + # +diff -urN rwsem-ref/arch/parisc/config.in rwsem/arch/parisc/config.in +--- rwsem-ref/arch/parisc/config.in Tue May 1 19:35:20 2001 ++++ rwsem/arch/parisc/config.in Fri Oct 12 08:14:19 2001 +@@ -7,8 +7,6 @@ + + define_bool CONFIG_PARISC y + define_bool CONFIG_UID16 n +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + mainmenu_option next_comment + comment 'Code maturity level options' +diff -urN rwsem-ref/arch/ppc/config.in rwsem/arch/ppc/config.in +--- rwsem-ref/arch/ppc/config.in Sun Sep 23 21:11:29 2001 ++++ rwsem/arch/ppc/config.in Fri Oct 12 08:14:19 2001 +@@ -4,8 +4,6 @@ + # see Documentation/kbuild/config-language.txt. 
+ # + define_bool CONFIG_UID16 n +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + + mainmenu_name "Linux/PowerPC Kernel Configuration" + +diff -urN rwsem-ref/arch/s390/config.in rwsem/arch/s390/config.in +--- rwsem-ref/arch/s390/config.in Sat Aug 11 08:03:56 2001 ++++ rwsem/arch/s390/config.in Fri Oct 12 08:14:19 2001 +@@ -7,8 +7,6 @@ + define_bool CONFIG_EISA n + define_bool CONFIG_MCA n + define_bool CONFIG_UID16 y +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + mainmenu_name "Linux Kernel Configuration" + define_bool CONFIG_ARCH_S390 y +diff -urN rwsem-ref/arch/s390x/config.in rwsem/arch/s390x/config.in +--- rwsem-ref/arch/s390x/config.in Fri Oct 12 06:14:55 2001 ++++ rwsem/arch/s390x/config.in Fri Oct 12 08:14:19 2001 +@@ -6,8 +6,6 @@ + define_bool CONFIG_ISA n + define_bool CONFIG_EISA n + define_bool CONFIG_MCA n +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + define_bool CONFIG_GENERIC_BUST_SPINLOCK n + + mainmenu_name "Linux Kernel Configuration" +diff -urN rwsem-ref/arch/sh/config.in rwsem/arch/sh/config.in +--- rwsem-ref/arch/sh/config.in Sun Sep 23 21:11:30 2001 ++++ rwsem/arch/sh/config.in Fri Oct 12 08:14:19 2001 +@@ -7,8 +7,6 @@ + define_bool CONFIG_SUPERH y + + define_bool CONFIG_UID16 y +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + mainmenu_option next_comment + comment 'Code maturity level options' +diff -urN rwsem-ref/arch/sparc/config.in rwsem/arch/sparc/config.in +--- rwsem-ref/arch/sparc/config.in Wed Jul 4 04:03:45 2001 ++++ rwsem/arch/sparc/config.in Fri Oct 12 08:14:19 2001 +@@ -48,8 +48,6 @@ + define_bool CONFIG_SUN_CONSOLE y + define_bool CONFIG_SUN_AUXIO y + define_bool CONFIG_SUN_IO y +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK y +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM n + + bool 'Support for SUN4 machines (disables SUN4[CDM] support)' CONFIG_SUN4 + if [ "$CONFIG_SUN4" != "y" ]; then +diff -urN rwsem-ref/arch/sparc64/config.in rwsem/arch/sparc64/config.in +--- rwsem-ref/arch/sparc64/config.in Sun Sep 23 21:11:30 2001 ++++ rwsem/arch/sparc64/config.in Fri Oct 12 08:14:19 2001 +@@ -33,8 +33,6 @@ + + # Global things across all Sun machines. + define_bool CONFIG_HAVE_DEC_LOCK y +-define_bool CONFIG_RWSEM_GENERIC_SPINLOCK n +-define_bool CONFIG_RWSEM_XCHGADD_ALGORITHM y + define_bool CONFIG_ISA n + define_bool CONFIG_ISAPNP n + define_bool CONFIG_EISA n +diff -urN rwsem-ref/include/asm-alpha/rwsem.h rwsem/include/asm-alpha/rwsem.h +--- rwsem-ref/include/asm-alpha/rwsem.h Wed Oct 10 02:16:18 2001 ++++ rwsem/include/asm-alpha/rwsem.h Thu Jan 1 01:00:00 1970 +@@ -1,208 +0,0 @@ +-#ifndef _ALPHA_RWSEM_H +-#define _ALPHA_RWSEM_H +- +-/* +- * Written by Ivan Kokshaysky , 2001. 
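+- *
+- * (Editor's note, not part of the original patch: the ldq_l/stq_c
+- * pairs in the helpers below are Alpha's load-locked and
+- * store-conditional instructions. Each primitive is the usual
+- * retry loop, roughly the following C pseudocode, where
+- * load_locked() and store_conditional() are hypothetical stand-ins
+- * for the two instructions:
+- *
+- *	do {
+- *		old = load_locked(&sem->count);
+- *		new = old + bias;
+- *	} while (!store_conditional(&sem->count, new));
+- *
+- * store_conditional() fails, forcing a retry, whenever another CPU
+- * has written the location since the load.)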
+- * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h +- */ +- +-#ifndef _LINUX_RWSEM_H +-#error please dont include asm/rwsem.h directly, use linux/rwsem.h instead +-#endif +- +-#ifdef __KERNEL__ +- +-#include +-#include +-#include +- +-struct rwsem_waiter; +- +-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); +-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); +- +-/* +- * the semaphore definition +- */ +-struct rw_semaphore { +- long count; +-#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L +-#define RWSEM_ACTIVE_BIAS 0x0000000000000001L +-#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL +-#define RWSEM_WAITING_BIAS (-0x0000000100000000L) +-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) +- spinlock_t wait_lock; +- struct list_head wait_list; +-#if RWSEM_DEBUG +- int debug; +-#endif +-}; +- +-#if RWSEM_DEBUG +-#define __RWSEM_DEBUG_INIT , 0 +-#else +-#define __RWSEM_DEBUG_INIT /* */ +-#endif +- +-#define __RWSEM_INITIALIZER(name) \ +- { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ +- LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } +- +-#define DECLARE_RWSEM(name) \ +- struct rw_semaphore name = __RWSEM_INITIALIZER(name) +- +-static inline void init_rwsem(struct rw_semaphore *sem) +-{ +- sem->count = RWSEM_UNLOCKED_VALUE; +- spin_lock_init(&sem->wait_lock); +- INIT_LIST_HEAD(&sem->wait_list); +-#if RWSEM_DEBUG +- sem->debug = 0; +-#endif +-} +- +-static inline void __down_read(struct rw_semaphore *sem) +-{ +- long oldcount; +-#ifndef CONFIG_SMP +- oldcount = sem->count; +- sem->count += RWSEM_ACTIVE_READ_BIAS; +-#else +- long temp; +- __asm__ __volatile__( +- "1: ldq_l %0,%1\n" +- " addq %0,%3,%2\n" +- " stq_c %2,%1\n" +- " beq %2,2f\n" +- " mb\n" +- ".subsection 2\n" +- "2: br 1b\n" +- ".previous" +- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) +- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); +-#endif +- if (__builtin_expect(oldcount < 0, 0)) +- rwsem_down_read_failed(sem); +-} +- +-static inline void __down_write(struct rw_semaphore *sem) +-{ +- long oldcount; +-#ifndef CONFIG_SMP +- oldcount = sem->count; +- sem->count += RWSEM_ACTIVE_WRITE_BIAS; +-#else +- long temp; +- __asm__ __volatile__( +- "1: ldq_l %0,%1\n" +- " addq %0,%3,%2\n" +- " stq_c %2,%1\n" +- " beq %2,2f\n" +- " mb\n" +- ".subsection 2\n" +- "2: br 1b\n" +- ".previous" +- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) +- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); +-#endif +- if (__builtin_expect(oldcount, 0)) +- rwsem_down_write_failed(sem); +-} +- +-static inline void __up_read(struct rw_semaphore *sem) +-{ +- long oldcount; +-#ifndef CONFIG_SMP +- oldcount = sem->count; +- sem->count -= RWSEM_ACTIVE_READ_BIAS; +-#else +- long temp; +- __asm__ __volatile__( +- " mb\n" +- "1: ldq_l %0,%1\n" +- " subq %0,%3,%2\n" +- " stq_c %2,%1\n" +- " beq %2,2f\n" +- ".subsection 2\n" +- "2: br 1b\n" +- ".previous" +- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) +- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); +-#endif +- if (__builtin_expect(oldcount < 0, 0)) +- if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0) +- rwsem_wake(sem); +-} +- +-static inline void __up_write(struct rw_semaphore *sem) +-{ +- long count; +-#ifndef CONFIG_SMP +- sem->count -= RWSEM_ACTIVE_WRITE_BIAS; +- count = sem->count; +-#else +- long temp; +- __asm__ __volatile__( +- " mb\n" +- "1: ldq_l 
%0,%1\n" +- " subq %0,%3,%2\n" +- " stq_c %2,%1\n" +- " beq %2,2f\n" +- " subq %0,%3,%0\n" +- ".subsection 2\n" +- "2: br 1b\n" +- ".previous" +- :"=&r" (count), "=m" (sem->count), "=&r" (temp) +- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); +-#endif +- if (__builtin_expect(count, 0)) +- if ((int)count == 0) +- rwsem_wake(sem); +-} +- +-static inline void rwsem_atomic_add(long val, struct rw_semaphore *sem) +-{ +-#ifndef CONFIG_SMP +- sem->count += val; +-#else +- long temp; +- __asm__ __volatile__( +- "1: ldq_l %0,%1\n" +- " addq %0,%2,%0\n" +- " stq_c %0,%1\n" +- " beq %0,2f\n" +- ".subsection 2\n" +- "2: br 1b\n" +- ".previous" +- :"=&r" (temp), "=m" (sem->count) +- :"Ir" (val), "m" (sem->count)); +-#endif +-} +- +-static inline long rwsem_atomic_update(long val, struct rw_semaphore *sem) +-{ +-#ifndef CONFIG_SMP +- sem->count += val; +- return sem->count; +-#else +- long ret, temp; +- __asm__ __volatile__( +- "1: ldq_l %0,%1\n" +- " addq %0,%3,%2\n" +- " addq %0,%3,%0\n" +- " stq_c %2,%1\n" +- " beq %2,2f\n" +- ".subsection 2\n" +- "2: br 1b\n" +- ".previous" +- :"=&r" (ret), "=m" (sem->count), "=&r" (temp) +- :"Ir" (val), "m" (sem->count)); +- +- return ret; +-#endif +-} +- +-#endif /* __KERNEL__ */ +-#endif /* _ALPHA_RWSEM_H */ +diff -urN rwsem-ref/include/asm-i386/rwsem.h rwsem/include/asm-i386/rwsem.h +--- rwsem-ref/include/asm-i386/rwsem.h Sat Sep 22 22:07:29 2001 ++++ rwsem/include/asm-i386/rwsem.h Thu Jan 1 01:00:00 1970 +@@ -1,226 +0,0 @@ +-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+ +- * +- * Written by David Howells (dhowells@redhat.com). +- * +- * Derived from asm-i386/semaphore.h +- * +- * +- * The MSW of the count is the negated number of active writers and waiting +- * lockers, and the LSW is the total number of active locks +- * +- * The lock count is initialized to 0 (no active and no waiting lockers). +- * +- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an +- * uncontended lock. This can be determined because XADD returns the old value. +- * Readers increment by 1 and see a positive value when uncontended, negative +- * if there are writers (and maybe) readers waiting (in which case it goes to +- * sleep). +- * +- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can +- * be extended to 65534 by manually checking the whole MSW rather than relying +- * on the S flag. +- * +- * The value of ACTIVE_BIAS supports up to 65535 active processes. +- * +- * This should be totally fair - if anything is waiting, a process that wants a +- * lock will go to the back of the queue. When the currently active lock is +- * released, if there's a writer at the front of the queue, then that and only +- * that will be woken up; if there's a bunch of consequtive readers at the +- * front, then they'll all be woken up, but no other readers will be. 
+- */ +- +-#ifndef _I386_RWSEM_H +-#define _I386_RWSEM_H +- +-#ifndef _LINUX_RWSEM_H +-#error please dont include asm/rwsem.h directly, use linux/rwsem.h instead +-#endif +- +-#ifdef __KERNEL__ +- +-#include +-#include +- +-struct rwsem_waiter; +- +-extern struct rw_semaphore *FASTCALL(rwsem_down_read_failed(struct rw_semaphore *sem)); +-extern struct rw_semaphore *FASTCALL(rwsem_down_write_failed(struct rw_semaphore *sem)); +-extern struct rw_semaphore *FASTCALL(rwsem_wake(struct rw_semaphore *)); +- +-/* +- * the semaphore definition +- */ +-struct rw_semaphore { +- signed long count; +-#define RWSEM_UNLOCKED_VALUE 0x00000000 +-#define RWSEM_ACTIVE_BIAS 0x00000001 +-#define RWSEM_ACTIVE_MASK 0x0000ffff +-#define RWSEM_WAITING_BIAS (-0x00010000) +-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS +-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) +- spinlock_t wait_lock; +- struct list_head wait_list; +-#if RWSEM_DEBUG +- int debug; +-#endif +-}; +- +-/* +- * initialisation +- */ +-#if RWSEM_DEBUG +-#define __RWSEM_DEBUG_INIT , 0 +-#else +-#define __RWSEM_DEBUG_INIT /* */ +-#endif +- +-#define __RWSEM_INITIALIZER(name) \ +-{ RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) \ +- __RWSEM_DEBUG_INIT } +- +-#define DECLARE_RWSEM(name) \ +- struct rw_semaphore name = __RWSEM_INITIALIZER(name) +- +-static inline void init_rwsem(struct rw_semaphore *sem) +-{ +- sem->count = RWSEM_UNLOCKED_VALUE; +- spin_lock_init(&sem->wait_lock); +- INIT_LIST_HEAD(&sem->wait_list); +-#if RWSEM_DEBUG +- sem->debug = 0; +-#endif +-} +- +-/* +- * lock for reading +- */ +-static inline void __down_read(struct rw_semaphore *sem) +-{ +- __asm__ __volatile__( +- "# beginning down_read\n\t" +-LOCK_PREFIX " incl (%%eax)\n\t" /* adds 0x00000001, returns the old value */ +- " js 2f\n\t" /* jump if we weren't granted the lock */ +- "1:\n\t" +- ".section .text.lock,\"ax\"\n" +- "2:\n\t" +- " pushl %%ecx\n\t" +- " pushl %%edx\n\t" +- " call rwsem_down_read_failed\n\t" +- " popl %%edx\n\t" +- " popl %%ecx\n\t" +- " jmp 1b\n" +- ".previous" +- "# ending down_read\n\t" +- : "+m"(sem->count) +- : "a"(sem) +- : "memory", "cc"); +-} +- +-/* +- * lock for writing +- */ +-static inline void __down_write(struct rw_semaphore *sem) +-{ +- int tmp; +- +- tmp = RWSEM_ACTIVE_WRITE_BIAS; +- __asm__ __volatile__( +- "# beginning down_write\n\t" +-LOCK_PREFIX " xadd %0,(%%eax)\n\t" /* subtract 0x0000ffff, returns the old value */ +- " testl %0,%0\n\t" /* was the count 0 before? 
*/ +- " jnz 2f\n\t" /* jump if we weren't granted the lock */ +- "1:\n\t" +- ".section .text.lock,\"ax\"\n" +- "2:\n\t" +- " pushl %%ecx\n\t" +- " call rwsem_down_write_failed\n\t" +- " popl %%ecx\n\t" +- " jmp 1b\n" +- ".previous\n" +- "# ending down_write" +- : "+d"(tmp), "+m"(sem->count) +- : "a"(sem) +- : "memory", "cc"); +-} +- +-/* +- * unlock after reading +- */ +-static inline void __up_read(struct rw_semaphore *sem) +-{ +- __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; +- __asm__ __volatile__( +- "# beginning __up_read\n\t" +-LOCK_PREFIX " xadd %%edx,(%%eax)\n\t" /* subtracts 1, returns the old value */ +- " js 2f\n\t" /* jump if the lock is being waited upon */ +- "1:\n\t" +- ".section .text.lock,\"ax\"\n" +- "2:\n\t" +- " decw %%dx\n\t" /* do nothing if still outstanding active readers */ +- " jnz 1b\n\t" +- " pushl %%ecx\n\t" +- " call rwsem_wake\n\t" +- " popl %%ecx\n\t" +- " jmp 1b\n" +- ".previous\n" +- "# ending __up_read\n" +- : "+m"(sem->count), "+d"(tmp) +- : "a"(sem) +- : "memory", "cc"); +-} +- +-/* +- * unlock after writing +- */ +-static inline void __up_write(struct rw_semaphore *sem) +-{ +- __asm__ __volatile__( +- "# beginning __up_write\n\t" +- " movl %2,%%edx\n\t" +-LOCK_PREFIX " xaddl %%edx,(%%eax)\n\t" /* tries to transition 0xffff0001 -> 0x00000000 */ +- " jnz 2f\n\t" /* jump if the lock is being waited upon */ +- "1:\n\t" +- ".section .text.lock,\"ax\"\n" +- "2:\n\t" +- " decw %%dx\n\t" /* did the active count reduce to 0? */ +- " jnz 1b\n\t" /* jump back if not */ +- " pushl %%ecx\n\t" +- " call rwsem_wake\n\t" +- " popl %%ecx\n\t" +- " jmp 1b\n" +- ".previous\n" +- "# ending __up_write\n" +- : "+m"(sem->count) +- : "a"(sem), "i"(-RWSEM_ACTIVE_WRITE_BIAS) +- : "memory", "cc", "edx"); +-} +- +-/* +- * implement atomic add functionality +- */ +-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) +-{ +- __asm__ __volatile__( +-LOCK_PREFIX "addl %1,%0" +- :"=m"(sem->count) +- :"ir"(delta), "m"(sem->count)); +-} +- +-/* +- * implement exchange and add functionality +- */ +-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) +-{ +- int tmp = delta; +- +- __asm__ __volatile__( +-LOCK_PREFIX "xadd %0,(%2)" +- : "+r"(tmp), "=m"(sem->count) +- : "r"(sem), "m"(sem->count) +- : "memory"); +- +- return tmp+delta; +-} +- +-#endif /* __KERNEL__ */ +-#endif /* _I386_RWSEM_H */ +diff -urN rwsem-ref/include/linux/rwsem-spinlock.h rwsem/include/linux/rwsem-spinlock.h +--- rwsem-ref/include/linux/rwsem-spinlock.h Sat Sep 22 22:07:29 2001 ++++ rwsem/include/linux/rwsem-spinlock.h Thu Jan 1 01:00:00 1970 +@@ -1,62 +0,0 @@ +-/* rwsem-spinlock.h: fallback C implementation +- * +- * Copyright (c) 2001 David Howells (dhowells@redhat.com). 
+- * - Derived partially from ideas by Andrea Arcangeli +- * - Derived also from comments by Linus +- */ +- +-#ifndef _LINUX_RWSEM_SPINLOCK_H +-#define _LINUX_RWSEM_SPINLOCK_H +- +-#ifndef _LINUX_RWSEM_H +-#error please dont include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead +-#endif +- +-#include +-#include +- +-#ifdef __KERNEL__ +- +-#include +- +-struct rwsem_waiter; +- +-/* +- * the rw-semaphore definition +- * - if activity is 0 then there are no active readers or writers +- * - if activity is +ve then that is the number of active readers +- * - if activity is -1 then there is one active writer +- * - if wait_list is not empty, then there are processes waiting for the semaphore +- */ +-struct rw_semaphore { +- __s32 activity; +- spinlock_t wait_lock; +- struct list_head wait_list; +-#if RWSEM_DEBUG +- int debug; +-#endif +-}; +- +-/* +- * initialisation +- */ +-#if RWSEM_DEBUG +-#define __RWSEM_DEBUG_INIT , 0 +-#else +-#define __RWSEM_DEBUG_INIT /* */ +-#endif +- +-#define __RWSEM_INITIALIZER(name) \ +-{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEBUG_INIT } +- +-#define DECLARE_RWSEM(name) \ +- struct rw_semaphore name = __RWSEM_INITIALIZER(name) +- +-extern void FASTCALL(init_rwsem(struct rw_semaphore *sem)); +-extern void FASTCALL(__down_read(struct rw_semaphore *sem)); +-extern void FASTCALL(__down_write(struct rw_semaphore *sem)); +-extern void FASTCALL(__up_read(struct rw_semaphore *sem)); +-extern void FASTCALL(__up_write(struct rw_semaphore *sem)); +- +-#endif /* __KERNEL__ */ +-#endif /* _LINUX_RWSEM_SPINLOCK_H */ +diff -urN rwsem-ref/include/linux/rwsem.h rwsem/include/linux/rwsem.h +--- rwsem-ref/include/linux/rwsem.h Sat Sep 22 22:07:29 2001 ++++ rwsem/include/linux/rwsem.h Fri Oct 12 08:14:19 2001 +@@ -1,80 +1,120 @@ +-/* rwsem.h: R/W semaphores, public interface +- * +- * Written by David Howells (dhowells@redhat.com). 
+- * Derived from asm-i386/semaphore.h +- */ +- + #ifndef _LINUX_RWSEM_H + #define _LINUX_RWSEM_H + +-#include +- +-#define RWSEM_DEBUG 0 +- + #ifdef __KERNEL__ + +-#include +-#include ++#include + #include +-#include +-#include + +-struct rw_semaphore; +- +-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK +-#include /* use a generic implementation */ +-#else +-#include /* use an arch-specific implementation */ ++struct rw_semaphore ++{ ++ spinlock_t lock; ++ long count; ++#define RWSEM_READ_BIAS 1 ++#define RWSEM_WRITE_BIAS (~(~0UL >> (BITS_PER_LONG>>1))) ++ struct list_head wait; ++#if RWSEM_DEBUG ++ long __magic; + #endif ++}; + +-#ifndef rwsemtrace + #if RWSEM_DEBUG +-extern void FASTCALL(rwsemtrace(struct rw_semaphore *sem, const char *str)); ++#define __SEM_DEBUG_INIT(name) \ ++ , (long)&(name).__magic ++#define RWSEM_MAGIC(x) \ ++ do { \ ++ if ((x) != (long)&(x)) { \ ++ printk("rwsem bad magic %lx (should be %lx), ", \ ++ (long)x, (long)&(x)); \ ++ BUG(); \ ++ } \ ++ } while (0) + #else +-#define rwsemtrace(SEM,FMT) ++#define __SEM_DEBUG_INIT(name) ++#define CHECK_MAGIC(x) + #endif ++ ++#define __RWSEM_INITIALIZER(name, count) \ ++{ \ ++ SPIN_LOCK_UNLOCKED, \ ++ (count), \ ++ LIST_HEAD_INIT((name).wait) \ ++ __SEM_DEBUG_INIT(name) \ ++} ++#define RWSEM_INITIALIZER(name) __RWSEM_INITIALIZER(name, 0) ++ ++#define __DECLARE_RWSEM(name, count) \ ++ struct rw_semaphore name = __RWSEM_INITIALIZER(name, count) ++#define DECLARE_RWSEM(name) __DECLARE_RWSEM(name, 0) ++#define DECLARE_RWSEM_READ_LOCKED(name) __DECLARE_RWSEM(name, RWSEM_READ_BIAS) ++#define DECLARE_RWSEM_WRITE_LOCKED(name) __DECLARE_RWSEM(name, RWSEM_WRITE_BIAS) ++ ++#define RWSEM_READ_BLOCKING_BIAS (RWSEM_WRITE_BIAS-RWSEM_READ_BIAS) ++#define RWSEM_WRITE_BLOCKING_BIAS (0) ++ ++#define RWSEM_READ_MASK (~RWSEM_WRITE_BIAS) ++#define RWSEM_WRITE_MASK (RWSEM_WRITE_BIAS) ++ ++extern void FASTCALL(rwsem_down_failed(struct rw_semaphore *, long)); ++extern void FASTCALL(rwsem_wake(struct rw_semaphore *)); ++ ++static inline void init_rwsem(struct rw_semaphore *sem) ++{ ++ spin_lock_init(&sem->lock); ++ sem->count = 0; ++ INIT_LIST_HEAD(&sem->wait); ++#if RWSEM_DEBUG ++ sem->__magic = (long)&sem->__magic; + #endif ++} + +-/* +- * lock for reading +- */ + static inline void down_read(struct rw_semaphore *sem) + { +- rwsemtrace(sem,"Entering down_read"); +- __down_read(sem); +- rwsemtrace(sem,"Leaving down_read"); ++ int count; ++ CHECK_MAGIC(sem->__magic); ++ ++ spin_lock(&sem->lock); ++ count = sem->count; ++ sem->count += RWSEM_READ_BIAS; ++ if (unlikely(count < 0)) ++ rwsem_down_failed(sem, RWSEM_READ_BLOCKING_BIAS); ++ spin_unlock(&sem->lock); + } + +-/* +- * lock for writing +- */ + static inline void down_write(struct rw_semaphore *sem) + { +- rwsemtrace(sem,"Entering down_write"); +- __down_write(sem); +- rwsemtrace(sem,"Leaving down_write"); ++ long count; ++ CHECK_MAGIC(sem->__magic); ++ ++ spin_lock(&sem->lock); ++ count = sem->count; ++ sem->count += RWSEM_WRITE_BIAS; ++ if (unlikely(count)) ++ rwsem_down_failed(sem, RWSEM_WRITE_BLOCKING_BIAS); ++ spin_unlock(&sem->lock); + } + +-/* +- * release a read lock +- */ + static inline void up_read(struct rw_semaphore *sem) + { +- rwsemtrace(sem,"Entering up_read"); +- __up_read(sem); +- rwsemtrace(sem,"Leaving up_read"); ++ CHECK_MAGIC(sem->__magic); ++ ++ spin_lock(&sem->lock); ++ sem->count -= RWSEM_READ_BIAS; ++ if (unlikely(sem->count < 0 && !(sem->count & RWSEM_READ_MASK))) ++ rwsem_wake(sem); ++ spin_unlock(&sem->lock); + } + +-/* +- * release a write lock +- */ + static inline void 
up_write(struct rw_semaphore *sem) + { +- rwsemtrace(sem,"Entering up_write"); +- __up_write(sem); +- rwsemtrace(sem,"Leaving up_write"); +-} ++ CHECK_MAGIC(sem->__magic); + ++ spin_lock(&sem->lock); ++ sem->count -= RWSEM_WRITE_BIAS; ++ if (unlikely(sem->count)) ++ rwsem_wake(sem); ++ spin_unlock(&sem->lock); ++} + + #endif /* __KERNEL__ */ + #endif /* _LINUX_RWSEM_H */ +diff -urN rwsem-ref/include/linux/sched.h rwsem/include/linux/sched.h +--- rwsem-ref/include/linux/sched.h Thu Oct 11 10:41:52 2001 ++++ rwsem/include/linux/sched.h Fri Oct 12 08:14:19 2001 +@@ -239,7 +239,7 @@ + pgd: swapper_pg_dir, \ + mm_users: ATOMIC_INIT(2), \ + mm_count: ATOMIC_INIT(1), \ +- mmap_sem: __RWSEM_INITIALIZER(name.mmap_sem), \ ++ mmap_sem: RWSEM_INITIALIZER(name.mmap_sem), \ + page_table_lock: SPIN_LOCK_UNLOCKED, \ + mmlist: LIST_HEAD_INIT(name.mmlist), \ + } +diff -urN rwsem-ref/lib/Makefile rwsem/lib/Makefile +--- rwsem-ref/lib/Makefile Fri Oct 12 08:14:03 2001 ++++ rwsem/lib/Makefile Fri Oct 12 08:14:39 2001 +@@ -8,12 +8,9 @@ + + L_TARGET := lib.a + +-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o ++export-objs := cmdline.o dec_and_lock.o rwsem.o rbtree.o + +-obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o +- +-obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o +-obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o ++obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o rwsem.o + + ifneq ($(CONFIG_HAVE_DEC_LOCK),y) + obj-y += dec_and_lock.o +diff -urN rwsem-ref/lib/rwsem-spinlock.c rwsem/lib/rwsem-spinlock.c +--- rwsem-ref/lib/rwsem-spinlock.c Tue May 1 19:35:33 2001 ++++ rwsem/lib/rwsem-spinlock.c Thu Jan 1 01:00:00 1970 +@@ -1,239 +0,0 @@ +-/* rwsem-spinlock.c: R/W semaphores: contention handling functions for generic spinlock +- * implementation +- * +- * Copyright (c) 2001 David Howells (dhowells@redhat.com). 
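+- *
+- * (Editor's note, not part of the original patch: this file and its
+- * per-arch counterparts are replaced by the single counting scheme in
+- * the new linux/rwsem.h above. With 32-bit longs RWSEM_WRITE_BIAS is
+- * 0xffff0000 and each reader adds 1, so the low half of the count
+- * holds the active readers. A blocked reader retires
+- * RWSEM_READ_BLOCKING_BIAS, converting its read bias into a write
+- * bias; a worked example, assuming 32-bit longs:
+- *
+- *	count = 0x00000000   free
+- *	down_read()          count = 0x00000001, granted
+- *	down_write()         count = 0xffff0001, old count nonzero,
+- *	                     writer queued with retire 0
+- *	down_read()          count = 0xfffe0001, old count negative,
+- *	                     reader queued, bias retired to write bias
+- *	up_read()            count = 0xfffe0000, low half now empty,
+- *	                     rwsem_wake() grants the oldest waiter
+- *
+- * rwsem_wake() subtracts each granted waiter's retire value again, so
+- * once the writer releases, the queued reader ends up holding a plain
+- * read bias of 1.)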
+- * - Derived partially from idea by Andrea Arcangeli +- * - Derived also from comments by Linus +- */ +-#include +-#include +-#include +- +-struct rwsem_waiter { +- struct list_head list; +- struct task_struct *task; +- unsigned int flags; +-#define RWSEM_WAITING_FOR_READ 0x00000001 +-#define RWSEM_WAITING_FOR_WRITE 0x00000002 +-}; +- +-#if RWSEM_DEBUG +-void rwsemtrace(struct rw_semaphore *sem, const char *str) +-{ +- if (sem->debug) +- printk("[%d] %s({%d,%d})\n", +- current->pid,str,sem->activity,list_empty(&sem->wait_list)?0:1); +-} +-#endif +- +-/* +- * initialise the semaphore +- */ +-void init_rwsem(struct rw_semaphore *sem) +-{ +- sem->activity = 0; +- spin_lock_init(&sem->wait_lock); +- INIT_LIST_HEAD(&sem->wait_list); +-#if RWSEM_DEBUG +- sem->debug = 0; +-#endif +-} +- +-/* +- * handle the lock being released whilst there are processes blocked on it that can now run +- * - if we come here, then: +- * - the 'active count' _reached_ zero +- * - the 'waiting count' is non-zero +- * - the spinlock must be held by the caller +- * - woken process blocks are discarded from the list after having flags zeroised +- */ +-static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem) +-{ +- struct rwsem_waiter *waiter; +- int woken; +- +- rwsemtrace(sem,"Entering __rwsem_do_wake"); +- +- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); +- +- /* try to grant a single write lock if there's a writer at the front of the queue +- * - we leave the 'waiting count' incremented to signify potential contention +- */ +- if (waiter->flags & RWSEM_WAITING_FOR_WRITE) { +- sem->activity = -1; +- list_del(&waiter->list); +- waiter->flags = 0; +- wake_up_process(waiter->task); +- goto out; +- } +- +- /* grant an infinite number of read locks to the readers at the front of the queue */ +- woken = 0; +- do { +- list_del(&waiter->list); +- waiter->flags = 0; +- wake_up_process(waiter->task); +- woken++; +- if (list_empty(&sem->wait_list)) +- break; +- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); +- } while (waiter->flags&RWSEM_WAITING_FOR_READ); +- +- sem->activity += woken; +- +- out: +- rwsemtrace(sem,"Leaving __rwsem_do_wake"); +- return sem; +-} +- +-/* +- * wake a single writer +- */ +-static inline struct rw_semaphore *__rwsem_wake_one_writer(struct rw_semaphore *sem) +-{ +- struct rwsem_waiter *waiter; +- +- sem->activity = -1; +- +- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); +- list_del(&waiter->list); +- +- waiter->flags = 0; +- wake_up_process(waiter->task); +- return sem; +-} +- +-/* +- * get a read lock on the semaphore +- */ +-void __down_read(struct rw_semaphore *sem) +-{ +- struct rwsem_waiter waiter; +- struct task_struct *tsk; +- +- rwsemtrace(sem,"Entering __down_read"); +- +- spin_lock(&sem->wait_lock); +- +- if (sem->activity>=0 && list_empty(&sem->wait_list)) { +- /* granted */ +- sem->activity++; +- spin_unlock(&sem->wait_lock); +- goto out; +- } +- +- tsk = current; +- set_task_state(tsk,TASK_UNINTERRUPTIBLE); +- +- /* set up my own style of waitqueue */ +- waiter.task = tsk; +- waiter.flags = RWSEM_WAITING_FOR_READ; +- +- list_add_tail(&waiter.list,&sem->wait_list); +- +- /* we don't need to touch the semaphore struct anymore */ +- spin_unlock(&sem->wait_lock); +- +- /* wait to be given the lock */ +- for (;;) { +- if (!waiter.flags) +- break; +- schedule(); +- set_task_state(tsk, TASK_UNINTERRUPTIBLE); +- } +- +- tsk->state = TASK_RUNNING; +- +- out: +- rwsemtrace(sem,"Leaving __down_read"); +-} +- +-/* +- 
* get a write lock on the semaphore +- * - note that we increment the waiting count anyway to indicate an exclusive lock +- */ +-void __down_write(struct rw_semaphore *sem) +-{ +- struct rwsem_waiter waiter; +- struct task_struct *tsk; +- +- rwsemtrace(sem,"Entering __down_write"); +- +- spin_lock(&sem->wait_lock); +- +- if (sem->activity==0 && list_empty(&sem->wait_list)) { +- /* granted */ +- sem->activity = -1; +- spin_unlock(&sem->wait_lock); +- goto out; +- } +- +- tsk = current; +- set_task_state(tsk,TASK_UNINTERRUPTIBLE); +- +- /* set up my own style of waitqueue */ +- waiter.task = tsk; +- waiter.flags = RWSEM_WAITING_FOR_WRITE; +- +- list_add_tail(&waiter.list,&sem->wait_list); +- +- /* we don't need to touch the semaphore struct anymore */ +- spin_unlock(&sem->wait_lock); +- +- /* wait to be given the lock */ +- for (;;) { +- if (!waiter.flags) +- break; +- schedule(); +- set_task_state(tsk, TASK_UNINTERRUPTIBLE); +- } +- +- tsk->state = TASK_RUNNING; +- +- out: +- rwsemtrace(sem,"Leaving __down_write"); +-} +- +-/* +- * release a read lock on the semaphore +- */ +-void __up_read(struct rw_semaphore *sem) +-{ +- rwsemtrace(sem,"Entering __up_read"); +- +- spin_lock(&sem->wait_lock); +- +- if (--sem->activity==0 && !list_empty(&sem->wait_list)) +- sem = __rwsem_wake_one_writer(sem); +- +- spin_unlock(&sem->wait_lock); +- +- rwsemtrace(sem,"Leaving __up_read"); +-} +- +-/* +- * release a write lock on the semaphore +- */ +-void __up_write(struct rw_semaphore *sem) +-{ +- rwsemtrace(sem,"Entering __up_write"); +- +- spin_lock(&sem->wait_lock); +- +- sem->activity = 0; +- if (!list_empty(&sem->wait_list)) +- sem = __rwsem_do_wake(sem); +- +- spin_unlock(&sem->wait_lock); +- +- rwsemtrace(sem,"Leaving __up_write"); +-} +- +-EXPORT_SYMBOL(init_rwsem); +-EXPORT_SYMBOL(__down_read); +-EXPORT_SYMBOL(__down_write); +-EXPORT_SYMBOL(__up_read); +-EXPORT_SYMBOL(__up_write); +-#if RWSEM_DEBUG +-EXPORT_SYMBOL(rwsemtrace); +-#endif +diff -urN rwsem-ref/lib/rwsem.c rwsem/lib/rwsem.c +--- rwsem-ref/lib/rwsem.c Sat Jul 21 00:04:34 2001 ++++ rwsem/lib/rwsem.c Fri Oct 12 08:14:19 2001 +@@ -1,210 +1,63 @@ +-/* rwsem.c: R/W semaphores: contention handling functions +- * +- * Written by David Howells (dhowells@redhat.com). 
+- * Derived from arch/i386/kernel/semaphore.c ++/* ++ * rw_semaphores generic spinlock version ++ * Copyright (C) 2001 Andrea Arcangeli SuSE + */ +-#include ++ + #include + #include ++#include + +-struct rwsem_waiter { +- struct list_head list; +- struct task_struct *task; +- unsigned int flags; +-#define RWSEM_WAITING_FOR_READ 0x00000001 +-#define RWSEM_WAITING_FOR_WRITE 0x00000002 ++struct rwsem_wait_queue { ++ unsigned long retire; ++ struct task_struct * task; ++ struct list_head task_list; + }; + +-#if RWSEM_DEBUG +-#undef rwsemtrace +-void rwsemtrace(struct rw_semaphore *sem, const char *str) +-{ +- printk("sem=%p\n",sem); +- printk("(sem)=%08lx\n",sem->count); +- if (sem->debug) +- printk("[%d] %s({%08lx})\n",current->pid,str,sem->count); +-} +-#endif +- +-/* +- * handle the lock being released whilst there are processes blocked on it that can now run +- * - if we come here, then: +- * - the 'active part' of the count (&0x0000ffff) reached zero but has been re-incremented +- * - the 'waiting part' of the count (&0xffff0000) is negative (and will still be so) +- * - there must be someone on the queue +- * - the spinlock must be held by the caller +- * - woken process blocks are discarded from the list after having flags zeroised +- */ +-static inline struct rw_semaphore *__rwsem_do_wake(struct rw_semaphore *sem) +-{ +- struct rwsem_waiter *waiter; +- struct list_head *next; +- signed long oldcount; +- int woken, loop; +- +- rwsemtrace(sem,"Entering __rwsem_do_wake"); +- +- /* only wake someone up if we can transition the active part of the count from 0 -> 1 */ +- try_again: +- oldcount = rwsem_atomic_update(RWSEM_ACTIVE_BIAS,sem) - RWSEM_ACTIVE_BIAS; +- if (oldcount & RWSEM_ACTIVE_MASK) +- goto undo; +- +- waiter = list_entry(sem->wait_list.next,struct rwsem_waiter,list); +- +- /* try to grant a single write lock if there's a writer at the front of the queue +- * - note we leave the 'active part' of the count incremented by 1 and the waiting part +- * incremented by 0x00010000 +- */ +- if (!(waiter->flags & RWSEM_WAITING_FOR_WRITE)) +- goto readers_only; +- +- list_del(&waiter->list); +- waiter->flags = 0; +- wake_up_process(waiter->task); +- goto out; +- +- /* grant an infinite number of read locks to the readers at the front of the queue +- * - note we increment the 'active part' of the count by the number of readers (less one +- * for the activity decrement we've already done) before waking any processes up +- */ +- readers_only: +- woken = 0; +- do { +- woken++; +- +- if (waiter->list.next==&sem->wait_list) +- break; +- +- waiter = list_entry(waiter->list.next,struct rwsem_waiter,list); +- +- } while (waiter->flags & RWSEM_WAITING_FOR_READ); +- +- loop = woken; +- woken *= RWSEM_ACTIVE_BIAS-RWSEM_WAITING_BIAS; +- woken -= RWSEM_ACTIVE_BIAS; +- rwsem_atomic_add(woken,sem); +- +- next = sem->wait_list.next; +- for (; loop>0; loop--) { +- waiter = list_entry(next,struct rwsem_waiter,list); +- next = waiter->list.next; +- waiter->flags = 0; +- wake_up_process(waiter->task); +- } +- +- sem->wait_list.next = next; +- next->prev = &sem->wait_list; +- +- out: +- rwsemtrace(sem,"Leaving __rwsem_do_wake"); +- return sem; +- +- /* undo the change to count, but check for a transition 1->0 */ +- undo: +- if (rwsem_atomic_update(-RWSEM_ACTIVE_BIAS,sem)!=0) +- goto out; +- goto try_again; +-} +- +-/* +- * wait for a lock to be granted +- */ +-static inline struct rw_semaphore *rwsem_down_failed_common(struct rw_semaphore *sem, +- struct rwsem_waiter *waiter, +- signed long adjustment) ++void 
rwsem_down_failed(struct rw_semaphore *sem, long retire) + { + struct task_struct *tsk = current; +- signed long count; +- +- set_task_state(tsk,TASK_UNINTERRUPTIBLE); +- +- /* set up my own style of waitqueue */ +- spin_lock(&sem->wait_lock); +- waiter->task = tsk; +- +- list_add_tail(&waiter->list,&sem->wait_list); +- +- /* note that we're now waiting on the lock, but no longer actively read-locking */ +- count = rwsem_atomic_update(adjustment,sem); +- +- /* if there are no longer active locks, wake the front queued process(es) up +- * - it might even be this process, since the waker takes a more active part +- */ +- if (!(count & RWSEM_ACTIVE_MASK)) +- sem = __rwsem_do_wake(sem); ++ struct rwsem_wait_queue wait; + +- spin_unlock(&sem->wait_lock); ++ sem->count += retire; ++ wait.retire = retire; ++ wait.task = tsk; ++ INIT_LIST_HEAD(&wait.task_list); ++ list_add(&wait.task_list, &sem->wait); + +- /* wait to be given the lock */ +- for (;;) { +- if (!waiter->flags) +- break; ++ do { ++ __set_task_state(tsk, TASK_UNINTERRUPTIBLE); ++ spin_unlock(&sem->lock); + schedule(); +- set_task_state(tsk, TASK_UNINTERRUPTIBLE); +- } +- +- tsk->state = TASK_RUNNING; +- +- return sem; +-} +- +-/* +- * wait for the read lock to be granted +- */ +-struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem) +-{ +- struct rwsem_waiter waiter; +- +- rwsemtrace(sem,"Entering rwsem_down_read_failed"); +- +- waiter.flags = RWSEM_WAITING_FOR_READ; +- rwsem_down_failed_common(sem,&waiter,RWSEM_WAITING_BIAS-RWSEM_ACTIVE_BIAS); +- +- rwsemtrace(sem,"Leaving rwsem_down_read_failed"); +- return sem; ++ spin_lock(&sem->lock); ++ } while(wait.task_list.next); + } + +-/* +- * wait for the write lock to be granted +- */ +-struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem) ++void rwsem_wake(struct rw_semaphore *sem) + { +- struct rwsem_waiter waiter; ++ struct list_head * entry, * head = &sem->wait; ++ int last = 0; + +- rwsemtrace(sem,"Entering rwsem_down_write_failed"); ++ while ((entry = head->prev) != head) { ++ struct rwsem_wait_queue * wait; + +- waiter.flags = RWSEM_WAITING_FOR_WRITE; +- rwsem_down_failed_common(sem,&waiter,-RWSEM_ACTIVE_BIAS); +- +- rwsemtrace(sem,"Leaving rwsem_down_write_failed"); +- return sem; +-} ++ wait = list_entry(entry, struct rwsem_wait_queue, task_list); + +-/* +- * handle waking up a waiter on the semaphore +- * - up_read has decremented the active part of the count if we come here +- */ +-struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) +-{ +- rwsemtrace(sem,"Entering rwsem_wake"); ++ if (wait->retire == RWSEM_WRITE_BLOCKING_BIAS) { ++ if (sem->count & RWSEM_READ_MASK) ++ break; ++ last = 1; ++ } + +- spin_lock(&sem->wait_lock); +- +- /* do nothing if list empty */ +- if (!list_empty(&sem->wait_list)) +- sem = __rwsem_do_wake(sem); +- +- spin_unlock(&sem->wait_lock); +- +- rwsemtrace(sem,"Leaving rwsem_wake"); +- +- return sem; ++ /* convert write lock into read lock when read become active */ ++ sem->count -= wait->retire; ++ list_del(entry); ++ entry->next = NULL; ++ wake_up_process(wait->task); ++ ++ if (last) ++ break; ++ } + } + +-EXPORT_SYMBOL_NOVERS(rwsem_down_read_failed); +-EXPORT_SYMBOL_NOVERS(rwsem_down_write_failed); +-EXPORT_SYMBOL_NOVERS(rwsem_wake); +-#if RWSEM_DEBUG +-EXPORT_SYMBOL(rwsemtrace); +-#endif ++EXPORT_SYMBOL(rwsem_down_failed); ++EXPORT_SYMBOL(rwsem_wake); diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4 
new file mode 100644 index 000000000000..d54be8d152a9 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_rwsem-fair-23-recursive-4 @@ -0,0 +1,271 @@ +diff -urN 2.4.11pre3aa1/arch/alpha/mm/fault.c recurse/arch/alpha/mm/fault.c +--- 2.4.11pre3aa1/arch/alpha/mm/fault.c Sun Sep 23 21:11:28 2001 ++++ recurse/arch/alpha/mm/fault.c Thu Oct 4 18:50:12 2001 +@@ -113,7 +113,7 @@ + goto vmalloc_fault; + #endif + +- down_read(&mm->mmap_sem); ++ down_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; +@@ -147,7 +147,7 @@ + * the fault. + */ + fault = handle_mm_fault(mm, vma, address, cause > 0); +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + + if (fault < 0) + goto out_of_memory; +@@ -161,7 +161,7 @@ + * Fix it, but check if it's kernel or user first.. + */ + bad_area: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + + if (user_mode(regs)) { + force_sig(SIGSEGV, current); +@@ -198,7 +198,7 @@ + if (current->pid == 1) { + current->policy |= SCHED_YIELD; + schedule(); +- down_read(&mm->mmap_sem); ++ down_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + goto survive; + } + printk(KERN_ALERT "VM: killing process %s(%d)\n", +diff -urN 2.4.11pre3aa1/arch/i386/mm/fault.c recurse/arch/i386/mm/fault.c +--- 2.4.11pre3aa1/arch/i386/mm/fault.c Sun Sep 23 21:11:28 2001 ++++ recurse/arch/i386/mm/fault.c Thu Oct 4 18:50:12 2001 +@@ -191,7 +191,7 @@ + if (in_interrupt() || !mm) + goto no_context; + +- down_read(&mm->mmap_sem); ++ down_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + + vma = find_vma(mm, address); + if (!vma) +@@ -265,7 +265,7 @@ + if (bit < 32) + tsk->thread.screen_bitmap |= 1 << bit; + } +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + return; + + /* +@@ -273,7 +273,7 @@ + * Fix it, but check if it's kernel or user first.. + */ + bad_area: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + + /* User mode accesses just cause a SIGSEGV */ + if (error_code & 4) { +@@ -341,11 +341,11 @@ + * us unable to handle the page fault gracefully. 
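+ *
+ * (Editor's note, not part of the original patch: the _recursive
+ * variants threaded through these fault handlers exist so that a page
+ * fault taken while the same task already read-holds mmap_sem, as
+ * happens for instance underneath the core_dump path converted in
+ * fs/exec.c, cannot deadlock behind a queued writer. The recursor
+ * added to linux/rwsem.h further below is a per-task depth counter,
+ * and only the outermost acquisition may block:
+ *
+ *	down_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ *	... a nested down_read_recursive() here is granted without
+ *	    sleeping, because recursor->counter is already nonzero ...
+ *	up_read_recursive(&mm->mmap_sem, &current->mm_recursor);
+ *
+ * so writer fairness is preserved in the common, non-nested case.)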
+ */ + out_of_memory: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + if (tsk->pid == 1) { + tsk->policy |= SCHED_YIELD; + schedule(); +- down_read(&mm->mmap_sem); ++ down_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + goto survive; + } + printk("VM: killing process %s\n", tsk->comm); +@@ -354,7 +354,7 @@ + goto no_context; + + do_sigbus: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + + /* + * Send a sigbus, regardless of whether we were in kernel +diff -urN 2.4.11pre3aa1/arch/ia64/mm/fault.c recurse/arch/ia64/mm/fault.c +--- 2.4.11pre3aa1/arch/ia64/mm/fault.c Tue May 1 19:35:18 2001 ++++ recurse/arch/ia64/mm/fault.c Thu Oct 4 18:50:12 2001 +@@ -60,7 +60,7 @@ + if (in_interrupt() || !mm) + goto no_context; + +- down_read(&mm->mmap_sem); ++ down_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + + vma = find_vma_prev(mm, address, &prev_vma); + if (!vma) +@@ -112,7 +112,7 @@ + default: + goto out_of_memory; + } +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + return; + + check_expansion: +@@ -135,7 +135,7 @@ + goto good_area; + + bad_area: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + if (isr & IA64_ISR_SP) { + /* + * This fault was due to a speculative load set the "ed" bit in the psr to +@@ -184,7 +184,7 @@ + return; + + out_of_memory: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); +diff -urN 2.4.11pre3aa1/arch/ppc/mm/fault.c recurse/arch/ppc/mm/fault.c +--- 2.4.11pre3aa1/arch/ppc/mm/fault.c Thu Oct 4 10:06:33 2001 ++++ recurse/arch/ppc/mm/fault.c Thu Oct 4 18:50:12 2001 +@@ -103,7 +103,7 @@ + bad_page_fault(regs, address, SIGSEGV); + return; + } +- down_read(&mm->mmap_sem); ++ down_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + vma = find_vma(mm, address); + if (!vma) + goto bad_area; +@@ -164,7 +164,7 @@ + goto out_of_memory; + } + +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + /* + * keep track of tlb+htab misses that are good addrs but + * just need pte's created via handle_mm_fault() +@@ -174,7 +174,7 @@ + return; + + bad_area: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + pte_errors++; + + /* User mode accesses cause a SIGSEGV */ +@@ -195,7 +195,7 @@ + * us unable to handle the page fault gracefully. 
+ */ + out_of_memory: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + if (current->pid == 1) { + current->policy |= SCHED_YIELD; + schedule(); +@@ -209,7 +209,7 @@ + return; + + do_sigbus: +- up_read(&mm->mmap_sem); ++ up_read_recursive(&mm->mmap_sem, ¤t->mm_recursor); + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; +diff -urN 2.4.11pre3aa1/fs/exec.c recurse/fs/exec.c +--- 2.4.11pre3aa1/fs/exec.c Sun Sep 23 21:11:39 2001 ++++ recurse/fs/exec.c Thu Oct 4 18:50:12 2001 +@@ -969,9 +969,9 @@ + if (do_truncate(file->f_dentry, 0) != 0) + goto close_fail; + +- down_read(¤t->mm->mmap_sem); ++ down_read_recursive(¤t->mm->mmap_sem, ¤t->mm_recursor); + retval = binfmt->core_dump(signr, regs, file); +- up_read(¤t->mm->mmap_sem); ++ up_read_recursive(¤t->mm->mmap_sem, ¤t->mm_recursor); + + close_fail: + filp_close(file, NULL); +diff -urN 2.4.11pre3aa1/include/linux/rwsem.h recurse/include/linux/rwsem.h +--- 2.4.11pre3aa1/include/linux/rwsem.h Thu Oct 4 18:49:53 2001 ++++ recurse/include/linux/rwsem.h Thu Oct 4 18:50:12 2001 +@@ -18,6 +18,11 @@ + #endif + }; + ++struct rw_sem_recursor ++{ ++ int counter; ++}; ++ + #if RWSEM_DEBUG + #define __SEM_DEBUG_INIT(name) \ + , (long)&(name).__magic +@@ -42,6 +47,7 @@ + __SEM_DEBUG_INIT(name) \ + } + #define RWSEM_INITIALIZER(name) __RWSEM_INITIALIZER(name, 0) ++#define RWSEM_RECURSOR_INITIALIZER ((struct rw_sem_recursor) { 0, }) + + #define __DECLARE_RWSEM(name, count) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name, count) +@@ -112,6 +118,34 @@ + spin_lock(&sem->lock); + sem->count -= RWSEM_WRITE_BIAS; + if (unlikely(sem->count)) ++ rwsem_wake(sem); ++ spin_unlock(&sem->lock); ++} ++ ++static inline void down_read_recursive(struct rw_semaphore *sem, ++ struct rw_sem_recursor * recursor) ++{ ++ int count, counter; ++ CHECK_MAGIC(sem->__magic); ++ ++ spin_lock(&sem->lock); ++ count = sem->count; ++ sem->count += RWSEM_READ_BIAS; ++ counter = recursor->counter++; ++ if (unlikely(count < 0 && !counter && !(count & RWSEM_READ_MASK))) ++ rwsem_down_failed(sem, RWSEM_READ_BLOCKING_BIAS); ++ spin_unlock(&sem->lock); ++} ++ ++static inline void up_read_recursive(struct rw_semaphore *sem, ++ struct rw_sem_recursor * recursor) ++{ ++ CHECK_MAGIC(sem->__magic); ++ ++ spin_lock(&sem->lock); ++ sem->count -= RWSEM_READ_BIAS; ++ recursor->counter--; ++ if (unlikely(sem->count < 0 && !(sem->count & RWSEM_READ_MASK))) + rwsem_wake(sem); + spin_unlock(&sem->lock); + } +diff -urN 2.4.11pre3aa1/include/linux/sched.h recurse/include/linux/sched.h +--- 2.4.11pre3aa1/include/linux/sched.h Thu Oct 4 18:49:53 2001 ++++ recurse/include/linux/sched.h Thu Oct 4 18:50:12 2001 +@@ -315,6 +315,7 @@ + + struct task_struct *next_task, *prev_task; + struct mm_struct *active_mm; ++ struct rw_sem_recursor mm_recursor; + struct list_head local_pages; + unsigned int allocation_order, nr_local_pages; + +@@ -460,6 +461,7 @@ + policy: SCHED_OTHER, \ + mm: NULL, \ + active_mm: &init_mm, \ ++ mm_recursor: RWSEM_RECURSOR_INITIALIZER, \ + cpus_allowed: -1, \ + run_list: LIST_HEAD_INIT(tsk.run_list), \ + next_task: &tsk, \ diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10 new file mode 100644 index 000000000000..9f5d7c11faaa --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_silent-stack-overflow-10 @@ -0,0 +1,394 @@ +diff -urN 2.4.10pre11/arch/alpha/mm/fault.c silent-stack-overflow/arch/alpha/mm/fault.c +--- 
2.4.10pre11/arch/alpha/mm/fault.c Tue Sep 18 02:41:49 2001 ++++ silent-stack-overflow/arch/alpha/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -121,7 +121,7 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/arch/arm/mm/fault-common.c silent-stack-overflow/arch/arm/mm/fault-common.c +--- 2.4.10pre11/arch/arm/mm/fault-common.c Thu Aug 16 22:03:23 2001 ++++ silent-stack-overflow/arch/arm/mm/fault-common.c Tue Sep 18 10:08:51 2001 +@@ -229,7 +229,7 @@ + goto survive; + + check_stack: +- if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr)) ++ if (vma->vm_flags & VM_GROWSDOWN && !expand_stack(vma, addr, NULL)) + goto good_area; + out: + return fault; +diff -urN 2.4.10pre11/arch/cris/mm/fault.c silent-stack-overflow/arch/cris/mm/fault.c +--- 2.4.10pre11/arch/cris/mm/fault.c Sat Aug 11 08:03:54 2001 ++++ silent-stack-overflow/arch/cris/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -284,7 +284,7 @@ + if (address + PAGE_SIZE < rdusp()) + goto bad_area; + } +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + + /* +diff -urN 2.4.10pre11/arch/i386/mm/fault.c silent-stack-overflow/arch/i386/mm/fault.c +--- 2.4.10pre11/arch/i386/mm/fault.c Tue Sep 18 02:41:57 2001 ++++ silent-stack-overflow/arch/i386/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -33,7 +33,7 @@ + */ + int __verify_write(const void * addr, unsigned long size) + { +- struct vm_area_struct * vma; ++ struct vm_area_struct * vma, * prev_vma; + unsigned long start = (unsigned long) addr; + + if (!size) +@@ -79,7 +79,8 @@ + check_stack: + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, start) == 0) ++ find_vma_prev(current->mm, start, &prev_vma); ++ if (expand_stack(vma, start, prev_vma) == 0) + goto good_area; + + bad_area: +@@ -152,7 +153,7 @@ + { + struct task_struct *tsk; + struct mm_struct *mm; +- struct vm_area_struct * vma; ++ struct vm_area_struct * vma, * prev_vma; + unsigned long address; + unsigned long page; + unsigned long fixup; +@@ -213,7 +214,8 @@ + if (address + 32 < regs->esp) + goto bad_area; + } +- if (expand_stack(vma, address)) ++ find_vma_prev(mm, address, &prev_vma); ++ if (expand_stack(vma, address, prev_vma)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/arch/ia64/mm/fault.c silent-stack-overflow/arch/ia64/mm/fault.c +--- 2.4.10pre11/arch/ia64/mm/fault.c Tue May 1 19:35:18 2001 ++++ silent-stack-overflow/arch/ia64/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -122,7 +122,7 @@ + if (rgn_index(address) != rgn_index(vma->vm_start) + || rgn_offset(address) >= RGN_MAP_LIMIT) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + } else { + vma = prev_vma; +diff -urN 2.4.10pre11/arch/m68k/mm/fault.c silent-stack-overflow/arch/m68k/mm/fault.c +--- 2.4.10pre11/arch/m68k/mm/fault.c Sun Apr 1 01:17:08 2001 ++++ silent-stack-overflow/arch/m68k/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -120,7 +120,7 @@ + if (address + 256 < rdusp()) + goto map_err; + } +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto map_err; + + /* +diff -urN 2.4.10pre11/arch/mips/mm/fault.c silent-stack-overflow/arch/mips/mm/fault.c +--- 2.4.10pre11/arch/mips/mm/fault.c Sat Jul 21 00:04:05 2001 ++++ silent-stack-overflow/arch/mips/mm/fault.c Tue Sep 18 
10:08:51 2001 +@@ -80,7 +80,7 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/arch/mips64/mm/fault.c silent-stack-overflow/arch/mips64/mm/fault.c +--- 2.4.10pre11/arch/mips64/mm/fault.c Tue Sep 18 02:42:13 2001 ++++ silent-stack-overflow/arch/mips64/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -132,7 +132,7 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/arch/ppc/mm/fault.c silent-stack-overflow/arch/ppc/mm/fault.c +--- 2.4.10pre11/arch/ppc/mm/fault.c Wed Jul 4 04:03:45 2001 ++++ silent-stack-overflow/arch/ppc/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -64,7 +64,7 @@ + void do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) + { +- struct vm_area_struct * vma; ++ struct vm_area_struct * vma, * prev_vma; + struct mm_struct *mm = current->mm; + siginfo_t info; + int code = SEGV_MAPERR; +@@ -111,7 +111,8 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ vma = find_vma_prev(mm, address, &prev_vma); ++ if (expand_stack(vma, address, prev_vma)) + goto bad_area; + + good_area: +diff -urN 2.4.10pre11/arch/s390/mm/fault.c silent-stack-overflow/arch/s390/mm/fault.c +--- 2.4.10pre11/arch/s390/mm/fault.c Sat Aug 11 08:03:59 2001 ++++ silent-stack-overflow/arch/s390/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -122,7 +122,7 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/arch/s390x/mm/fault.c silent-stack-overflow/arch/s390x/mm/fault.c +--- 2.4.10pre11/arch/s390x/mm/fault.c Sat Aug 11 08:04:00 2001 ++++ silent-stack-overflow/arch/s390x/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -152,7 +152,7 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/arch/sh/mm/fault.c silent-stack-overflow/arch/sh/mm/fault.c +--- 2.4.10pre11/arch/sh/mm/fault.c Tue Sep 18 02:42:19 2001 ++++ silent-stack-overflow/arch/sh/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -74,7 +74,7 @@ + check_stack: + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, start) == 0) ++ if (expand_stack(vma, start, NULL) == 0) + goto good_area; + + bad_area: +@@ -114,7 +114,7 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/arch/sparc/mm/fault.c silent-stack-overflow/arch/sparc/mm/fault.c +--- 2.4.10pre11/arch/sparc/mm/fault.c Sat Aug 11 08:04:01 2001 ++++ silent-stack-overflow/arch/sparc/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -238,7 +238,7 @@ + goto good_area; + if(!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if(expand_stack(vma, address)) ++ if(expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good 
vm_area for this memory access, so +@@ -485,7 +485,7 @@ + goto good_area; + if(!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if(expand_stack(vma, address)) ++ if(expand_stack(vma, address, NULL)) + goto bad_area; + good_area: + info.si_code = SEGV_ACCERR; +diff -urN 2.4.10pre11/arch/sparc64/mm/fault.c silent-stack-overflow/arch/sparc64/mm/fault.c +--- 2.4.10pre11/arch/sparc64/mm/fault.c Tue Sep 18 02:42:20 2001 ++++ silent-stack-overflow/arch/sparc64/mm/fault.c Tue Sep 18 10:08:51 2001 +@@ -340,7 +340,7 @@ + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; +- if (expand_stack(vma, address)) ++ if (expand_stack(vma, address, NULL)) + goto bad_area; + /* + * Ok, we have a good vm_area for this memory access, so +diff -urN 2.4.10pre11/include/linux/mm.h silent-stack-overflow/include/linux/mm.h +--- 2.4.10pre11/include/linux/mm.h Tue Sep 18 02:43:02 2001 ++++ silent-stack-overflow/include/linux/mm.h Tue Sep 18 10:10:24 2001 +@@ -572,11 +572,24 @@ + + #define GFP_DMA __GFP_DMA + +-/* vma is the first one with address < vma->vm_end, +- * and even address < vma->vm_start. Have to extend vma. */ +-static inline int expand_stack(struct vm_area_struct * vma, unsigned long address) ++extern int heap_stack_gap; ++ ++/* ++ * vma is the first one with address < vma->vm_end, ++ * and even address < vma->vm_start. Have to extend vma. ++ * ++ * Locking: vm_start can decrease under you if you only hold ++ * the read semaphore, you either need the write semaphore ++ * or both the read semaphore and the page_table_lock acquired ++ * if you want vm_start consistent. vm_end and the vma layout ++ * are just consistent with only the read semaphore acquired ++ * instead. ++ */ ++static inline int expand_stack(struct vm_area_struct * vma, unsigned long address, ++ struct vm_area_struct * prev_vma) + { + unsigned long grow; ++ int err = -ENOMEM; + + /* + * vma->vm_start/vm_end cannot change under us because the caller is required +@@ -584,18 +597,22 @@ + * before relocating the vma range ourself. + */ + address &= PAGE_MASK; ++ if (prev_vma && prev_vma->vm_end + (heap_stack_gap << PAGE_SHIFT) > address) ++ goto out; ++ spin_lock(&vma->vm_mm->page_table_lock); + grow = (vma->vm_start - address) >> PAGE_SHIFT; + if (vma->vm_end - address > current->rlim[RLIMIT_STACK].rlim_cur || + ((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->rlim[RLIMIT_AS].rlim_cur) +- return -ENOMEM; +- spin_lock(&vma->vm_mm->page_table_lock); ++ goto out_unlock; + vma->vm_start = address; +- vma->vm_pgoff -= grow; + vma->vm_mm->total_vm += grow; + if (vma->vm_flags & VM_LOCKED) + vma->vm_mm->locked_vm += grow; ++ err = 0; ++ out_unlock: + spin_unlock(&vma->vm_mm->page_table_lock); +- return 0; ++ out: ++ return err; + } + + /* Look up the first VMA which satisfies addr < vm_end, NULL if none. 
*/ +diff -urN 2.4.10pre11/include/linux/sysctl.h silent-stack-overflow/include/linux/sysctl.h +--- 2.4.10pre11/include/linux/sysctl.h Tue Sep 18 02:43:03 2001 ++++ silent-stack-overflow/include/linux/sysctl.h Tue Sep 18 10:08:51 2001 +@@ -136,7 +136,8 @@ + VM_PAGECACHE=7, /* struct: Set cache memory thresholds */ + VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */ + VM_PGT_CACHE=9, /* struct: Set page table cache parameters */ +- VM_PAGE_CLUSTER=10 /* int: set number of pages to swap together */ ++ VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ ++ VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */ + }; + + +diff -urN 2.4.10pre11/kernel/sysctl.c silent-stack-overflow/kernel/sysctl.c +--- 2.4.10pre11/kernel/sysctl.c Tue Sep 18 02:43:04 2001 ++++ silent-stack-overflow/kernel/sysctl.c Tue Sep 18 10:08:51 2001 +@@ -265,6 +265,8 @@ + &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_PAGE_CLUSTER, "page-cluster", + &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec}, ++ {VM_HEAP_STACK_GAP, "heap-stack-gap", ++ &heap_stack_gap, sizeof(int), 0644, NULL, &proc_dointvec}, + {0} + }; + +diff -urN 2.4.10pre11/mm/memory.c silent-stack-overflow/mm/memory.c +--- 2.4.10pre11/mm/memory.c Tue Sep 18 02:43:04 2001 ++++ silent-stack-overflow/mm/memory.c Tue Sep 18 10:08:51 2001 +@@ -444,7 +444,7 @@ + unsigned long ptr, end; + int err; + struct mm_struct * mm; +- struct vm_area_struct * vma = 0; ++ struct vm_area_struct * vma, * prev_vma; + struct page * map; + int i; + int datain = (rw == READ); +@@ -470,19 +470,21 @@ + iobuf->length = len; + + i = 0; ++ vma = NULL; + + /* + * First of all, try to fault in all of the necessary pages + */ + while (ptr < end) { + if (!vma || ptr >= vma->vm_end) { +- vma = find_vma(current->mm, ptr); ++ vma = find_vma(mm, ptr); + if (!vma) + goto out_unlock; + if (vma->vm_start > ptr) { + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto out_unlock; +- if (expand_stack(vma, ptr)) ++ find_vma_prev(mm, ptr, &prev_vma); ++ if (expand_stack(vma, ptr, prev_vma)) + goto out_unlock; + } + if (((datain) && (!(vma->vm_flags & VM_WRITE))) || +diff -urN 2.4.10pre11/mm/mmap.c silent-stack-overflow/mm/mmap.c +--- 2.4.10pre11/mm/mmap.c Tue Sep 18 02:43:04 2001 ++++ silent-stack-overflow/mm/mmap.c Tue Sep 18 10:12:08 2001 +@@ -45,6 +45,7 @@ + }; + + int sysctl_overcommit_memory; ++int heap_stack_gap = 1; + + /* Check that a process has enough memory to allocate a + * new virtual mapping. +@@ -606,9 +607,15 @@ + + for (vma = find_vma(current->mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). 
*/ ++ unsigned long __heap_stack_gap; + if (TASK_SIZE - len < addr) + return -ENOMEM; +- if (!vma || addr + len <= vma->vm_start) ++ if (!vma) ++ return addr; ++ __heap_stack_gap = 0; ++ if (vma->vm_flags & VM_GROWSDOWN) ++ __heap_stack_gap = heap_stack_gap << PAGE_SHIFT; ++ if (addr + len + __heap_stack_gap <= vma->vm_start) + return addr; + addr = vma->vm_end; + } +@@ -717,7 +724,7 @@ + + struct vm_area_struct * find_extend_vma(struct mm_struct * mm, unsigned long addr) + { +- struct vm_area_struct * vma; ++ struct vm_area_struct * vma, * prev_vma; + unsigned long start; + + addr &= PAGE_MASK; +@@ -728,9 +735,10 @@ + return vma; + if (!(vma->vm_flags & VM_GROWSDOWN)) + return NULL; +- start = vma->vm_start; +- if (expand_stack(vma, addr)) ++ find_vma_prev(mm, addr, &prev_vma); ++ if (expand_stack(vma, addr, prev_vma)) + return NULL; ++ start = vma->vm_start; + if (vma->vm_flags & VM_LOCKED) { + make_pages_present(addr, start); + } diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2 new file mode 100644 index 000000000000..98023b0de4d1 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_spinlock-cacheline-2 @@ -0,0 +1,136 @@ +diff -urN 2.4.14pre4/arch/i386/kernel/i386_ksyms.c spinlock/arch/i386/kernel/i386_ksyms.c +--- 2.4.14pre4/arch/i386/kernel/i386_ksyms.c Wed Oct 24 08:04:08 2001 ++++ spinlock/arch/i386/kernel/i386_ksyms.c Tue Oct 30 01:44:59 2001 +@@ -120,7 +120,7 @@ + + #ifdef CONFIG_SMP + EXPORT_SYMBOL(cpu_data); +-EXPORT_SYMBOL(kernel_flag); ++EXPORT_SYMBOL(kernel_flag_cacheline); + EXPORT_SYMBOL(smp_num_cpus); + EXPORT_SYMBOL(cpu_online_map); + EXPORT_SYMBOL_NOVERS(__write_lock_failed); +diff -urN 2.4.14pre4/arch/i386/kernel/smp.c spinlock/arch/i386/kernel/smp.c +--- 2.4.14pre4/arch/i386/kernel/smp.c Wed Oct 24 08:04:08 2001 ++++ spinlock/arch/i386/kernel/smp.c Tue Oct 30 01:44:59 2001 +@@ -102,7 +102,7 @@ + */ + + /* The 'big kernel lock' */ +-spinlock_t kernel_flag = SPIN_LOCK_UNLOCKED; ++spinlock_cacheline_t kernel_flag_cacheline = {SPIN_LOCK_UNLOCKED}; + + struct tlb_state cpu_tlbstate[NR_CPUS] = {[0 ... 
NR_CPUS-1] = { &init_mm, 0 }}; + +diff -urN 2.4.14pre4/fs/buffer.c spinlock/fs/buffer.c +--- 2.4.14pre4/fs/buffer.c Tue Oct 30 00:07:24 2001 ++++ spinlock/fs/buffer.c Tue Oct 30 01:44:59 2001 +@@ -72,7 +72,10 @@ + static rwlock_t hash_table_lock = RW_LOCK_UNLOCKED; + + static struct buffer_head *lru_list[NR_LIST]; +-static spinlock_t lru_list_lock = SPIN_LOCK_UNLOCKED; ++ ++static spinlock_cacheline_t lru_list_lock_cacheline = {SPIN_LOCK_UNLOCKED}; ++#define lru_list_lock lru_list_lock_cacheline.lock ++ + static int nr_buffers_type[NR_LIST]; + static unsigned long size_buffers_type[NR_LIST]; + +diff -urN 2.4.14pre4/include/asm-i386/smplock.h spinlock/include/asm-i386/smplock.h +--- 2.4.14pre4/include/asm-i386/smplock.h Mon Oct 29 01:49:56 2001 ++++ spinlock/include/asm-i386/smplock.h Tue Oct 30 01:44:59 2001 +@@ -8,7 +8,8 @@ + #include + #include + +-extern spinlock_t kernel_flag; ++extern spinlock_cacheline_t kernel_flag_cacheline; ++#define kernel_flag kernel_flag_cacheline.lock + + #define kernel_locked() spin_is_locked(&kernel_flag) + +diff -urN 2.4.14pre4/include/linux/spinlock.h spinlock/include/linux/spinlock.h +--- 2.4.14pre4/include/linux/spinlock.h Mon Oct 29 01:49:55 2001 ++++ spinlock/include/linux/spinlock.h Tue Oct 30 01:44:59 2001 +@@ -138,4 +138,20 @@ + extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); + #endif + ++#ifdef CONFIG_SMP ++#include <linux/cache.h> ++ ++typedef union { ++ spinlock_t lock; ++ char fill_up[(SMP_CACHE_BYTES)]; ++} spinlock_cacheline_t __attribute__ ((aligned(SMP_CACHE_BYTES))); ++ ++#else /* SMP */ ++ ++typedef struct { ++ spinlock_t lock; ++} spinlock_cacheline_t; ++ ++ ++#endif + #endif /* __LINUX_SPINLOCK_H */ +diff -urN 2.4.14pre4/include/linux/swap.h spinlock/include/linux/swap.h +--- 2.4.14pre4/include/linux/swap.h Tue Oct 30 00:07:29 2001 ++++ spinlock/include/linux/swap.h Tue Oct 30 01:44:59 2001 +@@ -86,7 +86,10 @@ + extern atomic_t nr_async_pages; + extern atomic_t page_cache_size; + extern atomic_t buffermem_pages; +-extern spinlock_t pagecache_lock; ++ ++extern spinlock_cacheline_t pagecache_lock_cacheline; ++#define pagecache_lock (pagecache_lock_cacheline.lock) ++ + extern void __remove_inode_page(struct page *); + + /* Incomplete types for prototype declarations: */ +@@ -154,7 +157,8 @@ + asmlinkage long sys_swapoff(const char *); + asmlinkage long sys_swapon(const char *, int); + +-extern spinlock_t pagemap_lru_lock; ++extern spinlock_cacheline_t pagemap_lru_lock_cacheline; ++#define pagemap_lru_lock pagemap_lru_lock_cacheline.lock + + extern void FASTCALL(mark_page_accessed(struct page *)); + +diff -urN 2.4.14pre4/mm/filemap.c spinlock/mm/filemap.c +--- 2.4.14pre4/mm/filemap.c Tue Oct 30 00:07:29 2001 ++++ spinlock/mm/filemap.c Tue Oct 30 01:45:32 2001 +@@ -47,7 +47,8 @@ + unsigned int page_hash_bits; + struct page **page_hash_table; + +-spinlock_t pagecache_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; ++spinlock_cacheline_t pagecache_lock_cacheline = {SPIN_LOCK_UNLOCKED}; ++ + /* + * NOTE: to avoid deadlocking you must never acquire the pagemap_lru_lock + * with the pagecache_lock held. 
+@@ -57,7 +58,7 @@ + * pagemap_lru_lock -> + * pagecache_lock + */ +-spinlock_t pagemap_lru_lock ____cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED; ++spinlock_cacheline_t pagemap_lru_lock_cacheline = {SPIN_LOCK_UNLOCKED}; + + #define CLUSTER_PAGES (1 << page_cluster) + #define CLUSTER_OFFSET(x) (((x) >> page_cluster) << page_cluster) +diff -urN 2.4.14pre4/mm/highmem.c spinlock/mm/highmem.c +--- 2.4.14pre4/mm/highmem.c Wed Oct 24 08:04:27 2001 ++++ spinlock/mm/highmem.c Tue Oct 30 01:44:59 2001 +@@ -32,7 +32,8 @@ + */ + static int pkmap_count[LAST_PKMAP]; + static unsigned int last_pkmap_nr; +-static spinlock_t kmap_lock = SPIN_LOCK_UNLOCKED; ++static spinlock_cacheline_t kmap_lock_cacheline = {SPIN_LOCK_UNLOCKED}; ++#define kmap_lock kmap_lock_cacheline.lock + + pte_t * pkmap_page_table; + diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1 new file mode 100644 index 000000000000..31c2fb732a75 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_strnlen_user-x86-ret1-1 @@ -0,0 +1,20 @@ +--- 2.4.10pre2aa1/arch/i386/lib/usercopy.c.~1~ Thu Aug 30 00:35:26 2001 ++++ 2.4.10pre2aa1/arch/i386/lib/usercopy.c Thu Aug 30 03:22:04 2001 +@@ -166,6 +166,8 @@ + unsigned long res, tmp; + + __asm__ __volatile__( ++ " testl %0, %0\n" ++ " jz 3f\n" + " andl %0,%%ecx\n" + "0: repne; scasb\n" + " setne %%al\n" +@@ -174,6 +176,8 @@ + "1:\n" + ".section .fixup,\"ax\"\n" + "2: xorl %%eax,%%eax\n" ++ " jmp 1b\n" ++ "3: movb $1,%%al\n" + " jmp 1b\n" + ".previous\n" + ".section __ex_table,\"a\"\n" diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1 new file mode 100644 index 000000000000..ac2f9a2778da --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vm_raend-race-1 @@ -0,0 +1,35 @@ +--- 2.4.9aa3/mm/filemap.c.~1~ Sun Aug 19 06:59:36 2001 ++++ 2.4.9aa3/mm/filemap.c Sun Aug 19 07:33:24 2001 +@@ -1645,11 +1645,18 @@ + * pages in the previous window. + */ + if ((pgoff + (ra_window >> 1)) == vma->vm_raend) { +- unsigned long start = vma->vm_pgoff + vma->vm_raend; ++ unsigned long vm_raend = *(volatile unsigned long *) &vma->vm_raend; ++ unsigned long start = vma->vm_pgoff + vm_raend; + unsigned long end = start + ra_window; + + if (end > ((vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff)) + end = (vma->vm_end >> PAGE_SHIFT) + vma->vm_pgoff; ++ /* ++ * Sanitize 'start' as well because vm_raend is racy when only ++ * the read sem is acquired like here. ++ */ ++ if (start < vma->vm_pgoff) ++ return; + if (start > end) + return; + +@@ -1663,10 +1670,10 @@ + + /* if we're far enough past the beginning of this area, + recycle pages that are in the previous window. 
*/ +- if (vma->vm_raend > (vma->vm_pgoff + ra_window + ra_window)) { ++ if (vm_raend > (vma->vm_pgoff + ra_window + ra_window)) { + unsigned long window = ra_window << PAGE_SHIFT; + +- end = vma->vm_start + (vma->vm_raend << PAGE_SHIFT); ++ end = vma->vm_start + (vm_raend << PAGE_SHIFT); + end -= window + window; + filemap_sync(vma, end - window, window, MS_INVALIDATE); + } diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1 new file mode 100644 index 000000000000..2dd5cb755887 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-cache-flush-1 @@ -0,0 +1,10 @@ +--- 2.4.10pre12aa2/mm/vmalloc.c.~1~ Thu Sep 20 01:44:20 2001 ++++ 2.4.10pre12aa2/mm/vmalloc.c Fri Sep 21 00:40:48 2001 +@@ -144,6 +144,7 @@ + int ret; + + dir = pgd_offset_k(address); ++ flush_cache_all(); + spin_lock(&init_mm.page_table_lock); + do { + pmd_t *pmd; diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1 new file mode 100644 index 000000000000..7006f1fb2497 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_vmalloc-tlb-flush-1 @@ -0,0 +1,12 @@ +--- 2.4.10pre14aa1/mm/vmalloc.c.~1~ Sat Sep 22 12:02:18 2001 ++++ 2.4.10pre14aa1/mm/vmalloc.c Sat Sep 22 16:54:58 2001 +@@ -164,6 +164,9 @@ + ret = 0; + } while (address && (address < end)); + spin_unlock(&init_mm.page_table_lock); ++#if !defined(__alpha__) && !defined(__i386__) ++ flush_tlb_all(); ++#endif + return ret; + } + diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1 new file mode 100644 index 000000000000..db05cc32aefe --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_x86-sa_interrupt-1 @@ -0,0 +1,18 @@ +--- 2.4.10pre2aa3/arch/i386/kernel/irq.c.~1~ Sat Sep 1 02:39:49 2001 ++++ 2.4.10pre2aa3/arch/i386/kernel/irq.c Sat Sep 1 02:40:17 2001 +@@ -443,10 +443,12 @@ + + status = 1; /* Force the "do bottom halves" bit */ + +- if (!(action->flags & SA_INTERRUPT)) +- __sti(); +- + do { ++ if (!(action->flags & SA_INTERRUPT)) ++ __sti(); ++ else ++ __cli(); ++ + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1 new file mode 100644 index 000000000000..70728f71fd32 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/00_xtime-lock-1 @@ -0,0 +1,22 @@ +diff -urN 2.4.10/kernel/time.c xtime/kernel/time.c +--- 2.4.10/kernel/time.c Thu Nov 16 15:37:43 2000 ++++ xtime/kernel/time.c Sun Sep 23 22:20:29 2001 +@@ -38,7 +38,18 @@ + + static void do_normal_gettime(struct timeval * tm) + { ++ extern rwlock_t xtime_lock; ++ unsigned long flags; ++ /* ++ * Inspired by arch/.../kernel/time.c in which do_gettimeofday also ++ * locks xtime_lock, do_normal_gettime should do the same. After all ++ * access to xtime isn't atomic. 
-rolf 20010923 ++ */ ++ read_lock_irqsave(&xtime_lock, flags); ++ + *tm=xtime; ++ ++ read_unlock_irqrestore(&xtime_lock, flags); + } + + void (*do_get_fast_time)(struct timeval *) = do_normal_gettime; diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2 new file mode 100644 index 000000000000..5eac3e42d144 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_compiler.h-2 @@ -0,0 +1,77 @@ +diff -urN 2.4.10aa2/fs/inode.c compiler/fs/inode.c +--- 2.4.10aa2/fs/inode.c Fri Sep 28 02:45:57 2001 ++++ compiler/fs/inode.c Fri Sep 28 02:47:02 2001 +@@ -17,7 +17,6 @@ + #include + #include + #include +-#include <linux/compiler.h> + + /* + * New inode.c implementation. +diff -urN 2.4.10aa2/include/linux/kernel.h compiler/include/linux/kernel.h +--- 2.4.10aa2/include/linux/kernel.h Fri Sep 28 02:46:24 2001 ++++ compiler/include/linux/kernel.h Fri Sep 28 02:46:35 2001 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include <linux/compiler.h> + + /* Optimization barrier */ + /* The "volatile" is due to gcc bugs */ +diff -urN 2.4.10aa2/mm/page_alloc.c compiler/mm/page_alloc.c +--- 2.4.10aa2/mm/page_alloc.c Fri Sep 28 02:45:59 2001 ++++ compiler/mm/page_alloc.c Fri Sep 28 02:46:56 2001 +@@ -17,7 +17,6 @@ + #include + #include + #include +-#include <linux/compiler.h> + #include + + int nr_swap_pages; +diff -urN 2.4.10aa2/mm/slab.c compiler/mm/slab.c +--- 2.4.10aa2/mm/slab.c Fri Sep 28 02:46:24 2001 ++++ compiler/mm/slab.c Fri Sep 28 02:46:35 2001 +@@ -72,7 +72,6 @@ + #include + #include + #include +-#include <linux/compiler.h> + #include + + /* +diff -urN 2.4.10aa2/mm/swapfile.c compiler/mm/swapfile.c +--- 2.4.10aa2/mm/swapfile.c Fri Sep 28 02:46:24 2001 ++++ compiler/mm/swapfile.c Fri Sep 28 02:46:35 2001 +@@ -14,7 +14,6 @@ + #include + #include + #include +-#include <linux/compiler.h> + + #include + +diff -urN 2.4.10aa2/mm/vmscan.c compiler/mm/vmscan.c +--- 2.4.10aa2/mm/vmscan.c Fri Sep 28 02:46:24 2001 ++++ compiler/mm/vmscan.c Fri Sep 28 02:46:35 2001 +@@ -21,7 +21,6 @@ + #include + #include + #include +-#include <linux/compiler.h> + + #include + +diff -urN 2.4.13pre1/mm/filemap.c o_direct/mm/filemap.c +--- 2.4.13pre1/mm/filemap.c Fri Oct 12 06:15:14 2001 ++++ o_direct/mm/filemap.c Fri Oct 12 08:02:24 2001 +@@ -23,7 +23,6 @@ + #include + #include + #include +-#include <linux/compiler.h> + + #include + #include diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1 new file mode 100644 index 000000000000..e270f772b01c --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-deadlock-fix-1 @@ -0,0 +1,10 @@ +--- 2.4.13pre3aa2/drivers/md/lvm.c.~1~ Wed Oct 17 08:19:05 2001 ++++ 2.4.13pre3aa2/drivers/md/lvm.c Wed Oct 17 21:45:51 2001 +@@ -1067,7 +1067,6 @@ + P_DEV("blk_close MINOR: %d VG#: %d LV#: %d\n", + minor, VG_BLK(minor), LV_BLK(minor)); + +- sync_dev(inode->i_rdev); + if (lv_ptr->lv_open == 1) vg_ptr->lv_open--; + lv_ptr->lv_open--; + diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1 new file mode 100644 index 000000000000..4d8ab00700bb --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-incremental-1 @@ -0,0 +1,80 @@ +diff -urN lvm/drivers/md/lvm.c lvm-fix/drivers/md/lvm.c +--- lvm/drivers/md/lvm.c Thu Oct 4 18:51:34 2001 ++++ lvm-fix/drivers/md/lvm.c Thu Oct 4 18:51:48 2001 +@@ -259,9 +259,13 @@ + + #include "lvm-internal.h" + +-#define LVM_CORRECT_READ_AHEAD( a) \ +- if ( a < LVM_MIN_READ_AHEAD || \ +- a 
> LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD; ++#define LVM_CORRECT_READ_AHEAD(a) \ ++do { \ ++ if ((a) < LVM_MIN_READ_AHEAD || \ ++ (a) > LVM_MAX_READ_AHEAD) \ ++ (a) = LVM_DEFAULT_READ_AHEAD; \ ++ read_ahead[MAJOR_NR] = (a); \ ++} while(0) + + #ifndef WRITEA + # define WRITEA WRITE +@@ -414,17 +418,13 @@ + + static struct gendisk lvm_gendisk = + { +- MAJOR_NR, /* major # */ +- LVM_NAME, /* name of major */ +- 0, /* number of times minor is shifted +- to get real minor */ +- 1, /* maximum partitions per device */ +- lvm_hd_struct, /* partition table */ +- lvm_size, /* device size in blocks, copied +- to block_size[] */ +- MAX_LV, /* number or real devices */ +- NULL, /* internal */ +- NULL, /* pointer to next gendisk struct (internal) */ ++ major: MAJOR_NR, ++ major_name: LVM_NAME, ++ minor_shift: 0, ++ max_p: 1, ++ part: lvm_hd_struct, ++ sizes: lvm_size, ++ nr_real: MAX_LV, + }; + + /* +@@ -939,6 +939,11 @@ + return -EFAULT; + break; + ++ case BLKGETSIZE64: ++ if (put_user((u64)lv_ptr->lv_size << 9, (u64 *)arg)) ++ return -EFAULT; ++ break; ++ + + case BLKFLSBUF: + /* flush buffer cache */ +@@ -962,6 +967,7 @@ + (long) arg > LVM_MAX_READ_AHEAD) + return -EINVAL; + lv_ptr->lv_read_ahead = (long) arg; ++ read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead; + break; + + +diff -urN lvm/include/linux/lvm.h lvm-fix/include/linux/lvm.h +--- lvm/include/linux/lvm.h Thu Oct 4 18:51:34 2001 ++++ lvm-fix/include/linux/lvm.h Thu Oct 4 18:51:48 2001 +@@ -260,8 +260,9 @@ + #define LVM_MAX_STRIPES 128 /* max # of stripes */ + #define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */ + #define LVM_MAX_MIRRORS 2 /* future use */ +-#define LVM_MIN_READ_AHEAD 2 /* minimum read ahead sectors */ +-#define LVM_MAX_READ_AHEAD 120 /* maximum read ahead sectors */ ++#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ ++#define LVM_DEFAULT_READ_AHEAD 1024 /* default read ahead sectors for 512k scsi segments */ ++#define LVM_MAX_READ_AHEAD 10000 /* maximum read ahead sectors */ + #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ + #define LVM_PARTITION 0xfe /* LVM partition id */ + #define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1 new file mode 100644 index 000000000000..e2175a75c4ad --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-check-1 @@ -0,0 +1,39 @@ +--- 0.21/drivers/md/lvm.c Sun, 07 Oct 2001 22:15:54 -0400 ++++ 0.21(w)/drivers/md/lvm.c Mon, 08 Oct 2001 15:54:42 -0400 +@@ -1142,7 +1142,8 @@ + + /* we must redo lvm_snapshot_remap_block in order to avoid a + race condition in the gap where no lock was held */ +- if (!lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) && ++ if (lv->lv_block_exception && ++ !lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv) && + !lvm_snapshot_COW(rdev, rsector, pe_start, rsector, vg, lv)) + lvm_write_COW_table_block(vg, lv); + +@@ -1151,11 +1152,12 @@ + + static inline void _remap_snapshot(kdev_t rdev, ulong rsector, + ulong pe_start, lv_t *lv, vg_t *vg) { +- int r; ++ int r = 0; + + /* check to see if this chunk is already in the snapshot */ + down_read(&lv->lv_lock); +- r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv); ++ if (lv->lv_block_exception) ++ r = lvm_snapshot_remap_block(&rdev, &rsector, pe_start, lv); + up_read(&lv->lv_lock); + + if (!r) +Index: 0.21/drivers/md/lvm-snap.c +--- 
0.21/drivers/md/lvm-snap.c Sat, 06 Oct 2001 00:07:22 -0400 root (linux/i/c/38_lvm-snap.c 1.1.2.1.2.1 644) ++++ 0.21(w)/drivers/md/lvm-snap.c Mon, 08 Oct 2001 15:13:10 -0400 root (linux/i/c/38_lvm-snap.c 1.1.2.1.2.1 644) +@@ -140,6 +140,8 @@ + unsigned long mask = lv->lv_snapshot_hash_mask; + int chunk_size = lv->lv_chunk_size; + ++ if (!hash_table) ++ BUG() ; + hash_table = &hash_table[hashfn(org_dev, org_start, mask, chunk_size)]; + list_add(&exception->hash, hash_table); + } diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2 new file mode 100644 index 000000000000..a5f424b7a282 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_lvm-snapshot-hardsectsize-2 @@ -0,0 +1,64 @@ +--- 0.36/drivers/md/lvm-snap.c Thu, 11 Oct 2001 10:17:22 -0400 ++++ 0.36(w)/drivers/md/lvm-snap.c Thu, 11 Oct 2001 14:11:50 -0400 +@@ -326,6 +326,7 @@ + { + const char * reason; + unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; ++ unsigned long phys_start ; + int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; + struct kiobuf * iobuf; + unsigned long blocks[KIO_MAX_SECTORS]; +@@ -360,8 +361,14 @@ + + iobuf = lv_snap->lv_iobuf; + +- blksize_org = lvm_get_blksize(org_phys_dev); +- blksize_snap = lvm_get_blksize(snap_phys_dev); ++ blksize_org = get_hardsect_size(org_phys_dev); ++ blksize_snap = get_hardsect_size(snap_phys_dev); ++ ++ /* org_start must not change, we use it later on to fill in the ++ ** exception table ++ */ ++ phys_start = org_start ; ++ + max_blksize = max(blksize_org, blksize_snap); + min_blksize = min(blksize_org, blksize_snap); + max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); +@@ -376,7 +383,7 @@ + + iobuf->length = nr_sectors << 9; + +- if(!lvm_snapshot_prepare_blocks(blocks, org_start, ++ if(!lvm_snapshot_prepare_blocks(blocks, phys_start, + nr_sectors, blksize_org)) + goto fail_prepare; + +@@ -391,6 +398,9 @@ + if (__brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, blocks, + blksize_snap, lv_snap) != (nr_sectors<<9)) + goto fail_raw_write; ++ ++ phys_start += nr_sectors ; ++ snap_start += nr_sectors ; + } + + #ifdef DEBUG_SNAPSHOT +@@ -605,7 +615,7 @@ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + +- blksize_snap = lvm_get_blksize(snap_phys_dev); ++ blksize_snap = get_hardsect_size(snap_phys_dev); + + COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t); + idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block; +@@ -654,7 +664,7 @@ + idx++; + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; +- blksize_snap = lvm_get_blksize(snap_phys_dev); ++ blksize_snap = get_hardsect_size(snap_phys_dev); + blocks[0] = snap_pe_start >> (blksize_snap >> 10); + } else blocks[0]++; + diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2 new file mode 100644 index 000000000000..125fcf3ad4c9 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_no-virtual-2 @@ -0,0 +1,56 @@ +diff -urN 2.4.6pre5/arch/i386/config.in novirtual/arch/i386/config.in +--- 2.4.6pre5/arch/i386/config.in Thu Jun 21 08:03:30 2001 ++++ novirtual/arch/i386/config.in Thu Jun 21 16:02:11 2001 +@@ -165,6 
+165,9 @@ + define_bool CONFIG_HIGHMEM y + define_bool CONFIG_X86_PAE y + fi ++if [ "$CONFIG_NOHIGHMEM" = "y" ]; then ++ define_bool CONFIG_NO_PAGE_VIRTUAL y ++fi + + bool 'Math emulation' CONFIG_MATH_EMULATION + bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR +diff -urN 2.4.6pre5/include/asm-i386/pgtable.h novirtual/include/asm-i386/pgtable.h +--- 2.4.6pre5/include/asm-i386/pgtable.h Thu Jun 14 18:07:49 2001 ++++ novirtual/include/asm-i386/pgtable.h Thu Jun 21 16:02:11 2001 +@@ -255,7 +255,11 @@ + * Permanent address of a page. Obviously must never be + * called on a highmem page. + */ ++#ifdef CONFIG_NO_PAGE_VIRTUAL ++#define page_address(page) __va((page - mem_map) << PAGE_SHIFT) ++#else + #define page_address(page) ((page)->virtual) ++#endif + #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + + /* +diff -urN 2.4.6pre5/include/linux/mm.h novirtual/include/linux/mm.h +--- 2.4.6pre5/include/linux/mm.h Thu Jun 21 08:03:56 2001 ++++ novirtual/include/linux/mm.h Thu Jun 21 16:02:33 2001 +@@ -160,8 +160,10 @@ + wait_queue_head_t wait; /* Page locked? Stand in line... */ + struct page **pprev_hash; /* Complement to *next_hash. */ + struct buffer_head * buffers; /* Buffer maps us to a disk block. */ ++#ifndef CONFIG_NO_PAGE_VIRTUAL + void *virtual; /* Kernel virtual address (NULL if + not kmapped, ie. highmem) */ ++#endif + struct zone_struct *zone; /* Memory zone we are in. */ + } mem_map_t; + +diff -urN 2.4.6pre5/mm/page_alloc.c novirtual/mm/page_alloc.c +--- 2.4.6pre5/mm/page_alloc.c Thu Jun 21 08:03:57 2001 ++++ novirtual/mm/page_alloc.c Thu Jun 21 16:02:11 2001 +@@ -851,8 +851,10 @@ + for (i = 0; i < size; i++) { + struct page *page = mem_map + offset + i; + page->zone = zone; ++#ifndef CONFIG_NO_PAGE_VIRTUAL + if (j != ZONE_HIGHMEM) + page->virtual = __va(zone_start_paddr); ++#endif + zone_start_paddr += PAGE_SIZE; + } + diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13 new file mode 100644 index 000000000000..1eb40c9dc3e3 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_numa-sched-13 @@ -0,0 +1,800 @@ +diff -urN numa-sched-ref/arch/alpha/config.in numa-sched/arch/alpha/config.in +--- numa-sched-ref/arch/alpha/config.in Sun Oct 28 15:28:11 2001 ++++ numa-sched/arch/alpha/config.in Sun Oct 28 15:28:25 2001 +@@ -219,6 +219,9 @@ + bool 'Discontiguous Memory Support' CONFIG_DISCONTIGMEM + if [ "$CONFIG_DISCONTIGMEM" = "y" ]; then + bool ' NUMA Support' CONFIG_NUMA ++ if [ "$CONFIG_NUMA" = "y" ]; then ++ bool ' NUMA Scheduler Support' CONFIG_NUMA_SCHED ++ fi + fi + fi + +diff -urN numa-sched-ref/arch/alpha/kernel/entry.S numa-sched/arch/alpha/kernel/entry.S +--- numa-sched-ref/arch/alpha/kernel/entry.S Sun Oct 28 15:28:10 2001 ++++ numa-sched/arch/alpha/kernel/entry.S Sun Oct 28 15:28:25 2001 +@@ -35,7 +35,7 @@ + #define TASK_EXEC_DOMAIN 32 + #define TASK_NEED_RESCHED 40 + #define TASK_PTRACE 48 +-#define TASK_PROCESSOR 100 ++#define TASK_PROCESSOR 84 + + /* + * task flags (must match include/linux/sched.h): +diff -urN numa-sched-ref/include/asm-alpha/mmzone.h numa-sched/include/asm-alpha/mmzone.h +--- numa-sched-ref/include/asm-alpha/mmzone.h Sat May 26 04:03:47 2001 ++++ numa-sched/include/asm-alpha/mmzone.h Sun Oct 28 15:28:25 2001 +@@ -21,7 +21,7 @@ + #ifdef NOTYET + kern_vars_t kern_vars; + #endif +-#if defined(CONFIG_NUMA) && defined(CONFIG_NUMA_SCHED) ++#ifdef CONFIG_NUMA_SCHED + struct numa_schedule_data schedule_data; + #endif + } plat_pg_data_t; +diff -urN 
numa-sched-ref/include/asm-alpha/timex.h numa-sched/include/asm-alpha/timex.h +--- numa-sched-ref/include/asm-alpha/timex.h Tue Dec 29 22:56:15 1998 ++++ numa-sched/include/asm-alpha/timex.h Sun Oct 28 15:28:25 2001 +@@ -27,4 +27,8 @@ + return ret; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-arm/timex.h numa-sched/include/asm-arm/timex.h +--- numa-sched-ref/include/asm-arm/timex.h Thu Nov 16 15:37:33 2000 ++++ numa-sched/include/asm-arm/timex.h Sun Oct 28 15:28:25 2001 +@@ -23,4 +23,8 @@ + return 0; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-cris/timex.h numa-sched/include/asm-cris/timex.h +--- numa-sched-ref/include/asm-cris/timex.h Sat May 26 04:03:47 2001 ++++ numa-sched/include/asm-cris/timex.h Sun Oct 28 15:28:25 2001 +@@ -20,4 +20,8 @@ + return 0; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-i386/timex.h numa-sched/include/asm-i386/timex.h +--- numa-sched-ref/include/asm-i386/timex.h Sun Oct 28 15:04:11 2001 ++++ numa-sched/include/asm-i386/timex.h Sun Oct 28 15:44:38 2001 +@@ -47,4 +47,8 @@ + + extern unsigned long cpu_khz; + ++typedef cycles_t last_schedule_t; ++#define get_last_schedule() ({ get_cycles(); }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-ia64/timex.h numa-sched/include/asm-ia64/timex.h +--- numa-sched-ref/include/asm-ia64/timex.h Tue May 1 19:35:31 2001 ++++ numa-sched/include/asm-ia64/timex.h Sun Oct 28 15:28:25 2001 +@@ -21,4 +21,8 @@ + return ret; + } + ++typedef cycles_t last_schedule_t; ++#define get_last_schedule() ({ get_cycles(); }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif /* _ASM_IA64_TIMEX_H */ +diff -urN numa-sched-ref/include/asm-m68k/timex.h numa-sched/include/asm-m68k/timex.h +--- numa-sched-ref/include/asm-m68k/timex.h Tue Jan 5 20:20:43 1999 ++++ numa-sched/include/asm-m68k/timex.h Sun Oct 28 15:28:25 2001 +@@ -19,4 +19,8 @@ + return 0; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-mips/timex.h numa-sched/include/asm-mips/timex.h +--- numa-sched-ref/include/asm-mips/timex.h Sat May 13 17:31:25 2000 ++++ numa-sched/include/asm-mips/timex.h Sun Oct 28 15:28:25 2001 +@@ -36,6 +36,11 @@ + { + return read_32bit_cp0_register(CP0_COUNT); + } ++ ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif /* __KERNEL__ */ + + #endif /* __ASM_MIPS_TIMEX_H */ +diff -urN numa-sched-ref/include/asm-mips64/timex.h numa-sched/include/asm-mips64/timex.h +--- numa-sched-ref/include/asm-mips64/timex.h Sun Sep 23 21:11:41 2001 ++++ numa-sched/include/asm-mips64/timex.h Sun Oct 28 15:28:25 2001 +@@ -43,4 +43,8 @@ + return val; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif /* _ASM_TIMEX_H */ +diff -urN numa-sched-ref/include/asm-parisc/timex.h numa-sched/include/asm-parisc/timex.h +--- numa-sched-ref/include/asm-parisc/timex.h Thu Dec 14 22:34:13 2000 ++++ 
numa-sched/include/asm-parisc/timex.h Sun Oct 28 15:28:25 2001 +@@ -18,4 +18,8 @@ + return mfctl(16); + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-ppc/timex.h numa-sched/include/asm-ppc/timex.h +--- numa-sched-ref/include/asm-ppc/timex.h Sun Sep 23 21:11:41 2001 ++++ numa-sched/include/asm-ppc/timex.h Sun Oct 28 15:28:25 2001 +@@ -45,5 +45,9 @@ + return ret; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif + #endif /* __KERNEL__ */ +diff -urN numa-sched-ref/include/asm-s390/timex.h numa-sched/include/asm-s390/timex.h +--- numa-sched-ref/include/asm-s390/timex.h Fri May 12 20:41:44 2000 ++++ numa-sched/include/asm-s390/timex.h Sun Oct 28 15:28:25 2001 +@@ -26,4 +26,8 @@ + return 0; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-s390x/timex.h numa-sched/include/asm-s390x/timex.h +--- numa-sched-ref/include/asm-s390x/timex.h Thu Feb 22 03:45:11 2001 ++++ numa-sched/include/asm-s390x/timex.h Sun Oct 28 15:28:25 2001 +@@ -26,4 +26,8 @@ + return 0; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-sh/timex.h numa-sched/include/asm-sh/timex.h +--- numa-sched-ref/include/asm-sh/timex.h Fri Jan 5 02:19:29 2001 ++++ numa-sched/include/asm-sh/timex.h Sun Oct 28 15:28:25 2001 +@@ -21,4 +21,8 @@ + return 0; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif /* __ASM_SH_TIMEX_H */ +diff -urN numa-sched-ref/include/asm-sparc/timex.h numa-sched/include/asm-sparc/timex.h +--- numa-sched-ref/include/asm-sparc/timex.h Thu Mar 11 01:53:37 1999 ++++ numa-sched/include/asm-sparc/timex.h Sun Oct 28 15:28:25 2001 +@@ -17,4 +17,8 @@ + extern cycles_t cacheflush_time; + #define get_cycles() (0) + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/asm-sparc64/timex.h numa-sched/include/asm-sparc64/timex.h +--- numa-sched-ref/include/asm-sparc64/timex.h Sun Sep 23 21:11:42 2001 ++++ numa-sched/include/asm-sparc64/timex.h Sun Oct 28 15:28:25 2001 +@@ -20,4 +20,8 @@ + ret; \ + }) + ++typedef cycles_t last_schedule_t; ++#define get_last_schedule() ({ get_cycles(); }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif +diff -urN numa-sched-ref/include/linux/numa_sched.h numa-sched/include/linux/numa_sched.h +--- numa-sched-ref/include/linux/numa_sched.h Thu Jan 1 01:00:00 1970 ++++ numa-sched/include/linux/numa_sched.h Sun Oct 28 15:59:37 2001 +@@ -0,0 +1,67 @@ ++/* ++ * linux/include/linux/numa_sched.h ++ * ++ * NUMA based scheduler ++ */ ++ ++#ifndef _LINUX_NUMA_SCHED_H ++#define _LINUX_NUMA_SCHED_H ++ ++#ifdef CONFIG_NUMA_SCHED ++#include ++#include ++#include ++#include ++ ++struct numa_per_cpu_schedule_data { ++ struct task_struct * curr; ++ last_schedule_t last_schedule; ++ long quiescent; ++}; ++ ++struct numa_schedule_data { ++ struct numa_per_cpu_schedule_data per_cpu[NR_CPUS] ____cacheline_aligned; ++ struct list_head runqueue_head; ++ int nr_running, nr_threads; ++}; ++ ++#include ++ ++#define 
numa_nr_running_inc() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_running++; } while(0) ++#define numa_nr_running_dec() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_running--; } while(0) ++#define numa_nr_running(nid) (NODE_SCHEDULE_DATA(nid)->nr_running) ++ ++#define numa_nr_threads_inc() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_threads++; } while(0) ++#define numa_nr_threads_dec() do { NODE_SCHEDULE_DATA(numa_node_id())->nr_threads--; } while(0) ++#define numa_nr_threads(nid) (NODE_SCHEDULE_DATA(nid)->nr_threads) ++ ++#define cpu_curr(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].curr) ++#define last_schedule(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].last_schedule) ++#define RCU_quiescent(cpu) (NODE_SCHEDULE_DATA(cputonode(cpu))->per_cpu[(cpu)].quiescent) ++ ++#define numa_runqueue_head(x) (&NODE_SCHEDULE_DATA(x)->runqueue_head) ++ ++#else /* CONFIG_NUMA_SCHED */ ++ ++#define numa_nr_running_inc() do { } while(0) ++#define numa_nr_running_dec() do { } while(0) ++#define numa_nr_threads_inc() do { } while(0) ++#define numa_nr_threads_dec() do { } while(0) ++ ++/* per-cpu schedule data */ ++typedef struct schedule_data_s { ++ struct task_struct * curr; ++ last_schedule_t last_schedule; ++ long quiescent; ++} schedule_data_t ____cacheline_aligned; ++ ++extern schedule_data_t schedule_data[NR_CPUS]; ++ ++#define cpu_curr(cpu) (schedule_data[(cpu)].curr) ++#define last_schedule(cpu) (schedule_data[(cpu)].last_schedule) ++#define RCU_quiescent(cpu) (schedule_data[(cpu)].quiescent) ++ ++#define numa_runqueue_head(x) (&runqueue_head) ++#endif /* CONFIG_NUMA_SCHED */ ++ ++#endif /* __ALPHA_NUMA_SCHED_H */ +diff -urN numa-sched-ref/include/linux/sched.h numa-sched/include/linux/sched.h +--- numa-sched-ref/include/linux/sched.h Sun Oct 28 15:28:11 2001 ++++ numa-sched/include/linux/sched.h Sun Oct 28 15:44:39 2001 +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include <linux/numa_sched.h> + + struct exec_domain; + +@@ -302,9 +303,9 @@ + * all fields in a single cacheline that are needed for + * the goodness() loop in schedule(). + */ +- long counter; +- long nice; +- unsigned long policy; ++ int counter; ++ int nice; ++ unsigned int policy; + struct mm_struct *mm; + int has_cpu, processor; + unsigned long cpus_allowed; +@@ -313,8 +314,9 @@ + * that's just fine.) 
+ */ + struct list_head run_list; +- unsigned long sleep_time; +- ++#ifdef CONFIG_NUMA_SCHED ++ int nid; ++#endif + struct task_struct *next_task, *prev_task; + struct mm_struct *active_mm; + struct rw_sem_recursor mm_recursor; +@@ -464,7 +466,7 @@ + mm: NULL, \ + active_mm: &init_mm, \ + mm_recursor: RWSEM_RECURSOR_INITIALIZER, \ +- cpus_allowed: -1, \ ++ cpus_allowed: -1UL, \ + run_list: LIST_HEAD_INIT(tsk.run_list), \ + next_task: &tsk, \ + prev_task: &tsk, \ +@@ -552,18 +554,6 @@ + extern volatile struct timeval xtime; + extern void do_timer(struct pt_regs *); + +-/* per-cpu schedule data */ +-typedef struct schedule_data_s { +- struct task_struct * curr; +- cycles_t last_schedule; +- long quiescent; +-} schedule_data_t ____cacheline_aligned; +- +-extern schedule_data_t schedule_data[NR_CPUS]; +-#define cpu_curr(cpu) (schedule_data[(cpu)].curr) +-#define last_schedule(cpu) (schedule_data[(cpu)].last_schedule) +-#define RCU_quiescent(cpu) (schedule_data[(cpu)].quiescent) +- + extern unsigned int * prof_buffer; + extern unsigned long prof_len; + extern unsigned long prof_shift; +@@ -781,6 +771,30 @@ + extern void FASTCALL(add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)); + extern void FASTCALL(remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)); + ++#define nr_running_inc() \ ++do { \ ++ numa_nr_running_inc(); \ ++ nr_running++; \ ++} while (0) ++ ++#define nr_running_dec() \ ++do { \ ++ numa_nr_running_dec(); \ ++ nr_running--; \ ++} while (0) ++ ++#define nr_threads_inc() \ ++do { \ ++ numa_nr_threads_inc(); \ ++ nr_threads++; \ ++} while (0) ++ ++#define nr_threads_dec() \ ++do { \ ++ numa_nr_threads_dec(); \ ++ nr_threads--; \ ++} while (0) ++ + #define __wait_event(wq, condition) \ + do { \ + wait_queue_t __wait; \ +@@ -861,29 +875,28 @@ + #define next_thread(p) \ + list_entry((p)->thread_group.next, struct task_struct, thread_group) + +-static inline void del_from_runqueue(struct task_struct * p) +-{ +- nr_running--; +- p->sleep_time = jiffies; +- list_del(&p->run_list); +- p->run_list.next = NULL; +-} ++#define del_from_runqueue(p) \ ++do { \ ++ nr_running_dec(); \ ++ list_del(&(p)->run_list); \ ++ (p)->run_list.next = NULL; \ ++} while(0) + + static inline int task_on_runqueue(struct task_struct *p) + { + return (p->run_list.next != NULL); + } + +-static inline void unhash_process(struct task_struct *p) +-{ +- if (task_on_runqueue(p)) BUG(); +- write_lock_irq(&tasklist_lock); +- nr_threads--; +- unhash_pid(p); +- REMOVE_LINKS(p); +- list_del(&p->thread_group); +- write_unlock_irq(&tasklist_lock); +-} ++#define unhash_process(p) \ ++do { \ ++ if (task_on_runqueue(p)) BUG(); \ ++ write_lock_irq(&tasklist_lock); \ ++ nr_threads_dec(); \ ++ unhash_pid(p); \ ++ REMOVE_LINKS(p); \ ++ list_del(&(p)->thread_group); \ ++ write_unlock_irq(&tasklist_lock); \ ++} while(0) + + /* Protects ->fs, ->files, ->mm, and synchronises with wait4(). Nests inside tasklist_lock */ + static inline void task_lock(struct task_struct *p) +diff -urN numa-sched-ref/kernel/fork.c numa-sched/kernel/fork.c +--- numa-sched-ref/kernel/fork.c Sun Oct 28 15:28:10 2001 ++++ numa-sched/kernel/fork.c Sun Oct 28 15:28:25 2001 +@@ -635,7 +635,6 @@ + { + int i; + p->has_cpu = 0; +- p->processor = current->processor; + /* ?? should we just memset this ?? 
*/ + for(i = 0; i < smp_num_cpus; i++) + p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0; +@@ -712,7 +711,7 @@ + + SET_LINKS(p); + hash_pid(p); +- nr_threads++; ++ nr_threads_inc(); + write_unlock_irq(&tasklist_lock); + + if (p->ptrace & PT_PTRACED) +diff -urN numa-sched-ref/kernel/sched.c numa-sched/kernel/sched.c +--- numa-sched-ref/kernel/sched.c Sun Oct 28 15:28:11 2001 ++++ numa-sched/kernel/sched.c Sun Oct 28 15:38:10 2001 +@@ -10,6 +10,7 @@ + * 1998-11-19 Implemented schedule_timeout() and related stuff + * by Andrea Arcangeli + * 1998-12-28 Implemented better SMP scheduling by Ingo Molnar ++ * 2001-01-29 first NUMA scheduler attempt by Andrea Arcangeli, SuSE + */ + + /* +@@ -92,6 +93,8 @@ + spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED; /* inner */ + rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED; /* outer */ + ++#ifndef CONFIG_NUMA_SCHED ++ + static LIST_HEAD(runqueue_head); + + /* +@@ -100,14 +103,31 @@ + */ + schedule_data_t schedule_data[NR_CPUS] __cacheline_aligned = {{&init_task,0}}; + ++#define init_numa_schedule_data() do { } while(0) ++ ++#else /* CONFIG_NUMA_SCHED */ ++ ++static void __init init_numa_schedule_data(void) ++{ ++ int i; ++ ++ for (i = 0; i < numnodes; i++) { ++ INIT_LIST_HEAD(&NODE_SCHEDULE_DATA(i)->runqueue_head); ++ NODE_SCHEDULE_DATA(i)->nr_running = 0; ++ NODE_SCHEDULE_DATA(i)->nr_threads = 0; ++ } ++} ++#endif /* CONFIG_NUMA_SCHED */ ++ + struct kernel_stat kstat; + extern struct task_struct *child_reaper; + + #ifdef CONFIG_SMP + + #define idle_task(cpu) (init_tasks[cpu_number_map(cpu)]) ++#define logical_idle_task(cpu) (init_tasks[cpu]) + #define can_schedule(p,cpu) ((!(p)->has_cpu) && \ +- ((p)->cpus_allowed & (1 << cpu))) ++ ((p)->cpus_allowed & (1UL << cpu))) + + #else + +@@ -205,8 +225,8 @@ + #ifdef CONFIG_SMP + int this_cpu = smp_processor_id(); + struct task_struct *tsk, *target_tsk; +- int cpu, best_cpu, i, max_prio; +- cycles_t oldest_idle; ++ int cpu, best_cpu, i, max_prio, found_idle; ++ last_schedule_t oldest_idle; + + /* + * shortcut if the woken up task's last CPU is +@@ -214,17 +234,17 @@ + */ + best_cpu = p->processor; + if (can_schedule(p, best_cpu)) { +- tsk = idle_task(best_cpu); +- if (cpu_curr(best_cpu) == tsk) { +- int need_resched; ++ target_tsk = idle_task(best_cpu); ++ if (cpu_curr(best_cpu) == target_tsk) { ++ long need_resched; + send_now_idle: + /* + * If need_resched == -1 then we can skip sending + * the IPI altogether, tsk->need_resched is + * actively watched by the idle thread. + */ +- need_resched = tsk->need_resched; +- tsk->need_resched = 1; ++ need_resched = target_tsk->need_resched; ++ target_tsk->need_resched = 1; + if ((best_cpu != this_cpu) && !need_resched) + smp_send_reschedule(best_cpu); + return; +@@ -238,13 +258,17 @@ + * one will have the least active cache context.) Also find + * the executing process which has the least priority. + */ +- oldest_idle = (cycles_t) -1; + target_tsk = NULL; + max_prio = 0; ++ found_idle = 0; + + for (i = 0; i < smp_num_cpus; i++) { + cpu = cpu_logical_map(i); +- if (!can_schedule(p, cpu)) ++ if ( ++#ifdef CONFIG_NUMA_SCHED ++ cputonode(cpu) != p->nid || ++#endif ++ !can_schedule(p, cpu)) + continue; + tsk = cpu_curr(cpu); + /* +@@ -252,13 +276,14 @@ + * a priority list between idle CPUs, but this is not + * a problem. 
+ */ +- if (tsk == idle_task(cpu)) { +- if (last_schedule(cpu) < oldest_idle) { ++ if (tsk == logical_idle_task(i)) { ++ if (!found_idle || last_schedule_before(last_schedule(cpu), oldest_idle)) { + oldest_idle = last_schedule(cpu); + target_tsk = tsk; ++ found_idle = 1; + } + } else { +- if (oldest_idle == -1ULL) { ++ if (!found_idle) { + int prio = preemption_goodness(tsk, p, cpu); + + if (prio > max_prio) { +@@ -268,15 +293,33 @@ + } + } + } +- tsk = target_tsk; +- if (tsk) { +- if (oldest_idle != -1ULL) { +- best_cpu = tsk->processor; +- goto send_now_idle; ++ ++#ifdef CONFIG_NUMA_SCHED ++ if (!target_tsk) ++ /* Make sure to use the idle cpus in the other nodes */ ++ for (i = 0; i < smp_num_cpus; i++) { ++ cpu = cpu_logical_map(i); ++ if (cputonode(cpu) == p->nid || !can_schedule(p, cpu)) ++ continue; ++ tsk = cpu_curr(cpu); ++ if (tsk == logical_idle_task(i)) { ++ if (!found_idle || last_schedule_before(last_schedule(cpu), oldest_idle)) { ++ oldest_idle = last_schedule(cpu); ++ target_tsk = tsk; ++ found_idle = 1; ++ target_tsk->nid = cputonode(cpu); ++ } ++ } + } +- tsk->need_resched = 1; +- if (tsk->processor != this_cpu) +- smp_send_reschedule(tsk->processor); ++#endif ++ ++ if (target_tsk) { ++ best_cpu = target_tsk->processor; ++ if (found_idle) ++ goto send_now_idle; ++ target_tsk->need_resched = 1; ++ if (best_cpu != this_cpu) ++ smp_send_reschedule(best_cpu); + } + return; + +@@ -300,20 +343,20 @@ + */ + static inline void add_to_runqueue(struct task_struct * p) + { +- list_add(&p->run_list, &runqueue_head); +- nr_running++; ++ list_add(&p->run_list, numa_runqueue_head(p->nid)); ++ nr_running_inc(); + } + + static inline void move_last_runqueue(struct task_struct * p) + { + list_del(&p->run_list); +- list_add_tail(&p->run_list, &runqueue_head); ++ list_add_tail(&p->run_list, numa_runqueue_head(p->nid)); + } + + static inline void move_first_runqueue(struct task_struct * p) + { + list_del(&p->run_list); +- list_add(&p->run_list, &runqueue_head); ++ list_add(&p->run_list, numa_runqueue_head(p->nid)); + } + + /* +@@ -336,9 +379,9 @@ + p->state = TASK_RUNNING; + if (task_on_runqueue(p)) + goto out; +- add_to_runqueue(p); + if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id()))) + reschedule_idle(p); ++ add_to_runqueue(p); + success = 1; + out: + spin_unlock_irqrestore(&runqueue_lock, flags); +@@ -524,10 +567,12 @@ + */ + asmlinkage void schedule(void) + { +- schedule_data_t * sched_data; + struct task_struct *prev, *next, *p; + struct list_head *tmp; + int this_cpu, c; ++#ifdef CONFIG_NUMA_SCHED ++ int recalculate_all; ++#endif + + + spin_lock_prefetch(&runqueue_lock); +@@ -542,12 +587,6 @@ + + release_kernel_lock(prev, this_cpu); + +- /* +- * 'sched_data' is protected by the fact that we can run +- * only one process per CPU. +- */ +- sched_data = &schedule_data[this_cpu]; +- + spin_lock_irq(&runqueue_lock); + + /* move an exhausted RR process to be last.. 
*/ +@@ -581,7 +620,7 @@ + goto still_running; + + still_running_back: +- list_for_each(tmp, &runqueue_head) { ++ list_for_each(tmp, numa_runqueue_head(numa_node_id())) { + p = list_entry(tmp, struct task_struct, run_list); + if (can_schedule(p, this_cpu)) { + int weight = goodness(p, this_cpu, prev->active_mm); +@@ -590,6 +629,27 @@ + } + } + ++#ifdef CONFIG_NUMA_SCHED ++ recalculate_all = 0; ++ if (c < 0) { ++ int nid; ++ ++ recalculate_all = 1; ++ for (nid = 0; nid < numnodes; nid++) { ++ if (nid == numa_node_id()) ++ continue; ++ list_for_each(tmp, numa_runqueue_head(nid)) { ++ p = list_entry(tmp, struct task_struct, run_list); ++ if (can_schedule(p, this_cpu)) { ++ int weight = goodness(p, this_cpu, prev->active_mm); ++ if (weight > c) ++ c = weight, next = p; ++ } ++ } ++ } ++ } ++#endif ++ + /* Do we need to re-calculate counters? */ + if (!c) + goto recalculate; +@@ -598,12 +658,18 @@ + * switching to the next task, save this fact in + * sched_data. + */ +- sched_data->curr = next; ++ cpu_curr(this_cpu) = next; + #ifdef CONFIG_SMP + RCU_quiescent(this_cpu)++; + + next->has_cpu = 1; + next->processor = this_cpu; ++#ifdef CONFIG_NUMA_SCHED ++ if (next != idle_task(this_cpu) && next->nid != numa_node_id()) { ++ next->nid = numa_node_id(); ++ move_last_runqueue(next); ++ } ++#endif + #endif + spin_unlock_irq(&runqueue_lock); + +@@ -621,7 +687,7 @@ + * and it's approximate, so we do not have to maintain + * it while holding the runqueue spinlock. + */ +- sched_data->last_schedule = get_cycles(); ++ last_schedule(this_cpu) = get_last_schedule(); + + /* + * We drop the scheduler lock early (it's a global spinlock), +@@ -680,8 +746,13 @@ + struct task_struct *p; + spin_unlock_irq(&runqueue_lock); + read_lock(&tasklist_lock); +- for_each_task(p) ++ for_each_task(p) { ++#ifdef CONFIG_NUMA_SCHED ++ if (!recalculate_all && p->nid != numa_node_id()) ++ continue; ++#endif + p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); ++ } + read_unlock(&tasklist_lock); + spin_lock_irq(&runqueue_lock); + } +@@ -1062,7 +1133,7 @@ + // Subtract non-idle processes running on other CPUs. + for (i = 0; i < smp_num_cpus; i++) { + int cpu = cpu_logical_map(i); +- if (cpu_curr(cpu) != idle_task(cpu)) ++ if (cpu_curr(cpu) != logical_idle_task(i)) + nr_pending--; + } + #else +@@ -1319,16 +1390,15 @@ + + void __init init_idle(void) + { +- schedule_data_t * sched_data; +- sched_data = &schedule_data[smp_processor_id()]; ++ int cpu = smp_processor_id(); + + if (current != &init_task && task_on_runqueue(current)) { + printk("UGH! 
(%d:%d) was on the runqueue, removing.\n", + smp_processor_id(), current->pid); + del_from_runqueue(current); + } +- sched_data->curr = current; +- sched_data->last_schedule = get_cycles(); ++ cpu_curr(cpu) = current; ++ last_schedule(cpu) = get_last_schedule(); + clear_bit(current->processor, &wait_init_idle); + } + +@@ -1359,4 +1429,6 @@ + */ + atomic_inc(&init_mm.mm_count); + enter_lazy_tlb(&init_mm, current, cpu); ++ ++ init_numa_schedule_data(); + } diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8 new file mode 100644 index 000000000000..3cafb1c56042 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_parent-timeslice-8 @@ -0,0 +1,61 @@ +diff -urN parent-timeslice-ref/include/linux/sched.h parent-timeslice/include/linux/sched.h +--- parent-timeslice-ref/include/linux/sched.h Wed Oct 24 13:18:54 2001 ++++ parent-timeslice/include/linux/sched.h Wed Oct 24 13:19:00 2001 +@@ -317,6 +317,7 @@ + #ifdef CONFIG_NUMA_SCHED + int nid; + #endif ++ int get_child_timeslice; + struct task_struct *next_task, *prev_task; + struct mm_struct *active_mm; + struct rw_sem_recursor mm_recursor; +diff -urN parent-timeslice-ref/kernel/exit.c parent-timeslice/kernel/exit.c +--- parent-timeslice-ref/kernel/exit.c Wed Oct 24 08:04:27 2001 ++++ parent-timeslice/kernel/exit.c Wed Oct 24 13:19:35 2001 +@@ -61,9 +61,11 @@ + * timeslices, because any timeslice recovered here + * was given away by the parent in the first place.) + */ +- current->counter += p->counter; +- if (current->counter >= MAX_COUNTER) +- current->counter = MAX_COUNTER; ++ if (p->get_child_timeslice) { ++ current->counter += p->counter; ++ if (current->counter >= MAX_COUNTER) ++ current->counter = MAX_COUNTER; ++ } + p->pid = 0; + free_task_struct(p); + } else { +@@ -164,6 +166,7 @@ + p->exit_signal = SIGCHLD; + p->self_exec_id++; + p->p_opptr = child_reaper; ++ p->get_child_timeslice = 0; + if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0); + } + } +diff -urN parent-timeslice-ref/kernel/fork.c parent-timeslice/kernel/fork.c +--- parent-timeslice-ref/kernel/fork.c Wed Oct 24 13:18:54 2001 ++++ parent-timeslice/kernel/fork.c Wed Oct 24 13:19:00 2001 +@@ -682,6 +682,9 @@ + if (!current->counter) + current->need_resched = 1; + ++ /* Tell the parent if it can get back its timeslice when child exits */ ++ p->get_child_timeslice = 1; ++ + /* + * Ok, add it to the run-queues and make it + * visible to the rest of the system. 
+diff -urN parent-timeslice-ref/kernel/sched.c parent-timeslice/kernel/sched.c +--- parent-timeslice-ref/kernel/sched.c Wed Oct 24 13:18:54 2001 ++++ parent-timeslice/kernel/sched.c Wed Oct 24 13:19:00 2001 +@@ -758,6 +758,7 @@ + continue; + #endif + p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice); ++ p->get_child_timeslice = 0; + } + read_unlock(&tasklist_lock); + spin_lock_irq(&runqueue_lock); diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13 new file mode 100644 index 000000000000..40da91b65304 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/10_vm-13 @@ -0,0 +1,1595 @@ +diff -urN vm-ref/arch/i386/config.in vm/arch/i386/config.in +--- vm-ref/arch/i386/config.in Fri Nov 9 08:29:24 2001 ++++ vm/arch/i386/config.in Fri Nov 9 08:29:33 2001 +@@ -404,6 +404,7 @@ + bool ' Magic SysRq key' CONFIG_MAGIC_SYSRQ + bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK + bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE ++ bool ' Debug allocation faliures' CONFIG_DEBUG_GFP + fi + + endmenu +diff -urN vm-ref/arch/sparc/kernel/sys_sunos.c vm/arch/sparc/kernel/sys_sunos.c +--- vm-ref/arch/sparc/kernel/sys_sunos.c Thu Aug 16 22:03:25 2001 ++++ vm/arch/sparc/kernel/sys_sunos.c Fri Nov 9 08:29:33 2001 +@@ -193,7 +193,7 @@ + * fool it, but this should catch most mistakes. + */ + freepages = atomic_read(&buffermem_pages) >> PAGE_SHIFT; +- freepages += atomic_read(&page_cache_size); ++ freepages += page_cache_size; + freepages >>= 1; + freepages += nr_free_pages(); + freepages += nr_swap_pages; +diff -urN vm-ref/arch/sparc64/kernel/sys_sunos32.c vm/arch/sparc64/kernel/sys_sunos32.c +--- vm-ref/arch/sparc64/kernel/sys_sunos32.c Thu Aug 16 22:03:26 2001 ++++ vm/arch/sparc64/kernel/sys_sunos32.c Fri Nov 9 08:29:33 2001 +@@ -157,7 +157,7 @@ + * fool it, but this should catch most mistakes. 
+ */ + freepages = atomic_read(&buffermem_pages) >> PAGE_SHIFT; +- freepages += atomic_read(&page_cache_size); ++ freepages += page_cache_size; + freepages >>= 1; + freepages += nr_free_pages(); + freepages += nr_swap_pages; +diff -urN vm-ref/fs/buffer.c vm/fs/buffer.c +--- vm-ref/fs/buffer.c Fri Nov 9 08:29:25 2001 ++++ vm/fs/buffer.c Fri Nov 9 08:29:33 2001 +@@ -115,7 +115,7 @@ + int dummy5; /* unused */ + } b_un; + unsigned int data[N_PARAM]; +-} bdf_prm = {{40, 0, 0, 0, 5*HZ, 30*HZ, 60, 0, 0}}; ++} bdf_prm = {{20, 0, 0, 0, 5*HZ, 30*HZ, 40, 0, 0}}; + + /* These are the min and max parameter values that we will allow to be assigned */ + int bdflush_min[N_PARAM] = { 0, 10, 5, 25, 0, 1*HZ, 0, 0, 0}; +@@ -124,7 +124,6 @@ + void unlock_buffer(struct buffer_head *bh) + { + clear_bit(BH_Wait_IO, &bh->b_state); +- clear_bit(BH_launder, &bh->b_state); + clear_bit(BH_Lock, &bh->b_state); + smp_mb__after_clear_bit(); + if (waitqueue_active(&bh->b_wait)) +@@ -179,6 +178,7 @@ + do { + struct buffer_head * bh = *array++; + bh->b_end_io = end_buffer_io_sync; ++ clear_bit(BH_Pending_IO, &bh->b_state); + submit_bh(WRITE, bh); + } while (--count); + } +@@ -211,6 +211,7 @@ + if (atomic_set_buffer_clean(bh)) { + __refile_buffer(bh); + get_bh(bh); ++ set_bit(BH_Pending_IO, &bh->b_state); + array[count++] = bh; + if (count < NRSYNC) + continue; +@@ -238,7 +239,6 @@ + conditional_schedule(); + spin_lock(&lru_list_lock); + } while (write_some_buffers(dev)); +- run_task_queue(&tq_disk); + } + + /* +@@ -710,12 +710,8 @@ + + static void free_more_memory(void) + { +- zone_t * zone = contig_page_data.node_zonelists[GFP_NOFS & GFP_ZONEMASK].zones[0]; +- +- balance_dirty(); + wakeup_bdflush(); +- try_to_free_pages(zone, GFP_NOFS, 0); +- run_task_queue(&tq_disk); ++ try_to_free_pages_nozone(GFP_NOIO); + current->policy |= SCHED_YIELD; + __set_current_state(TASK_RUNNING); + schedule(); +@@ -1057,19 +1053,17 @@ + if (state < 0) + return; + +- /* If we're getting into imbalance, start write-out */ +- spin_lock(&lru_list_lock); +- write_some_buffers(NODEV); ++ wakeup_bdflush(); + + /* + * And if we're _really_ out of balance, wait for +- * some of the dirty/locked buffers ourselves and +- * start bdflush. ++ * some of the dirty/locked buffers ourselves. + * This will throttle heavy writers. + */ + if (state > 0) { ++ spin_lock(&lru_list_lock); ++ write_some_buffers(NODEV); + wait_for_some_buffers(NODEV); +- wakeup_bdflush(); + } + } + +@@ -2376,23 +2370,28 @@ + return 1; + } + +-static int sync_page_buffers(struct buffer_head *head, unsigned int gfp_mask) ++static int sync_page_buffers(struct buffer_head *head) + { + struct buffer_head * bh = head; +- int tryagain = 0; ++ int tryagain = 1; + + do { + if (!buffer_dirty(bh) && !buffer_locked(bh)) + continue; + ++ if (unlikely(buffer_pending_IO(bh))) { ++ tryagain = 0; ++ continue; ++ } ++ + /* Don't start IO first time around.. */ +- if (!test_and_set_bit(BH_Wait_IO, &bh->b_state)) ++ if (!test_and_set_bit(BH_Wait_IO, &bh->b_state)) { ++ tryagain = 0; + continue; ++ } + + /* Second time through we start actively writing out.. 
*/ + if (test_and_set_bit(BH_Lock, &bh->b_state)) { +- if (!test_bit(BH_launder, &bh->b_state)) +- continue; + wait_on_buffer(bh); + tryagain = 1; + continue; +@@ -2405,7 +2404,6 @@ + + __mark_buffer_clean(bh); + get_bh(bh); +- set_bit(BH_launder, &bh->b_state); + bh->b_end_io = end_buffer_io_sync; + submit_bh(WRITE, bh); + tryagain = 0; +@@ -2479,7 +2477,7 @@ + spin_unlock(&lru_list_lock); + if (gfp_mask & __GFP_IO) { + if ((gfp_mask & __GFP_HIGHIO) || !PageHighMem(page)) { +- if (sync_page_buffers(bh, gfp_mask)) { ++ if (sync_page_buffers(bh)) { + /* no IO or waiting next time */ + gfp_mask = 0; + goto cleaned_buffers_try_again; +@@ -2730,7 +2728,7 @@ + + spin_lock(&lru_list_lock); + if (!write_some_buffers(NODEV) || balance_dirty_state() < 0) { +- wait_for_some_buffers(NODEV); ++ run_task_queue(&tq_disk); + interruptible_sleep_on(&bdflush_wait); + } + } +@@ -2761,8 +2759,6 @@ + complete((struct completion *)startup); + + for (;;) { +- wait_for_some_buffers(NODEV); +- + /* update interval */ + interval = bdf_prm.b_un.interval; + if (interval) { +@@ -2790,6 +2786,7 @@ + printk(KERN_DEBUG "kupdate() activated...\n"); + #endif + sync_old_buffers(); ++ run_task_queue(&tq_disk); + } + } + +diff -urN vm-ref/fs/proc/proc_misc.c vm/fs/proc/proc_misc.c +--- vm-ref/fs/proc/proc_misc.c Wed Oct 24 08:04:23 2001 ++++ vm/fs/proc/proc_misc.c Fri Nov 9 08:29:33 2001 +@@ -149,7 +149,7 @@ + #define B(x) ((unsigned long long)(x) << PAGE_SHIFT) + si_meminfo(&i); + si_swapinfo(&i); +- pg_size = atomic_read(&page_cache_size) - i.bufferram ; ++ pg_size = page_cache_size - i.bufferram; + + len = sprintf(page, " total: used: free: shared: buffers: cached:\n" + "Mem: %8Lu %8Lu %8Lu %8Lu %8Lu %8Lu\n" +diff -urN vm-ref/include/linux/fs.h vm/include/linux/fs.h +--- vm-ref/include/linux/fs.h Fri Nov 9 08:29:24 2001 ++++ vm/include/linux/fs.h Fri Nov 9 08:29:33 2001 +@@ -215,7 +215,7 @@ + BH_New, /* 1 if the buffer is new and not yet written out */ + BH_Async, /* 1 if the buffer is under end_buffer_io_async I/O */ + BH_Wait_IO, /* 1 if we should write out this buffer */ +- BH_launder, /* 1 if we should throttle on this buffer */ ++ BH_Pending_IO, /* 1 if the buffer is locked but not in the I/O queue yet */ + + BH_PrivateStart,/* not a state bit, but the first bit available + * for private allocation by other entities +@@ -276,6 +276,7 @@ + #define buffer_mapped(bh) __buffer_state(bh,Mapped) + #define buffer_new(bh) __buffer_state(bh,New) + #define buffer_async(bh) __buffer_state(bh,Async) ++#define buffer_pending_IO(bh) __buffer_state(bh,Pending_IO) + + #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) + +diff -urN vm-ref/include/linux/mm.h vm/include/linux/mm.h +--- vm-ref/include/linux/mm.h Fri Nov 9 08:29:24 2001 ++++ vm/include/linux/mm.h Fri Nov 9 08:29:33 2001 +@@ -294,8 +294,10 @@ + #define TryLockPage(page) test_and_set_bit(PG_locked, &(page)->flags) + #define PageChecked(page) test_bit(PG_checked, &(page)->flags) + #define SetPageChecked(page) set_bit(PG_checked, &(page)->flags) ++ + #define PageLaunder(page) test_bit(PG_launder, &(page)->flags) + #define SetPageLaunder(page) set_bit(PG_launder, &(page)->flags) ++#define ClearPageLaunder(page) clear_bit(PG_launder, &(page)->flags) + + extern void FASTCALL(set_page_dirty(struct page *)); + +@@ -391,6 +393,8 @@ + #define __free_page(page) __free_pages((page), 0) + #define free_page(addr) free_pages((addr),0) + ++extern int start_aggressive_readahead(unsigned int); ++ + extern void show_free_areas(void); + extern void 
show_free_areas_node(pg_data_t *pgdat); + +@@ -451,8 +455,8 @@ + return page_count(page) - !!page->buffers == 1; + } + +-extern int can_share_swap_page(struct page *); +-extern int remove_exclusive_swap_page(struct page *); ++extern int FASTCALL(make_exclusive_page(struct page *, int)); ++extern int FASTCALL(remove_exclusive_swap_page(struct page *)); + + extern void __free_pte(pte_t); + +diff -urN vm-ref/include/linux/mmzone.h vm/include/linux/mmzone.h +--- vm-ref/include/linux/mmzone.h Mon Nov 5 05:26:23 2001 ++++ vm/include/linux/mmzone.h Fri Nov 9 08:29:33 2001 +@@ -40,7 +40,17 @@ + spinlock_t lock; + unsigned long free_pages; + unsigned long pages_min, pages_low, pages_high; +- int need_balance; ++ /* ++ * The below fields are protected by different locks (or by ++ * no lock at all like need_balance), so they're longs to ++ * provide an atomic granularity against each other on ++ * all architectures. ++ */ ++ unsigned long need_balance; ++ /* protected by the pagemap_lru_lock */ ++ unsigned long nr_active_pages, nr_inactive_pages; ++ /* protected by the pagecache_lock */ ++ unsigned long nr_cache_pages; + + /* + * free areas of different sizes +@@ -113,8 +123,8 @@ + extern int numnodes; + extern pg_data_t *pgdat_list; + +-#define memclass(pgzone, classzone) (((pgzone)->zone_pgdat == (classzone)->zone_pgdat) \ +- && ((pgzone) <= (classzone))) ++#define memclass(pgzone, classzone) \ ++ (((pgzone) - (pgzone)->zone_pgdat->node_zones) <= ((classzone) - (classzone)->zone_pgdat->node_zones)) + + /* + * The following two are not meant for general usage. They are here as +diff -urN vm-ref/include/linux/pagemap.h vm/include/linux/pagemap.h +--- vm-ref/include/linux/pagemap.h Tue Nov 6 02:04:54 2001 ++++ vm/include/linux/pagemap.h Fri Nov 9 08:29:33 2001 +@@ -45,7 +45,7 @@ + #define PAGE_HASH_BITS (page_hash_bits) + #define PAGE_HASH_SIZE (1 << PAGE_HASH_BITS) + +-extern atomic_t page_cache_size; /* # of pages currently in the hash table */ ++extern unsigned long page_cache_size; /* # of pages currently in the hash table */ + extern struct page **page_hash_table; + + extern void page_cache_init(unsigned long); +diff -urN vm-ref/include/linux/sched.h vm/include/linux/sched.h +--- vm-ref/include/linux/sched.h Fri Nov 9 08:29:24 2001 ++++ vm/include/linux/sched.h Fri Nov 9 08:29:33 2001 +@@ -280,6 +280,14 @@ + extern struct user_struct root_user; + #define INIT_USER (&root_user) + ++struct zone_struct; ++ ++struct local_pages { ++ struct list_head list; ++ unsigned int order, nr; ++ struct zone_struct * classzone; ++}; ++ + struct task_struct { + /* + * offsets of these are hardcoded elsewhere - touch with care +@@ -318,8 +326,7 @@ + struct task_struct *next_task, *prev_task; + struct mm_struct *active_mm; + struct rw_sem_recursor mm_recursor; +- struct list_head local_pages; +- unsigned int allocation_order, nr_local_pages; ++ struct local_pages local_pages; + + /* task state */ + struct linux_binfmt *binfmt; +@@ -416,7 +423,6 @@ + #define PF_DUMPCORE 0x00000200 /* dumped core */ + #define PF_SIGNALED 0x00000400 /* killed by a signal */ + #define PF_MEMALLOC 0x00000800 /* Allocating memory */ +-#define PF_MEMDIE 0x00001000 /* Killed for out-of-memory */ + #define PF_FREE_PAGES 0x00002000 /* per process page freeing */ + + #define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ +diff -urN vm-ref/include/linux/swap.h vm/include/linux/swap.h +--- vm-ref/include/linux/swap.h Fri Nov 9 08:29:25 2001 ++++ vm/include/linux/swap.h Fri Nov 9 08:29:33 2001 +@@ -88,7 +88,7 @@ + extern int 
nr_active_pages; + extern int nr_inactive_pages; + extern atomic_t nr_async_pages; +-extern atomic_t page_cache_size; ++extern unsigned long page_cache_size; + extern atomic_t buffermem_pages; + + extern spinlock_cacheline_t pagecache_lock_cacheline; +@@ -115,6 +115,8 @@ + /* linux/mm/vmscan.c */ + extern wait_queue_head_t kswapd_wait; + extern int FASTCALL(try_to_free_pages(zone_t *, unsigned int, unsigned int)); ++extern int FASTCALL(try_to_free_pages_nozone(unsigned int)); ++extern int vm_scan_ratio, vm_balance_ratio, vm_mapped_ratio; + + /* linux/mm/page_io.c */ + extern void rw_swap_page(int, struct page *); +@@ -178,32 +180,128 @@ + BUG(); \ + } while (0) + ++#define inc_nr_active_pages(page) \ ++do { \ ++ pg_data_t * __pgdat; \ ++ zone_t * __classzone, * __overflow; \ ++ \ ++ __classzone = (page)->zone; \ ++ __pgdat = __classzone->zone_pgdat; \ ++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ ++ \ ++ while (__classzone < __overflow) { \ ++ __classzone->nr_active_pages++; \ ++ __classzone++; \ ++ } \ ++ nr_active_pages++; \ ++} while (0) ++ ++#define dec_nr_active_pages(page) \ ++do { \ ++ pg_data_t * __pgdat; \ ++ zone_t * __classzone, * __overflow; \ ++ \ ++ __classzone = (page)->zone; \ ++ __pgdat = __classzone->zone_pgdat; \ ++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ ++ \ ++ while (__classzone < __overflow) { \ ++ __classzone->nr_active_pages--; \ ++ __classzone++; \ ++ } \ ++ nr_active_pages--; \ ++} while (0) ++ ++#define inc_nr_inactive_pages(page) \ ++do { \ ++ pg_data_t * __pgdat; \ ++ zone_t * __classzone, * __overflow; \ ++ \ ++ __classzone = (page)->zone; \ ++ __pgdat = __classzone->zone_pgdat; \ ++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ ++ \ ++ while (__classzone < __overflow) { \ ++ __classzone->nr_inactive_pages++; \ ++ __classzone++; \ ++ } \ ++ nr_inactive_pages++; \ ++} while (0) ++ ++#define dec_nr_inactive_pages(page) \ ++do { \ ++ pg_data_t * __pgdat; \ ++ zone_t * __classzone, * __overflow; \ ++ \ ++ __classzone = (page)->zone; \ ++ __pgdat = __classzone->zone_pgdat; \ ++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ ++ \ ++ while (__classzone < __overflow) { \ ++ __classzone->nr_inactive_pages--; \ ++ __classzone++; \ ++ } \ ++ nr_inactive_pages--; \ ++} while (0) ++ + #define add_page_to_active_list(page) \ + do { \ + DEBUG_LRU_PAGE(page); \ + SetPageActive(page); \ + list_add(&(page)->lru, &active_list); \ +- nr_active_pages++; \ ++ inc_nr_active_pages(page); \ + } while (0) + + #define add_page_to_inactive_list(page) \ + do { \ + DEBUG_LRU_PAGE(page); \ + list_add(&(page)->lru, &inactive_list); \ +- nr_inactive_pages++; \ ++ inc_nr_inactive_pages(page); \ + } while (0) + + #define del_page_from_active_list(page) \ + do { \ + list_del(&(page)->lru); \ + ClearPageActive(page); \ +- nr_active_pages--; \ ++ dec_nr_active_pages(page); \ + } while (0) + + #define del_page_from_inactive_list(page) \ + do { \ + list_del(&(page)->lru); \ +- nr_inactive_pages--; \ ++ dec_nr_inactive_pages(page); \ ++} while (0) ++ ++#define inc_nr_cache_pages(page) \ ++do { \ ++ pg_data_t * __pgdat; \ ++ zone_t * __classzone, * __overflow; \ ++ \ ++ __classzone = (page)->zone; \ ++ __pgdat = __classzone->zone_pgdat; \ ++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ ++ \ ++ while (__classzone < __overflow) { \ ++ __classzone->nr_cache_pages++; \ ++ __classzone++; \ ++ } \ ++ page_cache_size++; \ ++} while (0) ++ ++#define dec_nr_cache_pages(page) \ ++do { \ ++ pg_data_t * __pgdat; \ ++ zone_t * __classzone, * __overflow; \ 
++ \ ++ __classzone = (page)->zone; \ ++ __pgdat = __classzone->zone_pgdat; \ ++ __overflow = __pgdat->node_zones + __pgdat->nr_zones; \ ++ \ ++ while (__classzone < __overflow) { \ ++ __classzone->nr_cache_pages--; \ ++ __classzone++; \ ++ } \ ++ page_cache_size--; \ + } while (0) + + /* +diff -urN vm-ref/include/linux/sysctl.h vm/include/linux/sysctl.h +--- vm-ref/include/linux/sysctl.h Fri Nov 9 08:29:24 2001 ++++ vm/include/linux/sysctl.h Fri Nov 9 08:29:33 2001 +@@ -134,12 +134,13 @@ + VM_FREEPG=3, /* struct: Set free page thresholds */ + VM_BDFLUSH=4, /* struct: Control buffer cache flushing */ + VM_OVERCOMMIT_MEMORY=5, /* Turn off the virtual memory safety limit */ +- VM_BUFFERMEM=6, /* struct: Set buffer memory thresholds */ +- VM_PAGECACHE=7, /* struct: Set cache memory thresholds */ + VM_PAGERDAEMON=8, /* struct: Control kswapd behaviour */ + VM_PGT_CACHE=9, /* struct: Set page table cache parameters */ + VM_PAGE_CLUSTER=10, /* int: set number of pages to swap together */ + VM_HEAP_STACK_GAP=11, /* int: page gap between heap and stack */ ++ VM_SCAN_RATIO=12, /* part of the inactive list to scan */ ++ VM_BALANCE_RATIO=13, /* balance active and inactive caches */ ++ VM_MAPPED_RATIO=14, /* pageout when we find too many mapped pages */ + }; + + +diff -urN vm-ref/kernel/fork.c vm/kernel/fork.c +--- vm-ref/kernel/fork.c Sun Sep 23 21:11:43 2001 ++++ vm/kernel/fork.c Fri Nov 9 08:29:33 2001 +@@ -649,7 +649,7 @@ + p->lock_depth = -1; /* -1 = no lock */ + p->start_time = jiffies; + +- INIT_LIST_HEAD(&p->local_pages); ++ INIT_LIST_HEAD(&p->local_pages.list); + + retval = -ENOMEM; + /* copy all the process information */ +diff -urN vm-ref/kernel/ksyms.c vm/kernel/ksyms.c +--- vm-ref/kernel/ksyms.c Fri Nov 9 08:29:24 2001 ++++ vm/kernel/ksyms.c Fri Nov 9 08:29:33 2001 +@@ -89,6 +89,7 @@ + EXPORT_SYMBOL(exit_sighand); + + /* internal kernel memory management */ ++EXPORT_SYMBOL(start_aggressive_readahead); + EXPORT_SYMBOL(_alloc_pages); + EXPORT_SYMBOL(__alloc_pages); + EXPORT_SYMBOL(alloc_pages_node); +diff -urN vm-ref/kernel/sysctl.c vm/kernel/sysctl.c +--- vm-ref/kernel/sysctl.c Fri Nov 9 08:29:24 2001 ++++ vm/kernel/sysctl.c Fri Nov 9 08:29:33 2001 +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + + #include + +@@ -259,6 +260,12 @@ + }; + + static ctl_table vm_table[] = { ++ {VM_SCAN_RATIO, "vm_scan_ratio", ++ &vm_scan_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, ++ {VM_BALANCE_RATIO, "vm_balance_ratio", ++ &vm_balance_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, ++ {VM_MAPPED_RATIO, "vm_mapped_ratio", ++ &vm_mapped_ratio, sizeof(int), 0644, NULL, &proc_dointvec}, + {VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0644, NULL, + &proc_dointvec_minmax, &sysctl_intvec, NULL, + &bdflush_min, &bdflush_max}, +diff -urN vm-ref/mm/filemap.c vm/mm/filemap.c +--- vm-ref/mm/filemap.c Fri Nov 9 08:29:25 2001 ++++ vm/mm/filemap.c Fri Nov 9 08:29:33 2001 +@@ -43,7 +43,7 @@ + * SMP-threaded pagemap-LRU 1999, Andrea Arcangeli + */ + +-atomic_t page_cache_size = ATOMIC_INIT(0); ++unsigned long page_cache_size; + unsigned int page_hash_bits; + struct page **page_hash_table; + +@@ -75,7 +75,7 @@ + next->pprev_hash = &page->next_hash; + if (page->buffers) + PAGE_BUG(page); +- atomic_inc(&page_cache_size); ++ inc_nr_cache_pages(page); + } + + static inline void add_page_to_inode_queue(struct address_space *mapping, struct page * page) +@@ -105,7 +105,7 @@ + next->pprev_hash = pprev; + *pprev = next; + page->pprev_hash = NULL; +- atomic_dec(&page_cache_size); ++ dec_nr_cache_pages(page); 
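++ /*
++ * Editorial sketch, not part of the original patch: every
++ * inc_/dec_nr_{active,inactive,cache}_pages() macro above expands
++ * to the same walk, roughly (hypothetical function form):
++ *
++ * zone_t *z = page->zone;
++ * zone_t *end = z->zone_pgdat->node_zones + z->zone_pgdat->nr_zones;
++ *
++ * for (; z < end; z++)
++ * z->nr_cache_pages--;
++ * page_cache_size--;
++ *
++ * so each zone's counter also covers the pages of every lower zone
++ * in the same node, which is exactly the granularity the reworked
++ * index-based memclass() compares against.
++ */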
+ }
+
+ /*
+@@ -781,7 +781,7 @@
+
+ void unlock_page(struct page *page)
+ {
+- clear_bit(PG_launder, &(page)->flags);
++ ClearPageLaunder(page);
+ smp_mb__before_clear_bit();
+ if (!test_and_clear_bit(PG_locked, &(page)->flags))
+ BUG();
+@@ -1914,8 +1914,7 @@
+ * Found the page and have a reference on it, need to check sharing
+ * and possibly copy it over to another page..
+ */
+- mark_page_accessed(page);
+- flush_page_to_ram(page);
++ activate_page(page);
+ return page;
+
+ no_cached_page:
+@@ -3017,8 +3016,15 @@
+ }
+ unlock:
+ kunmap(page);
++
++ /*
++ * Mark the page accessed if we wrote the
++ * beginning or we just did an lseek.
++ */
++ if (!offset || !file->f_reada)
++ SetPageReferenced(page);
++
+ /* Mark it unlocked again and drop the page.. */
+- SetPageReferenced(page);
+ UnlockPage(page);
+ page_cache_release(page);
+
+diff -urN vm-ref/mm/memory.c vm/mm/memory.c
+--- vm-ref/mm/memory.c Fri Nov 9 08:29:24 2001
++++ vm/mm/memory.c Fri Nov 9 08:29:33 2001
+@@ -913,15 +913,11 @@
+ if (!VALID_PAGE(old_page))
+ goto bad_wp_page;
+
+- if (!TryLockPage(old_page)) {
+- int reuse = can_share_swap_page(old_page);
+- unlock_page(old_page);
+- if (reuse) {
+- flush_cache_page(vma, address);
+- establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
+- spin_unlock(&mm->page_table_lock);
+- return 1; /* Minor fault */
+- }
++ if (make_exclusive_page(old_page, 1)) {
++ flush_cache_page(vma, address);
++ establish_pte(vma, address, page_table, pte_mkyoung(pte_mkdirty(pte_mkwrite(pte))));
++ spin_unlock(&mm->page_table_lock);
++ return 1; /* Minor fault */
+ }
+
+ /*
+@@ -939,6 +935,19 @@
+ * Re-check the pte - we dropped the lock
+ */
+ spin_lock(&mm->page_table_lock);
++ /*
++ * Keep the page pinned until we return runnable,
++ * to prevent another thread from skipping the break_cow
++ * path; that way we're sure the pte_same check below also implies
++ * that the _contents_ of the old_page didn't change
++ * under us (not only that the pagetable is the same).
++ *
++ * Since we have the page_table_lock acquired here, if the
++ * pte is the same it means we're still holding an additional
++ * reference on the old_page so we can safely
++ * page_cache_release(old_page) before the "pte_same == true" path.
++ */
++ page_cache_release(old_page);
+ if (pte_same(*page_table, pte)) {
+ if (PageReserved(old_page))
+ ++mm->rss;
+@@ -950,7 +959,6 @@
+ }
+ spin_unlock(&mm->page_table_lock);
+ page_cache_release(new_page);
+- page_cache_release(old_page);
+ return 1; /* Minor fault */
+
+ bad_wp_page:
+@@ -1106,7 +1114,8 @@
+ ret = 2;
+ }
+
+- lock_page(page);
++ if (!Page_Uptodate(page))
++ wait_on_page(page);
+
+ /*
+ * Back out if somebody else faulted in this pte while we
+@@ -1115,7 +1124,6 @@
+ spin_lock(&mm->page_table_lock);
+ if (!pte_same(*page_table, orig_pte)) {
+ spin_unlock(&mm->page_table_lock);
+- unlock_page(page);
+ page_cache_release(page);
+ return 1;
+ }
+@@ -1123,14 +1131,14 @@
+
+ /* The page isn't present yet, go ahead with the fault.
*/ + + swap_free(entry); +- if (vm_swap_full()) +- remove_exclusive_swap_page(page); +- + mm->rss++; + pte = mk_pte(page, vma->vm_page_prot); +- if (write_access && can_share_swap_page(page)) +- pte = pte_mkdirty(pte_mkwrite(pte)); +- unlock_page(page); ++ if (make_exclusive_page(page, write_access)) { ++ if (write_access) ++ pte = pte_mkdirty(pte); ++ if (vma->vm_flags & VM_WRITE) ++ pte = pte_mkwrite(pte); ++ } + + flush_page_to_ram(page); + flush_icache_page(vma, page); +@@ -1168,8 +1176,8 @@ + + spin_lock(&mm->page_table_lock); + if (!pte_none(*page_table)) { +- page_cache_release(page); + spin_unlock(&mm->page_table_lock); ++ page_cache_release(page); + return 1; + } + mm->rss++; +@@ -1225,7 +1233,7 @@ + struct page * page = alloc_page(GFP_HIGHUSER); + if (!page) + return -1; +- copy_highpage(page, new_page); ++ copy_user_highpage(page, new_page, address); + page_cache_release(new_page); + lru_cache_add(page); + new_page = page; +@@ -1252,9 +1260,9 @@ + entry = pte_mkwrite(pte_mkdirty(entry)); + set_pte(page_table, entry); + } else { ++ spin_unlock(&mm->page_table_lock); + /* One of our sibling threads was faster, back out. */ + page_cache_release(new_page); +- spin_unlock(&mm->page_table_lock); + return 1; + } + +diff -urN vm-ref/mm/mmap.c vm/mm/mmap.c +--- vm-ref/mm/mmap.c Fri Nov 9 08:29:24 2001 ++++ vm/mm/mmap.c Fri Nov 9 08:29:33 2001 +@@ -69,7 +69,7 @@ + return 1; + + /* The page cache contains buffer pages these days.. */ +- free = atomic_read(&page_cache_size); ++ free = page_cache_size; + free += nr_free_pages(); + free += nr_swap_pages; + +diff -urN vm-ref/mm/oom_kill.c vm/mm/oom_kill.c +--- vm-ref/mm/oom_kill.c Tue Nov 6 02:04:54 2001 ++++ vm/mm/oom_kill.c Fri Nov 9 08:29:33 2001 +@@ -150,7 +150,6 @@ + * exit() and clear out its resources quickly... + */ + p->counter = 5 * HZ; +- p->flags |= PF_MEMALLOC | PF_MEMDIE; + + /* This process has hardware access, be more careful. 
 */
+ if (cap_t(p->cap_effective) & CAP_TO_MASK(CAP_SYS_RAWIO)) {
+diff -urN vm-ref/mm/page_alloc.c vm/mm/page_alloc.c
+--- vm-ref/mm/page_alloc.c Fri Nov 9 08:29:24 2001
++++ vm/mm/page_alloc.c Fri Nov 9 08:29:33 2001
+@@ -138,14 +138,14 @@
+ return;
+
+ local_freelist:
+- if (current->nr_local_pages)
++ if ((current->local_pages.nr && !current->local_pages.order) ||
++ !memclass(page->zone, current->local_pages.classzone) ||
++ in_interrupt())
+ goto back_local_freelist;
+- if (in_interrupt())
+- goto back_local_freelist;
+
+- list_add(&page->list, &current->local_pages);
++ list_add(&page->list, &current->local_pages.list);
+ page->index = order;
+- current->nr_local_pages++;
++ current->local_pages.nr++;
+ }
+
+ #define MARK_USED(index, order, area) \
+@@ -230,35 +230,36 @@
+ static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed)
+ {
+ struct page * page = NULL;
+- int __freed = 0;
++ int __freed;
+
+- if (!(gfp_mask & __GFP_WAIT))
+- goto out;
+ if (in_interrupt())
+ BUG();
+
+- current->allocation_order = order;
++ current->local_pages.order = order;
++ current->local_pages.classzone = classzone;
+ current->flags |= PF_MEMALLOC | PF_FREE_PAGES;
+
+ __freed = try_to_free_pages(classzone, gfp_mask, order);
+
+ current->flags &= ~(PF_MEMALLOC | PF_FREE_PAGES);
+
+- if (current->nr_local_pages) {
++ if (current->local_pages.nr) {
+ struct list_head * entry, * local_pages;
+ struct page * tmp;
+ int nr_pages;
+
+- local_pages = &current->local_pages;
++ local_pages = &current->local_pages.list;
+
+ if (likely(__freed)) {
+ /* pick from the last inserted so we're lifo */
+ entry = local_pages->next;
+ do {
+ tmp = list_entry(entry, struct page, list);
+- if (tmp->index == order && memclass(tmp->zone, classzone)) {
++ if (!memclass(tmp->zone, classzone))
++ BUG();
++ if (tmp->index == order) {
+ list_del(entry);
+- current->nr_local_pages--;
++ current->local_pages.nr--;
+ set_page_count(tmp, 1);
+ page = tmp;
+
+@@ -284,7 +285,7 @@
+ } while ((entry = entry->next) != local_pages);
+ }
+
+- nr_pages = current->nr_local_pages;
++ nr_pages = current->local_pages.nr;
+ /* free in reverse order so that the global order will be lifo */
+ while ((entry = local_pages->prev) != local_pages) {
+ list_del(entry);
+@@ -293,9 +294,8 @@
+ if (!nr_pages--)
+ BUG();
+ }
+- current->nr_local_pages = 0;
++ current->local_pages.nr = 0;
+ }
+- out:
+ *freed = __freed;
+ return page;
+ }
+@@ -353,8 +353,7 @@
+
+ /* here we're in the low on memory slow path */
+
+-rebalance:
+- if (current->flags & (PF_MEMALLOC | PF_MEMDIE)) {
++ if (current->flags & PF_MEMALLOC && !in_interrupt()) {
+ zone = zonelist->zones;
+ for (;;) {
+ zone_t *z = *(zone++);
+@@ -370,34 +369,52 @@
+
+ /* Atomic allocations - we can't balance anything */
+ if (!(gfp_mask & __GFP_WAIT))
+- return NULL;
++ goto out;
+
++ rebalance:
+ page = balance_classzone(classzone, gfp_mask, order, &freed);
+ if (page)
+ return page;
+
+ zone = zonelist->zones;
+- for (;;) {
+- zone_t *z = *(zone++);
+- if (!z)
+- break;
++ if (likely(freed)) {
++ for (;;) {
++ zone_t *z = *(zone++);
++ if (!z)
++ break;
+
+- if (zone_free_pages(z, order) > z->pages_min) {
+- page = rmqueue(z, order);
+- if (page)
+- return page;
++ if (zone_free_pages(z, order) > z->pages_min) {
++ page = rmqueue(z, order);
++ if (page)
++ return page;
++ }
+ }
+- }
++ goto rebalance;
++ } else {
++ /*
++ * Check whether another task has been killed meanwhile;
++ * in such a case we can satisfy the allocation.
++ */
++ for (;;) {
++ zone_t *z = *(zone++);
++ if (!z)
++ break;
+
+- /* Don't let big-order allocations loop */
+- if (order > 3)
+- return NULL;
++ if (zone_free_pages(z, order) > z->pages_high) {
++ page = rmqueue(z, order);
++ if (page)
++ return page;
++ }
++ }
++ }
+
+- /* Yield for kswapd, and try again */
+- current->policy |= SCHED_YIELD;
+- __set_current_state(TASK_RUNNING);
+- schedule();
+- goto rebalance;
++ out:
++ printk(KERN_NOTICE "__alloc_pages: %u-order allocation failed (gfp=0x%x/%i)\n",
++ order, gfp_mask, !!(current->flags & PF_MEMALLOC));
++#ifdef CONFIG_DEBUG_GFP
++ show_stack(NULL);
++#endif
++ return NULL;
+ }
+
+ /*
+@@ -520,17 +537,24 @@
+ {
+ pg_data_t *pgdat = pgdat_list;
+ unsigned int sum = 0;
++ zonelist_t *zonelist;
++ zone_t **zonep, *zone;
+
+ do {
+- zonelist_t *zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
+- zone_t **zonep = zonelist->zones;
+- zone_t *zone;
++ zonelist = pgdat->node_zonelists + (GFP_USER & GFP_ZONEMASK);
++ zonep = zonelist->zones;
+
+- for (zone = *zonep++; zone; zone = *zonep++) {
+- unsigned long size = zone->size;
+- unsigned long high = zone->pages_high;
+- if (size > high)
+- sum += size - high;
++ zone = *zonep;
++ if (zone) {
++ sum += zone->nr_cache_pages;
++ do {
++ unsigned int free = zone->free_pages - zone->pages_high;
++ zonep++;
++ zone = *zonep;
++ if (free <= 0)
++ continue;
++ sum += free;
++ } while (zone);
+ }
+
+ pgdat = pgdat->node_next;
+@@ -553,6 +577,62 @@
+ }
+ #endif
+
++/*
++ * If it returns nonzero it means there's lots of ram "free"
++ * (note: not in cache!) so any caller will know that
++ * it can allocate some memory to do some more aggressive
++ * (possibly wasteful) readahead. The state of the memory
++ * should be rechecked after every few pages allocated for
++ * doing this aggressive readahead.
++ *
++ * The gfp_mask parameter specifies in which kind of memory
++ * the readahead information will be allocated.
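++ *
++ * Illustrative caller pattern (editorial sketch, not part of the
++ * original patch; readahead_one_page() is a hypothetical helper):
++ *
++ * while (more_to_read) {
++ * if (!start_aggressive_readahead(GFP_KERNEL))
++ * break;
++ * readahead_one_page();
++ * }
++ *
++ * i.e. stop early as soon as free memory is no longer well above
++ * the zones' pages_high watermarks.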
++ */ ++int start_aggressive_readahead(unsigned int gfp_mask) ++{ ++ pg_data_t *pgdat = pgdat_list; ++ zonelist_t *zonelist; ++ zone_t **zonep, *zone; ++ int ret = 0; ++ ++ do { ++ zonelist = pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK); ++ zonep = zonelist->zones; ++ ++ for (zone = *zonep++; zone; zone = *zonep++) ++ if (zone->free_pages > zone->pages_high * 2) ++ ret = 1; ++ ++ pgdat = pgdat->node_next; ++ } while (pgdat); ++ ++ return ret; ++} ++ ++int try_to_free_pages_nozone(unsigned int gfp_mask) ++{ ++ pg_data_t *pgdat = pgdat_list; ++ zonelist_t *zonelist; ++ zone_t **zonep; ++ int ret = 0; ++ unsigned long pf_free_pages; ++ ++ pf_free_pages = current->flags & PF_FREE_PAGES; ++ current->flags &= ~PF_FREE_PAGES; ++ ++ do { ++ zonelist = pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK); ++ zonep = zonelist->zones; ++ ++ ret |= try_to_free_pages(*zonep, gfp_mask, 0); ++ ++ pgdat = pgdat->node_next; ++ } while (pgdat); ++ ++ current->flags |= pf_free_pages; ++ return ret; ++} ++ + #define K(x) ((x) << (PAGE_SHIFT-10)) + + /* +@@ -758,6 +838,7 @@ + zone->zone_pgdat = pgdat; + zone->free_pages = 0; + zone->need_balance = 0; ++ zone->nr_active_pages = zone->nr_inactive_pages = 0; + if (!size) + continue; + +diff -urN vm-ref/mm/page_io.c vm/mm/page_io.c +--- vm-ref/mm/page_io.c Tue Nov 6 02:04:54 2001 ++++ vm/mm/page_io.c Fri Nov 9 08:29:33 2001 +@@ -41,7 +41,6 @@ + kdev_t dev = 0; + int block_size; + struct inode *swapf = 0; +- int wait = 0; + + if (rw == READ) { + ClearPageUptodate(page); +@@ -73,18 +72,6 @@ + + /* block_size == PAGE_SIZE/zones_used */ + brw_page(rw, page, dev, zones, block_size); +- +- /* Note! For consistency we do all of the logic, +- * decrementing the page count, and unlocking the page in the +- * swap lock map - in the IO completion handler. +- */ +- if (!wait) +- return 1; +- +- wait_on_page(page); +- /* This shouldn't happen, but check to be sure. */ +- if (page_count(page) == 0) +- printk(KERN_ERR "rw_swap_page: page unused while waiting!\n"); + + return 1; + } +diff -urN vm-ref/mm/slab.c vm/mm/slab.c +--- vm-ref/mm/slab.c Sun Sep 23 21:11:43 2001 ++++ vm/mm/slab.c Fri Nov 9 08:29:33 2001 +@@ -914,8 +914,6 @@ + slab_t *slabp; + int ret; + +- drain_cpu_caches(cachep); +- + spin_lock_irq(&cachep->spinlock); + + /* If the cache is growing, stop shrinking. 
 */
+@@ -985,6 +983,8 @@
+ kmem_cache_t, next);
+ list_del(&cachep->next);
+ up(&cache_chain_sem);
++
++ drain_cpu_caches(cachep);
+
+ if (__kmem_cache_shrink(cachep)) {
+ printk(KERN_ERR "kmem_cache_destroy: Can't free all objects %p\n",
+diff -urN vm-ref/mm/swap.c vm/mm/swap.c
+--- vm-ref/mm/swap.c Thu Nov 8 04:07:20 2001
++++ vm/mm/swap.c Fri Nov 9 08:29:33 2001
+@@ -38,9 +38,13 @@
+ */
+ static inline void activate_page_nolock(struct page * page)
+ {
+- if (PageLRU(page) && !PageActive(page)) {
+- del_page_from_inactive_list(page);
+- add_page_to_active_list(page);
++ if (PageLRU(page)) {
++ if (!PageActive(page)) {
++ del_page_from_inactive_list(page);
++ add_page_to_active_list(page);
++ ClearPageReferenced(page);
++ } else
++ SetPageReferenced(page);
+ }
+ }
+
+diff -urN vm-ref/mm/swap_state.c vm/mm/swap_state.c
+--- vm-ref/mm/swap_state.c Tue Nov 6 02:04:54 2001
++++ vm/mm/swap_state.c Fri Nov 9 08:29:33 2001
+@@ -117,7 +117,9 @@
+ if (!PageLocked(page))
+ BUG();
+
+- block_flushpage(page, 0);
++ if (!block_flushpage(page, 0))
++ /* an anonymous page cannot have page->buffers set */
++ BUG();
+
+ entry.val = page->index;
+
+diff -urN vm-ref/mm/swapfile.c vm/mm/swapfile.c
+--- vm-ref/mm/swapfile.c Tue Nov 6 02:04:54 2001
++++ vm/mm/swapfile.c Fri Nov 9 08:29:33 2001
+@@ -227,6 +227,7 @@
+ * Check if we're the only user of a swap page,
+ * when the page is locked.
+ */
++static int FASTCALL(exclusive_swap_page(struct page *page));
+ static int exclusive_swap_page(struct page *page)
+ {
+ int retval = 0;
+@@ -240,12 +241,13 @@
+ if (p->swap_map[SWP_OFFSET(entry)] == 1) {
+ /* Recheck the page count with the pagecache lock held.. */
+ spin_lock(&pagecache_lock);
+- if (page_count(page) - !!page->buffers == 2)
++ if (PageSwapCache(page) && page_count(page) - !!page->buffers == 2)
+ retval = 1;
+ spin_unlock(&pagecache_lock);
+ }
+ swap_info_put(p);
+ }
++
+ return retval;
+ }
+
+@@ -257,21 +259,42 @@
+ * work, but we opportunistically check whether
+ * we need to get all the locks first..
+ */
+-int can_share_swap_page(struct page *page)
++int make_exclusive_page(struct page *page, int write)
+ {
+ int retval = 0;
+
+- if (!PageLocked(page))
+- BUG();
+ switch (page_count(page)) {
+ case 3:
+ if (!page->buffers)
+ break;
+ /* Fallthrough */
+ case 2:
++ /* racy fastpath check */
+ if (!PageSwapCache(page))
+ break;
+- retval = exclusive_swap_page(page);
++
++ if ((!write && !vm_swap_full()) || TryLockPage(page)) {
++ /*
++ * Don't remove the page from the swapcache if:
++ * - it was a read fault and...
++ * - the swap isn't full
++ * or if
++ * - we failed acquiring the page lock
++ *
++ * NOTE: if we failed to acquire the lock we cannot remove the
++ * page from the swapcache, but still we can safely take over
++ * the page if it's exclusive, see the swapcache check in
++ * the innermost critical section of exclusive_swap_page().
++ */
++ retval = exclusive_swap_page(page);
++ } else {
++ /*
++ * Here we have the page lock acquired and we're asked
++ * to try to drop this page from the swapcache.
++ */
++ retval = remove_exclusive_swap_page(page);
++ unlock_page(page);
++ }
+ break;
+ case 1:
+ if (PageReserved(page))
+@@ -300,7 +323,7 @@
+
+ entry.val = page->index;
+ p = swap_info_get(entry);
+- if (!p)
++ if (unlikely(!p))
+ return 0;
+
+ /* Is the only swap cache user the cache itself? */
+@@ -309,7 +332,11 @@
+ /* Recheck the page count with the pagecache lock held..
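+ *
+ * (Editorial note, not part of the original patch: the caller holds
+ * one reference and the swap cache holds another, so an exclusive
+ * page shows page_count == 2; page->buffers would pin one more
+ * reference, hence the "- !!page->buffers" term in these checks.)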
 */
+ spin_lock(&pagecache_lock);
+ if (page_count(page) - !!page->buffers == 2) {
++ if (page->buffers && !try_to_free_buffers(page, 0))
++ /* an anonymous page cannot have page->buffers set */
++ BUG();
+ __delete_from_swap_cache(page);
++ swap_entry_free(p, SWP_OFFSET(entry));
+ SetPageDirty(page);
+ retval = 1;
+ }
+@@ -317,11 +344,8 @@
+ }
+ swap_info_put(p);
+
+- if (retval) {
+- block_flushpage(page, 0);
+- swap_free(entry);
++ if (retval)
+ page_cache_release(page);
+- }
+
+ return retval;
+ }
+@@ -343,11 +367,7 @@
+ }
+ if (page) {
+ page_cache_get(page);
+- /* Only cache user (+us), or swap space full? Free it! */
+- if (page_count(page) == 2 || vm_swap_full()) {
+- delete_from_swap_cache(page);
+- SetPageDirty(page);
+- }
++ remove_exclusive_swap_page(page);
+ UnlockPage(page);
+ page_cache_release(page);
+ }
+diff -urN vm-ref/mm/vmscan.c vm/mm/vmscan.c
+--- vm-ref/mm/vmscan.c Thu Nov 8 04:07:20 2001
++++ vm/mm/vmscan.c Fri Nov 9 08:36:58 2001
+@@ -26,12 +26,28 @@
+ #include 
+
+ /*
+- * The "priority" of VM scanning is how much of the queues we
+- * will scan in one go. A value of 6 for DEF_PRIORITY implies
+- * that we'll scan 1/64th of the queues ("queue_length >> 6")
+- * during a normal aging round.
++ * "vm_scan_ratio" is how much of the queues we will scan
++ * in one go. A value of 8 for vm_scan_ratio implies that we'll
++ * scan 1/8 of the inactive list during a normal aging round.
++ * So if 1/vm_scan_ratio of the inactive cache is unfreeable
++ * we'll start the background paging.
+ */
+-#define DEF_PRIORITY (6)
++int vm_scan_ratio = 8;
++
++/*
++ * "vm_mapped_ratio" controls when we start to swap out; the lower
++ * it is, the earlier we'll start to swap out.
++ */
++int vm_mapped_ratio = 10;
++
++/*
++ * "vm_balance_ratio" controls the balance between active and
++ * inactive cache. The bigger vm_balance_ratio is, the more easily
++ * the active cache will grow, because we'll rotate the active list
++ * slowly. A value of 3 means we'll go towards a balance of
++ * 1/4 of the cache being inactive.
++ */
++int vm_balance_ratio = 3;
+
+ /*
+ * The swap-out function returns 1 if it successfully
+@@ -50,13 +66,15 @@
+
+ /* Don't look at this pte if it's been accessed recently. */
+ if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) {
+- mark_page_accessed(page);
++ activate_page(page);
+ return 0;
+ }
+
++#if 0
+ /* Don't bother unmapping pages that are active */
+ if (PageActive(page))
+ return 0;
++#endif
+
+ /* Don't bother replenishing zones not under pressure..
*/ + if (!memclass(page->zone, classzone)) +@@ -113,6 +131,9 @@ + */ + if (page->mapping) + goto drop_pte; ++ if (page->buffers) ++ /* can happen if there's a page fault during vmtruncate */ ++ goto preserve; + if (!PageDirty(page)) + goto drop_pte; + +@@ -139,6 +160,7 @@ + swap_free(entry); + } + ++ preserve: + /* No swap space left */ + set_pte(page_table, pte); + UnlockPage(page); +@@ -249,6 +271,7 @@ + { + unsigned long address; + struct vm_area_struct* vma; ++ int tlb_flush = 0; + + /* + * Find the proper vm-area after freezing the vma chain +@@ -263,6 +286,7 @@ + } + vma = find_vma(mm, address); + if (vma) { ++ tlb_flush = 1; + if (address < vma->vm_start) + address = vma->vm_start; + +@@ -281,16 +305,18 @@ + + out_unlock: + spin_unlock(&mm->page_table_lock); ++ if (tlb_flush) ++ flush_tlb_mm(mm); + return count; + } + +-static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone)); +-static int swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone) ++static int FASTCALL(swap_out(zone_t * classzone)); ++static int swap_out(zone_t * classzone) + { + int counter, nr_pages = SWAP_CLUSTER_MAX; + struct mm_struct *mm; + +- counter = mmlist_nr; ++ counter = mmlist_nr << 1; + do { + if (unlikely(current->need_resched)) { + __set_current_state(TASK_RUNNING); +@@ -326,15 +352,13 @@ + return 0; + } + +-static int FASTCALL(shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority)); +-static int shrink_cache(int nr_pages, zone_t * classzone, unsigned int gfp_mask, int priority) ++static int FASTCALL(shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout)); ++static int shrink_cache(int nr_pages, int max_scan, zone_t * classzone, unsigned int gfp_mask, int * failed_swapout) + { + struct list_head * entry; +- int max_scan = nr_inactive_pages / priority; +- int max_mapped = nr_pages << (9 - priority); ++ int max_mapped = nr_pages * vm_mapped_ratio; + +- spin_lock(&pagemap_lru_lock); +- while (--max_scan >= 0 && (entry = inactive_list.prev) != &inactive_list) { ++ while (max_scan && classzone->nr_inactive_pages && (entry = inactive_list.prev) != &inactive_list) { + struct page * page; + + if (unlikely(current->need_resched)) { +@@ -365,6 +389,8 @@ + if (!memclass(page->zone, classzone)) + continue; + ++ max_scan--; ++ + /* Racy check to avoid trylocking when not worthwhile */ + if (!page->buffers && (page_count(page) != 1 || !page->mapping)) + goto page_mapped; +@@ -462,34 +488,43 @@ + spin_lock(&pagecache_lock); + + /* +- * this is the non-racy check for busy page. ++ * This is the non-racy check for busy page. ++ * It is critical to check PageDirty _after_ we made sure ++ * the page is freeable so not in use by anybody. ++ * At this point we're guaranteed that page->buffers is NULL, ++ * nobody can refill page->buffers under us because we still ++ * hold the page lock. + */ +- if (!page->mapping || !is_page_cache_freeable(page)) { ++ if (!page->mapping || page_count(page) > 1) { + spin_unlock(&pagecache_lock); + UnlockPage(page); +-page_mapped: +- if (--max_mapped >= 0) +- continue; ++ page_mapped: ++ if (--max_mapped < 0) { ++ spin_unlock(&pagemap_lru_lock); + +- /* +- * Alert! We've found too many mapped pages on the +- * inactive list, so we start swapping out now! 
+- */ +- spin_unlock(&pagemap_lru_lock); +- swap_out(priority, gfp_mask, classzone); +- return nr_pages; +- } ++ shrink_dcache_memory(vm_scan_ratio, gfp_mask); ++ shrink_icache_memory(vm_scan_ratio, gfp_mask); ++#ifdef CONFIG_QUOTA ++ shrink_dqcache_memory(vm_scan_ratio, gfp_mask); ++#endif + +- /* +- * It is critical to check PageDirty _after_ we made sure +- * the page is freeable* so not in use by anybody. +- */ ++ if (!*failed_swapout) ++ *failed_swapout = !swap_out(classzone); ++ max_mapped = nr_pages * vm_mapped_ratio; ++ ++ spin_lock(&pagemap_lru_lock); ++ } ++ continue; ++ ++ } + if (PageDirty(page)) { + spin_unlock(&pagecache_lock); + UnlockPage(page); + continue; + } + ++ __lru_cache_del(page); ++ + /* point of no return */ + if (likely(!PageSwapCache(page))) { + __remove_inode_page(page); +@@ -502,7 +537,6 @@ + swap_free(swap); + } + +- __lru_cache_del(page); + UnlockPage(page); + + /* effectively free the page here */ +@@ -524,74 +558,96 @@ + * We move them the other way when we see the + * reference bit on the page. + */ +-static void refill_inactive(int nr_pages) ++static void FASTCALL(refill_inactive(int nr_pages, zone_t * classzone)); ++static void refill_inactive(int nr_pages, zone_t * classzone) + { + struct list_head * entry; + +- spin_lock(&pagemap_lru_lock); + entry = active_list.prev; +- while (nr_pages-- && entry != &active_list) { ++ while (nr_pages && entry != &active_list) { + struct page * page; + + page = list_entry(entry, struct page, lru); + entry = entry->prev; ++ ++ if (!memclass(page->zone, classzone)) ++ continue; ++ + if (PageTestandClearReferenced(page)) { + list_del(&page->lru); + list_add(&page->lru, &active_list); + continue; + } + ++ nr_pages--; ++ + del_page_from_active_list(page); + add_page_to_inactive_list(page); + SetPageReferenced(page); + } +- spin_unlock(&pagemap_lru_lock); ++ if (entry != &active_list) { ++ list_del(&active_list); ++ list_add(&active_list, entry); ++ } + } + +-static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)); +-static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages) ++static int FASTCALL(shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout)); ++static int shrink_caches(zone_t * classzone, unsigned int gfp_mask, int nr_pages, int * failed_swapout) + { +- int chunk_size = nr_pages; ++ int max_scan, nr_pages_orig = nr_pages; + unsigned long ratio; + + nr_pages -= kmem_cache_reap(gfp_mask); + if (nr_pages <= 0) + return 0; + +- nr_pages = chunk_size; +- /* try to keep the active list 2/3 of the size of the cache */ +- ratio = (unsigned long) nr_pages * nr_active_pages / ((nr_inactive_pages + 1) * 2); +- refill_inactive(ratio); ++ nr_pages = nr_pages_orig; ++ spin_lock(&pagemap_lru_lock); ++ ratio = (unsigned long) nr_pages * classzone->nr_active_pages / (((unsigned long) classzone->nr_inactive_pages * vm_balance_ratio) + 1); ++ if (ratio > nr_pages * 2) ++ ratio = nr_pages * 2; ++ refill_inactive(ratio, classzone); + +- nr_pages = shrink_cache(nr_pages, classzone, gfp_mask, priority); +- if (nr_pages <= 0) +- return 0; +- +- shrink_dcache_memory(priority, gfp_mask); +- shrink_icache_memory(priority, gfp_mask); +-#ifdef CONFIG_QUOTA +- shrink_dqcache_memory(DEF_PRIORITY, gfp_mask); +-#endif ++ max_scan = classzone->nr_inactive_pages / vm_scan_ratio; ++ nr_pages = shrink_cache(nr_pages, max_scan, classzone, gfp_mask, failed_swapout); + + return nr_pages; + } + ++static int 
check_classzone_need_balance(zone_t * classzone);
++
+ int try_to_free_pages(zone_t *classzone, unsigned int gfp_mask, unsigned int order)
+ {
+- int priority = DEF_PRIORITY;
+- int nr_pages = SWAP_CLUSTER_MAX;
++ for (;;) {
++ int tries = vm_scan_ratio << 2;
++ int failed_swapout = 0;
+
+- do {
+- nr_pages = shrink_caches(classzone, priority, gfp_mask, nr_pages);
+- if (nr_pages <= 0)
+- return 1;
+- } while (--priority);
++ do {
++ int nr_pages = SWAP_CLUSTER_MAX;
++
++ nr_pages = shrink_caches(classzone, gfp_mask, nr_pages, &failed_swapout);
++ if (nr_pages <= 0)
++ return 1;
++
++ shrink_dcache_memory(vm_scan_ratio, gfp_mask);
++ shrink_icache_memory(vm_scan_ratio, gfp_mask);
++#ifdef CONFIG_QUOTA
++ shrink_dqcache_memory(vm_scan_ratio, gfp_mask);
++#endif
++
++ if (!failed_swapout)
++ failed_swapout = !swap_out(classzone);
++ } while (--tries);
++
++ if (likely(current->pid != 1))
++ break;
++ if (!check_classzone_need_balance(classzone))
++ break;
++ current->policy |= SCHED_YIELD;
++ __set_current_state(TASK_RUNNING);
++ schedule();
++ }
+
+- /*
+- * Hmm.. Cache shrink failed - time to kill something?
+- * Mhwahahhaha! This is the part I really like. Giggle.
+- */
+- out_of_memory();
+ return 0;
+ }
+
+@@ -624,7 +680,7 @@
+ if (!try_to_free_pages(zone, GFP_KSWAPD, 0)) {
+ zone->need_balance = 0;
+ __set_current_state(TASK_INTERRUPTIBLE);
+- schedule_timeout(HZ);
++ schedule_timeout(HZ*5);
+ continue;
+ }
+ if (check_classzone_need_balance(zone))
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7
new file mode 100644
index 000000000000..173c9d081ea2
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_highmem-debug-7
@@ -0,0 +1,37 @@
+diff -urN highmem-debug-ref/arch/i386/config.in highmem-debug/arch/i386/config.in
+--- highmem-debug-ref/arch/i386/config.in Tue Oct 23 12:56:05 2001
++++ highmem-debug/arch/i386/config.in Tue Oct 23 12:57:17 2001
+@@ -407,6 +407,9 @@
+ bool ' Spinlock debugging' CONFIG_DEBUG_SPINLOCK
+ bool ' Verbose BUG() reporting (adds 70K)' CONFIG_DEBUG_BUGVERBOSE
+ bool ' Debug allocation failures' CONFIG_DEBUG_GFP
++ if [ "$CONFIG_HIGHMEM" = "y" ]; then
++ bool ' Emulate HIGHMEM on lowmem machines' CONFIG_HIGHMEM_EMULATION
++ fi
+ fi
+
+ endmenu
+diff -urN highmem-debug-ref/arch/i386/kernel/setup.c highmem-debug/arch/i386/kernel/setup.c
+--- highmem-debug-ref/arch/i386/kernel/setup.c Sun Oct 21 20:03:33 2001
++++ highmem-debug/arch/i386/kernel/setup.c Tue Oct 23 12:57:35 2001
+@@ -821,7 +821,20 @@
+ */
+ #define VMALLOC_RESERVE (unsigned long)(128 << 20)
+ #define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
++#ifdef CONFIG_HIGHMEM_EMULATION
++#define ORDER_DOWN(x) ((x >> (MAX_ORDER-1)) << (MAX_ORDER-1))
++#define MAXMEM_PFN \
++({ \
++ int __max_pfn; \
++ if (max_pfn > PFN_DOWN(MAXMEM)) \
++ __max_pfn = PFN_DOWN(MAXMEM); \
++ else \
++ __max_pfn = ORDER_DOWN(max_pfn / 5); \
++ __max_pfn; \
++})
++#else
+ #define MAXMEM_PFN PFN_DOWN(MAXMEM)
++#endif
+ #define MAX_NONPAE_PFN (1 << 20)
+
+ /*
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1
new file mode 100644
index 000000000000..2dc98673eb6a
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_numa-mm-1
@@ -0,0 +1,327 @@
+diff -urN vm-ref/arch/sparc64/mm/init.c vm-numa/arch/sparc64/mm/init.c
+--- vm-ref/arch/sparc64/mm/init.c Thu Nov 1 20:05:09 2001
++++ vm-numa/arch/sparc64/mm/init.c Thu Nov 1 20:05:26
2001 +@@ -1591,7 +1591,7 @@ + * Set up the zero page, mark it reserved, so that page count + * is not manipulated when freeing the page from user ptes. + */ +- mem_map_zero = _alloc_pages(GFP_KERNEL, 0); ++ mem_map_zero = alloc_pages(GFP_KERNEL, 0); + if (mem_map_zero == NULL) { + prom_printf("paging_init: Cannot alloc zero page.\n"); + prom_halt(); +diff -urN vm-ref/include/asm-alpha/max_numnodes.h vm-numa/include/asm-alpha/max_numnodes.h +--- vm-ref/include/asm-alpha/max_numnodes.h Thu Jan 1 01:00:00 1970 ++++ vm-numa/include/asm-alpha/max_numnodes.h Thu Nov 1 20:05:26 2001 +@@ -0,0 +1,13 @@ ++#ifndef _ASM_MAX_NUMNODES_H ++#define _ASM_MAX_NUMNODES_H ++ ++#include ++ ++#ifdef CONFIG_ALPHA_WILDFIRE ++#include ++#define MAX_NUMNODES WILDFIRE_MAX_QBB ++#else ++#define MAX_NUMNODES 1 ++#endif ++ ++#endif +diff -urN vm-ref/include/asm-alpha/mmzone.h vm-numa/include/asm-alpha/mmzone.h +--- vm-ref/include/asm-alpha/mmzone.h Thu Nov 1 20:05:09 2001 ++++ vm-numa/include/asm-alpha/mmzone.h Thu Nov 1 20:05:26 2001 +@@ -37,11 +37,9 @@ + #ifdef CONFIG_ALPHA_WILDFIRE + # define ALPHA_PA_TO_NID(pa) ((pa) >> 36) /* 16 nodes max due 43bit kseg */ + #define NODE_MAX_MEM_SIZE (64L * 1024L * 1024L * 1024L) /* 64 GB */ +-#define MAX_NUMNODES WILDFIRE_MAX_QBB + #else + # define ALPHA_PA_TO_NID(pa) (0) + #define NODE_MAX_MEM_SIZE (~0UL) +-#define MAX_NUMNODES 1 + #endif + + #define PHYSADDR_TO_NID(pa) ALPHA_PA_TO_NID(pa) +@@ -63,8 +61,6 @@ + } + #endif + +-#ifdef CONFIG_DISCONTIGMEM +- + /* + * Following are macros that each numa implmentation must define. + */ +@@ -121,7 +117,5 @@ + + #define numa_node_id() cputonode(smp_processor_id()) + #endif /* CONFIG_NUMA */ +- +-#endif /* CONFIG_DISCONTIGMEM */ + + #endif /* _ASM_MMZONE_H_ */ +diff -urN vm-ref/include/linux/mm.h vm-numa/include/linux/mm.h +--- vm-ref/include/linux/mm.h Thu Nov 1 20:05:09 2001 ++++ vm-numa/include/linux/mm.h Thu Nov 1 20:05:26 2001 +@@ -372,7 +372,6 @@ + * can allocate highmem pages, the *get*page*() variants return + * virtual kernel addresses to the allocated page(s). + */ +-extern struct page * FASTCALL(_alloc_pages(unsigned int gfp_mask, unsigned int order)); + extern struct page * FASTCALL(__alloc_pages(unsigned int gfp_mask, unsigned int order, zonelist_t *zonelist)); + extern struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order); + +@@ -383,7 +382,13 @@ + */ + if (order >= MAX_ORDER) + return NULL; +- return _alloc_pages(gfp_mask, order); ++ /* ++ * we get the zone list from the current node and the gfp_mask. ++ * This zone list contains a maximum of ++ * MAXNODES*MAX_NR_ZONES zones. ++ */ ++ return __alloc_pages(gfp_mask, order, ++ NODE_DATA(numa_node_id())->node_zonelists + (gfp_mask & GFP_ZONEMASK)); + } + + #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) +diff -urN vm-ref/include/linux/mmzone.h vm-numa/include/linux/mmzone.h +--- vm-ref/include/linux/mmzone.h Thu Nov 1 20:05:09 2001 ++++ vm-numa/include/linux/mmzone.h Thu Nov 1 20:05:26 2001 +@@ -79,8 +79,14 @@ + * so despite the zonelist table being relatively big, the cache + * footprint of this construct is very small. 
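+ * (Editorial note, not part of the original patch: with the
+ * MAX_NUMNODES-aware sizing below, e.g. 4 nodes with MAX_NR_ZONES == 3
+ * yields a 13-pointer zones[] array per zonelist.)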
+ */ ++#ifndef CONFIG_DISCONTIGMEM ++#define MAX_NUMNODES 1 ++#else ++#include ++#endif /* !CONFIG_DISCONTIGMEM */ ++ + typedef struct zonelist_struct { +- zone_t * zones [MAX_NR_ZONES+1]; // NULL delimited ++ zone_t * zones [MAX_NUMNODES * MAX_NR_ZONES+1]; // NULL delimited + } zonelist_t; + + #define GFP_ZONEMASK 0x0f +@@ -126,6 +132,7 @@ + extern void free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap, + unsigned long *zones_size, unsigned long paddr, unsigned long *zholes_size, + struct page *pmap); ++extern void build_all_zonelists(void); + + extern pg_data_t contig_page_data; + +diff -urN vm-ref/init/main.c vm-numa/init/main.c +--- vm-ref/init/main.c Thu Nov 1 20:05:09 2001 ++++ vm-numa/init/main.c Thu Nov 1 20:05:26 2001 +@@ -553,6 +553,7 @@ + lock_kernel(); + printk(linux_banner); + setup_arch(&command_line); ++ build_all_zonelists(); + printk("Kernel command line: %s\n", saved_command_line); + parse_options(command_line); + trap_init(); +diff -urN vm-ref/kernel/ksyms.c vm-numa/kernel/ksyms.c +--- vm-ref/kernel/ksyms.c Thu Nov 1 20:05:09 2001 ++++ vm-numa/kernel/ksyms.c Thu Nov 1 20:05:35 2001 +@@ -93,7 +93,6 @@ + + /* internal kernel memory management */ + EXPORT_SYMBOL(start_aggressive_readahead); +-EXPORT_SYMBOL(_alloc_pages); + EXPORT_SYMBOL(__alloc_pages); + EXPORT_SYMBOL(alloc_pages_node); + EXPORT_SYMBOL(__get_free_pages); +@@ -113,7 +112,10 @@ + EXPORT_SYMBOL(kfree); + EXPORT_SYMBOL(vfree); + EXPORT_SYMBOL(__vmalloc); ++#ifndef CONFIG_DISCONTIGMEM ++EXPORT_SYMBOL(contig_page_data); + EXPORT_SYMBOL(mem_map); ++#endif + EXPORT_SYMBOL(remap_page_range); + EXPORT_SYMBOL(max_mapnr); + EXPORT_SYMBOL(high_memory); +diff -urN vm-ref/mm/numa.c vm-numa/mm/numa.c +--- vm-ref/mm/numa.c Thu Nov 1 20:05:09 2001 ++++ vm-numa/mm/numa.c Thu Nov 1 20:05:26 2001 +@@ -82,49 +82,4 @@ + memset(pgdat->valid_addr_bitmap, 0, size); + } + +-static struct page * alloc_pages_pgdat(pg_data_t *pgdat, unsigned int gfp_mask, +- unsigned int order) +-{ +- return __alloc_pages(gfp_mask, order, pgdat->node_zonelists + (gfp_mask & GFP_ZONEMASK)); +-} +- +-/* +- * This can be refined. Currently, tries to do round robin, instead +- * should do concentratic circle search, starting from current node. 
+- */ +-struct page * _alloc_pages(unsigned int gfp_mask, unsigned int order) +-{ +- struct page *ret = 0; +- pg_data_t *start, *temp; +-#ifndef CONFIG_NUMA +- unsigned long flags; +- static pg_data_t *next = 0; +-#endif +- +- if (order >= MAX_ORDER) +- return NULL; +-#ifdef CONFIG_NUMA +- temp = NODE_DATA(numa_node_id()); +-#else +- spin_lock_irqsave(&node_lock, flags); +- if (!next) next = pgdat_list; +- temp = next; +- next = next->node_next; +- spin_unlock_irqrestore(&node_lock, flags); +-#endif +- start = temp; +- while (temp) { +- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order))) +- return(ret); +- temp = temp->node_next; +- } +- temp = pgdat_list; +- while (temp != start) { +- if ((ret = alloc_pages_pgdat(temp, gfp_mask, order))) +- return(ret); +- temp = temp->node_next; +- } +- return(0); +-} +- + #endif /* CONFIG_DISCONTIGMEM */ +diff -urN vm-ref/mm/page_alloc.c vm-numa/mm/page_alloc.c +--- vm-ref/mm/page_alloc.c Thu Nov 1 20:05:09 2001 ++++ vm-numa/mm/page_alloc.c Thu Nov 1 20:05:26 2001 +@@ -220,14 +220,6 @@ + return NULL; + } + +-#ifndef CONFIG_DISCONTIGMEM +-struct page *_alloc_pages(unsigned int gfp_mask, unsigned int order) +-{ +- return __alloc_pages(gfp_mask, order, +- contig_page_data.node_zonelists+(gfp_mask & GFP_ZONEMASK)); +-} +-#endif +- + static struct page * FASTCALL(balance_classzone(zone_t *, unsigned int, unsigned int, int *)); + static struct page * balance_classzone(zone_t * classzone, unsigned int gfp_mask, unsigned int order, int * freed) + { +@@ -710,13 +702,41 @@ + /* + * Builds allocation fallback zone lists. + */ +-static inline void build_zonelists(pg_data_t *pgdat) ++static int __init build_zonelists_node(pg_data_t *pgdat, zonelist_t *zonelist, int j, int k) ++{ ++ switch (k) { ++ zone_t *zone; ++ default: ++ BUG(); ++ case ZONE_HIGHMEM: ++ zone = pgdat->node_zones + ZONE_HIGHMEM; ++ if (zone->size) { ++#ifndef CONFIG_HIGHMEM ++ BUG(); ++#endif ++ zonelist->zones[j++] = zone; ++ } ++ case ZONE_NORMAL: ++ zone = pgdat->node_zones + ZONE_NORMAL; ++ if (zone->size) ++ zonelist->zones[j++] = zone; ++ case ZONE_DMA: ++ zone = pgdat->node_zones + ZONE_DMA; ++ if (zone->size) ++ zonelist->zones[j++] = zone; ++ } ++ ++ return j; ++} ++ ++static void __init build_zonelists(pg_data_t *pgdat) + { +- int i, j, k; ++ int i, j, k, node, local_node; + ++ local_node = pgdat->node_id; ++ printk("Building zonelist for node : %d\n", local_node); + for (i = 0; i <= GFP_ZONEMASK; i++) { + zonelist_t *zonelist; +- zone_t *zone; + + zonelist = pgdat->node_zonelists + i; + memset(zonelist, 0, sizeof(*zonelist)); +@@ -728,33 +748,32 @@ + if (i & __GFP_DMA) + k = ZONE_DMA; + +- switch (k) { +- default: +- BUG(); +- /* +- * fallthrough: +- */ +- case ZONE_HIGHMEM: +- zone = pgdat->node_zones + ZONE_HIGHMEM; +- if (zone->size) { +-#ifndef CONFIG_HIGHMEM +- BUG(); +-#endif +- zonelist->zones[j++] = zone; +- } +- case ZONE_NORMAL: +- zone = pgdat->node_zones + ZONE_NORMAL; +- if (zone->size) +- zonelist->zones[j++] = zone; +- case ZONE_DMA: +- zone = pgdat->node_zones + ZONE_DMA; +- if (zone->size) +- zonelist->zones[j++] = zone; +- } ++ j = build_zonelists_node(pgdat, zonelist, j, k); ++ /* ++ * Now we build the zonelist so that it contains the zones ++ * of all the other nodes. 
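++ * (Editorial illustration, not in the original patch: with
++ * numnodes == 4, node 2's fallback order works out to nodes
++ * 2, 3, 0, 1, following the rule described next.)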
++ * We don't want to pressure a particular node, so when
++ * building the zones for node N, we make sure that the
++ * zones coming right after the local ones are those from
++ * node N+1 (modulo numnodes)
++ */
++ for (node = local_node + 1; node < numnodes; node++)
++ j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
++ for (node = 0; node < local_node; node++)
++ j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
++
+ zonelist->zones[j++] = NULL;
+ }
+ }
+
++void __init build_all_zonelists(void)
++{
++ int i;
++
++ for(i = 0 ; i < numnodes ; i++)
++ build_zonelists(NODE_DATA(i));
++}
++
+ #define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
+ /*
+@@ -910,7 +929,6 @@
+ (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size);
+ }
+ }
+- build_zonelists(pgdat);
+ }
+
+ void __init free_area_init(unsigned long *zones_size)
diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2
new file mode 100644
index 000000000000..75c5a94320df
--- /dev/null
+++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/20_share-timeslice-2
@@ -0,0 +1,41 @@
+diff -urN parent-timeslice/include/linux/sched.h child-first/include/linux/sched.h
+--- parent-timeslice/include/linux/sched.h Thu May 3 18:17:56 2001
++++ child-first/include/linux/sched.h Thu May 3 18:19:44 2001
+@@ -301,7 +301,7 @@
+ * all fields in a single cacheline that are needed for
+ * the goodness() loop in schedule().
+ */
+- int counter;
++ volatile int counter;
+ int nice;
+ unsigned int policy;
+ struct mm_struct *mm;
+diff -urN parent-timeslice/kernel/fork.c child-first/kernel/fork.c
+--- parent-timeslice/kernel/fork.c Thu May 3 18:18:31 2001
++++ child-first/kernel/fork.c Thu May 3 18:20:40 2001
+@@ -665,15 +665,18 @@
+ p->pdeath_signal = 0;
+
+ /*
+- * "share" dynamic priority between parent and child, thus the
+- * total amount of dynamic priorities in the system doesnt change,
+- * more scheduling fairness. This is only important in the first
+- * timeslice, on the long run the scheduling behaviour is unchanged.
++ * Scheduling the child first is especially useful in avoiding a
++ * lot of copy-on-write faults if the child of a fork() just wants
++ * to do a few simple things and then exec().
+ */ +- p->counter = (current->counter + 1) >> 1; +- current->counter >>= 1; +- if (!current->counter) ++ { ++ int counter = current->counter; ++ p->counter = (counter + 1) >> 1; ++ current->counter = counter >> 1; ++ p->policy &= ~SCHED_YIELD; ++ current->policy |= SCHED_YIELD; + current->need_resched = 1; ++ } + + /* Tell the parent if it can get back its timeslice when child exits */ + p->get_child_timeslice = 1; diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2 new file mode 100644 index 000000000000..8fd2ad956df1 Binary files /dev/null and b/sys-kernel/linux-sources/files/2.4.15pre1aa1/50_uml-patch-2.4.13-5.bz2 differ diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5 new file mode 100644 index 000000000000..9e1df6cc2e1b --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/51_uml-ac-to-aa-5 @@ -0,0 +1,60 @@ +diff -urN uml-ref/arch/um/kernel/time.c uml/arch/um/kernel/time.c +--- uml-ref/arch/um/kernel/time.c Mon Jul 23 17:07:17 2001 ++++ uml/arch/um/kernel/time.c Mon Jul 23 17:08:36 2001 +@@ -16,7 +16,7 @@ + #include "user.h" + #include "process.h" + +-extern struct timeval xtime; ++extern volatile struct timeval xtime; + + void timer_handler(int sig, void *sc, int usermode) + { +diff -urN uml-ref/arch/um/kernel/trap_kern.c uml/arch/um/kernel/trap_kern.c +--- uml-ref/arch/um/kernel/trap_kern.c Mon Jul 23 17:07:17 2001 ++++ uml/arch/um/kernel/trap_kern.c Mon Jul 23 17:08:36 2001 +@@ -41,7 +41,7 @@ + if(!vma) ok = 0; + else if(vma->vm_start > address){ + if((vma->vm_flags & VM_STACK_FLAGS) != VM_STACK_FLAGS) ok = 0; +- else if(expand_stack(vma, address)) ok = 0; ++ else if(expand_stack(vma, address, NULL)) ok = 0; + } + if(!ok){ + if (current->thread.fault_catcher != NULL) { +diff -urN uml-ref/include/asm-um/rwsem.h uml/include/asm-um/rwsem.h +--- uml-ref/include/asm-um/rwsem.h Mon Jul 23 17:07:17 2001 ++++ uml/include/asm-um/rwsem.h Thu Jan 1 01:00:00 1970 +@@ -1,10 +0,0 @@ +-#ifndef __UM_RWSEM_H__ +-#define __UM_RWSEM_H__ +- +-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96) +-#define __builtin_expect(exp,c) (exp) +-#endif +- +-#include "asm/arch/rwsem.h" +- +-#endif +diff -urN uml-ref/include/asm-um/rwsem_xchgadd.h uml/include/asm-um/rwsem_xchgadd.h +--- uml-ref/include/asm-um/rwsem_xchgadd.h Thu Jan 1 01:00:00 1970 ++++ uml/include/asm-um/rwsem_xchgadd.h Mon Jul 23 17:08:36 2001 +@@ -0,0 +1,6 @@ ++#ifndef __UM_RWSEM_H__ ++#define __UM_RWSEM_H__ ++ ++#include "asm/arch/rwsem_xchgadd.h" ++ ++#endif +diff -urN uml-ref/include/asm-um/timex.h uml/include/asm-um/timex.h +--- uml-ref/include/asm-um/timex.h Mon Jul 23 17:07:17 2001 ++++ uml/include/asm-um/timex.h Mon Jul 23 17:08:36 2001 +@@ -12,4 +12,8 @@ + return 0; + } + ++typedef long last_schedule_t; ++#define get_last_schedule() ({ jiffies; }) ++#define last_schedule_before(a, b) ({ a < b; }) ++ + #endif diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1 new file mode 100644 index 000000000000..21b64f13fa64 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/54_uml-sa_interrupt-1 @@ -0,0 +1,18 @@ +--- 2.4.10pre2aa3/arch/um/kernel/irq.c.~1~ Sat Sep 1 02:40:55 2001 ++++ 2.4.10pre2aa3/arch/um/kernel/irq.c Sat Sep 1 02:59:47 2001 +@@ -141,10 +141,12 @@ + + status = 1; /* Force the "do bottom halves" bit */ + +- if 
(!(action->flags & SA_INTERRUPT)) +- __sti(); +- + do { ++ if (!(action->flags & SA_INTERRUPT)) ++ __sti(); ++ else ++ __cli(); ++ + status |= action->flags; + action->handler(irq, action->dev_id, regs); + action = action->next; diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6 new file mode 100644 index 000000000000..9f2aa4a718a8 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-alloc-6 @@ -0,0 +1,55 @@ +diff -urN atomic-alloc-ref/fs/buffer.c atomic-alloc/fs/buffer.c +--- atomic-alloc-ref/fs/buffer.c Thu Sep 20 01:44:06 2001 ++++ atomic-alloc/fs/buffer.c Thu Sep 20 20:00:42 2001 +@@ -2613,7 +2613,7 @@ + spin_unlock(&free_list[index].lock); + write_unlock(&hash_table_lock); + spin_unlock(&lru_list_lock); +- if (gfp_mask & __GFP_IO) { ++ if (gfp_mask & __GFP_IO && !(current->flags & PF_ATOMICALLOC)) { + if ((gfp_mask & __GFP_HIGHIO) || !PageHighMem(page)) { + if (sync_page_buffers(bh, gfp_mask)) { + /* no IO or waiting next time */ +diff -urN atomic-alloc-ref/include/linux/sched.h atomic-alloc/include/linux/sched.h +--- atomic-alloc-ref/include/linux/sched.h Thu Sep 20 20:00:21 2001 ++++ atomic-alloc/include/linux/sched.h Thu Sep 20 20:01:06 2001 +@@ -408,18 +408,16 @@ + /* + * Per process flags + */ +-#define PF_ALIGNWARN 0x00000001 /* Print alignment warning msgs */ +- /* Not implemented yet, only for 486*/ +-#define PF_STARTING 0x00000002 /* being created */ +-#define PF_EXITING 0x00000004 /* getting shut down */ +-#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +-#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ +-#define PF_DUMPCORE 0x00000200 /* dumped core */ +-#define PF_SIGNALED 0x00000400 /* killed by a signal */ +-#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +-#define PF_FREE_PAGES 0x00002000 /* per process page freeing */ ++#define PF_EXITING (1UL<<0) /* getting shut down */ ++#define PF_FORKNOEXEC (1UL<<1) /* forked but didn't exec */ ++#define PF_SUPERPRIV (1UL<<2) /* used super-user privileges */ ++#define PF_DUMPCORE (1UL<<3) /* dumped core */ ++#define PF_SIGNALED (1UL<<4) /* killed by a signal */ ++#define PF_MEMALLOC (1UL<<5) /* Allocating memory */ ++#define PF_USEDFPU (1UL<<6) /* task used FPU this quantum (SMP) */ ++#define PF_ATOMICALLOC (1UL<<7) /* do not block during memalloc */ ++#define PF_FREE_PAGES (1UL<<8) /* per process page freeing */ + +-#define PF_USEDFPU 0x00100000 /* task used FPU this quantum (SMP) */ + + /* + * Ptrace flags +diff -urN atomic-alloc-ref/mm/slab.c atomic-alloc/mm/slab.c +--- atomic-alloc-ref/mm/slab.c Thu Sep 20 01:44:20 2001 ++++ atomic-alloc/mm/slab.c Thu Sep 20 20:00:42 2001 +@@ -1715,7 +1715,7 @@ + unsigned int scan; + int ret = 0; + +- if (gfp_mask & __GFP_WAIT) ++ if (gfp_mask & __GFP_WAIT && !(current->flags & PF_ATOMICALLOC)) + down(&cache_chain_sem); + else + if (down_trylock(&cache_chain_sem)) diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5 new file mode 100644 index 000000000000..d07cfb6d3b02 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_atomic-lookup-5 @@ -0,0 +1,137 @@ +diff -urN 2.4.15pre1/fs/namei.c atomic-lookup/fs/namei.c +--- 2.4.15pre1/fs/namei.c Wed Oct 24 08:04:22 2001 ++++ atomic-lookup/fs/namei.c Fri Nov 9 04:34:12 2001 +@@ -448,9 +448,13 @@ + { + struct dentry *dentry; + struct inode *inode; +- int err; ++ int err, atomic; + unsigned int 
lookup_flags = nd->flags; + ++ atomic = 0; ++ if (lookup_flags & LOOKUP_ATOMIC) ++ atomic = 1; ++ + while (*name=='/') + name++; + if (!*name) +@@ -519,6 +523,9 @@ + /* This does the actual lookups.. */ + dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE); + if (!dentry) { ++ err = -EWOULDBLOCKIO; ++ if (atomic) ++ break; + dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) +@@ -582,6 +589,9 @@ + } + dentry = cached_lookup(nd->dentry, &this, 0); + if (!dentry) { ++ err = -EWOULDBLOCKIO; ++ if (atomic) ++ break; + dentry = real_lookup(nd->dentry, &this, 0); + err = PTR_ERR(dentry); + if (IS_ERR(dentry)) +@@ -924,6 +934,8 @@ + + if (f & O_DIRECTORY) + retval |= LOOKUP_DIRECTORY; ++ if (f & O_ATOMICLOOKUP) ++ retval |= LOOKUP_ATOMIC; + + return retval; + } +diff -urN 2.4.15pre1/include/asm-alpha/fcntl.h atomic-lookup/include/asm-alpha/fcntl.h +--- 2.4.15pre1/include/asm-alpha/fcntl.h Sun Sep 23 21:11:40 2001 ++++ atomic-lookup/include/asm-alpha/fcntl.h Fri Nov 9 04:34:12 2001 +@@ -20,6 +20,7 @@ + #define O_DIRECTORY 0100000 /* must be a directory */ + #define O_NOFOLLOW 0200000 /* don't follow links */ + #define O_LARGEFILE 0400000 /* will be set by the kernel on every open */ ++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ + #define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */ + + #define F_DUPFD 0 /* dup */ +diff -urN 2.4.15pre1/include/asm-i386/fcntl.h atomic-lookup/include/asm-i386/fcntl.h +--- 2.4.15pre1/include/asm-i386/fcntl.h Sun Sep 23 21:11:40 2001 ++++ atomic-lookup/include/asm-i386/fcntl.h Fri Nov 9 04:34:12 2001 +@@ -20,6 +20,7 @@ + #define O_LARGEFILE 0100000 + #define O_DIRECTORY 0200000 /* must be a directory */ + #define O_NOFOLLOW 0400000 /* don't follow links */ ++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ + + #define F_DUPFD 0 /* dup */ + #define F_GETFD 1 /* get close_on_exec */ +diff -urN 2.4.15pre1/include/asm-ia64/fcntl.h atomic-lookup/include/asm-ia64/fcntl.h +--- 2.4.15pre1/include/asm-ia64/fcntl.h Thu Nov 16 15:37:42 2000 ++++ atomic-lookup/include/asm-ia64/fcntl.h Fri Nov 9 04:34:12 2001 +@@ -28,6 +28,7 @@ + #define O_LARGEFILE 0100000 + #define O_DIRECTORY 0200000 /* must be a directory */ + #define O_NOFOLLOW 0400000 /* don't follow links */ ++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ + + #define F_DUPFD 0 /* dup */ + #define F_GETFD 1 /* get close_on_exec */ +diff -urN 2.4.15pre1/include/asm-ppc/fcntl.h atomic-lookup/include/asm-ppc/fcntl.h +--- 2.4.15pre1/include/asm-ppc/fcntl.h Tue Nov 6 02:04:53 2001 ++++ atomic-lookup/include/asm-ppc/fcntl.h Fri Nov 9 04:34:42 2001 +@@ -23,6 +23,7 @@ + #define O_NOFOLLOW 0100000 /* don't follow links */ + #define O_LARGEFILE 0200000 + #define O_DIRECT 0400000 /* direct disk access hint */ ++#define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */ + + #define F_DUPFD 0 /* dup */ + #define F_GETFD 1 /* get close_on_exec */ +diff -urN 2.4.15pre1/include/asm-sparc/fcntl.h atomic-lookup/include/asm-sparc/fcntl.h +--- 2.4.15pre1/include/asm-sparc/fcntl.h Sun Sep 23 21:11:42 2001 ++++ atomic-lookup/include/asm-sparc/fcntl.h Fri Nov 9 04:34:12 2001 +@@ -20,6 +20,7 @@ + #define O_DIRECTORY 0x10000 /* must be a directory */ + #define O_NOFOLLOW 0x20000 /* don't follow links */ + #define O_LARGEFILE 0x40000 ++#define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */ + #define O_DIRECT 0x100000 /* direct disk access hint */ + + #define F_DUPFD 0 /* dup */ +diff -urN 
2.4.15pre1/include/asm-sparc64/fcntl.h atomic-lookup/include/asm-sparc64/fcntl.h +--- 2.4.15pre1/include/asm-sparc64/fcntl.h Sun Sep 23 21:11:42 2001 ++++ atomic-lookup/include/asm-sparc64/fcntl.h Fri Nov 9 04:34:12 2001 +@@ -20,6 +20,7 @@ + #define O_DIRECTORY 0x10000 /* must be a directory */ + #define O_NOFOLLOW 0x20000 /* don't follow links */ + #define O_LARGEFILE 0x40000 ++#define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */ + #define O_DIRECT 0x100000 /* direct disk access hint */ + + +diff -urN 2.4.15pre1/include/linux/errno.h atomic-lookup/include/linux/errno.h +--- 2.4.15pre1/include/linux/errno.h Fri Aug 17 05:02:27 2001 ++++ atomic-lookup/include/linux/errno.h Fri Nov 9 04:34:12 2001 +@@ -21,6 +21,9 @@ + #define EBADTYPE 527 /* Type not supported by server */ + #define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */ + ++/* Defined for TUX async IO */ ++#define EWOULDBLOCKIO 530 /* Would block due to block-IO */ ++ + #endif + + #endif +diff -urN 2.4.15pre1/include/linux/fs.h atomic-lookup/include/linux/fs.h +--- 2.4.15pre1/include/linux/fs.h Tue Nov 6 02:04:53 2001 ++++ atomic-lookup/include/linux/fs.h Fri Nov 9 04:34:12 2001 +@@ -1260,6 +1260,7 @@ + #define LOOKUP_POSITIVE (8) + #define LOOKUP_PARENT (16) + #define LOOKUP_NOALT (32) ++#define LOOKUP_ATOMIC (64) + /* + * Type of the last component on LOOKUP_PARENT + */ diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1 new file mode 100644 index 000000000000..fee75a43d12a --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_net-exports-1 @@ -0,0 +1,183 @@ +diff -urN ref/include/linux/socket.h netexports/include/linux/socket.h +--- ref/include/linux/socket.h Sat Apr 28 20:29:47 2001 ++++ netexports/include/linux/socket.h Sun Apr 29 17:28:53 2001 +@@ -254,6 +254,11 @@ + extern int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen); + extern int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr); + extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); ++struct socket; ++struct file * sock_map_file(struct socket *sock); ++extern int sock_map_fd(struct socket *sock); ++extern struct socket *sockfd_lookup(int fd, int *err); ++ + #endif + #endif /* not kernel and not glibc */ + #endif /* _LINUX_SOCKET_H */ +diff -urN ref/include/net/tcp.h netexports/include/net/tcp.h +--- ref/include/net/tcp.h Sat Apr 28 20:34:59 2001 ++++ netexports/include/net/tcp.h Sun Apr 29 17:28:53 2001 +@@ -810,6 +810,7 @@ + extern void tcp_push_one(struct sock *, unsigned mss_now); + extern void tcp_send_ack(struct sock *sk); + extern void tcp_send_delayed_ack(struct sock *sk); ++extern void cleanup_rbuf(struct sock *sk, int copied); + + /* tcp_timer.c */ + extern void tcp_init_xmit_timers(struct sock *); +diff -urN ref/net/ipv4/tcp.c netexports/net/ipv4/tcp.c +--- ref/net/ipv4/tcp.c Sat Apr 28 05:24:49 2001 ++++ netexports/net/ipv4/tcp.c Sun Apr 29 17:28:53 2001 +@@ -1270,7 +1270,7 @@ + * calculation of whether or not we must ACK for the sake of + * a window update. 
+ */ +-static void cleanup_rbuf(struct sock *sk, int copied) ++void cleanup_rbuf(struct sock *sk, int copied) + { + struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp); + int time_to_ack = 0; +diff -urN ref/net/netsyms.c netexports/net/netsyms.c +--- ref/net/netsyms.c Sat Apr 28 05:24:49 2001 ++++ netexports/net/netsyms.c Sun Apr 29 17:28:53 2001 +@@ -106,6 +106,8 @@ + EXPORT_SYMBOL(sock_create); + EXPORT_SYMBOL(sock_alloc); + EXPORT_SYMBOL(sock_release); ++EXPORT_SYMBOL(sock_map_fd); ++EXPORT_SYMBOL(sockfd_lookup); + EXPORT_SYMBOL(sock_setsockopt); + EXPORT_SYMBOL(sock_getsockopt); + EXPORT_SYMBOL(sock_sendmsg); +@@ -307,6 +309,7 @@ + EXPORT_SYMBOL(memcpy_fromiovecend); + EXPORT_SYMBOL(csum_partial_copy_fromiovecend); + EXPORT_SYMBOL(tcp_v4_lookup_listener); ++EXPORT_SYMBOL(cleanup_rbuf); + /* UDP/TCP exported functions for TCPv6 */ + EXPORT_SYMBOL(udp_ioctl); + EXPORT_SYMBOL(udp_connect); +@@ -324,6 +327,7 @@ + EXPORT_SYMBOL(tcp_getsockopt); + EXPORT_SYMBOL(tcp_recvmsg); + EXPORT_SYMBOL(tcp_send_synack); ++EXPORT_SYMBOL(tcp_send_skb); + EXPORT_SYMBOL(tcp_check_req); + EXPORT_SYMBOL(tcp_child_process); + EXPORT_SYMBOL(tcp_parse_options); +diff -urN ref/net/socket.c netexports/net/socket.c +--- ref/net/socket.c Sat Apr 28 05:24:50 2001 ++++ netexports/net/socket.c Sun Apr 29 17:28:53 2001 +@@ -114,7 +114,7 @@ + * in the operation structures but are done directly via the socketcall() multiplexor. + */ + +-static struct file_operations socket_file_ops = { ++struct file_operations socket_file_ops = { + llseek: sock_lseek, + read: sock_read, + write: sock_write, +@@ -330,51 +330,62 @@ + * but we take care of internal coherence yet. + */ + +-static int sock_map_fd(struct socket *sock) ++struct file * sock_map_file(struct socket *sock) + { +- int fd; ++ struct file *file; + struct qstr this; + char name[32]; + ++ file = get_empty_filp(); ++ ++ if (!file) ++ return ERR_PTR(-ENFILE); ++ ++ sprintf(name, "[%lu]", sock->inode->i_ino); ++ this.name = name; ++ this.len = strlen(name); ++ this.hash = sock->inode->i_ino; ++ ++ file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); ++ if (!file->f_dentry) { ++ put_filp(file); ++ return ERR_PTR(-ENOMEM); ++ } ++ file->f_dentry->d_op = &sockfs_dentry_operations; ++ d_add(file->f_dentry, sock->inode); ++ file->f_vfsmnt = mntget(sock_mnt); ++ ++ if (sock->file) ++ BUG(); ++ sock->file = file; ++ file->f_op = sock->inode->i_fop = &socket_file_ops; ++ file->f_mode = 3; ++ file->f_flags = O_RDWR; ++ file->f_pos = 0; ++ ++ return file; ++} ++ ++int sock_map_fd(struct socket *sock) ++{ ++ int fd; ++ struct file *file; ++ + /* + * Find a file descriptor suitable for return to the user. 
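The point of the socket.c rework in this hunk: the old sock_map_fd() both built the struct file and bound it to a descriptor; the patch splits the first half out as sock_map_file() so an in-kernel user like TUX can get a file for a socket without consuming an fd. The fd path also gets simpler unwinding: the descriptor is reserved first and put_unused_fd() releases it if the file setup fails. A stand-alone sketch of the same split with toy types (nothing here is the kernel API, and the fd bookkeeping is reduced to a counter):

#include <stdio.h>
#include <stdlib.h>

struct file { int refcnt; };
struct socket { struct file *file; };

/* analogue of sock_map_file(): attach a file object to the socket */
static struct file *sock_map_file(struct socket *sock)
{
        struct file *f = calloc(1, sizeof(*f));

        if (!f)
                return NULL;            /* the kernel returns ERR_PTR(-ENFILE) */
        f->refcnt = 1;
        sock->file = f;
        return f;
}

/* analogue of sock_map_fd(): the same, plus a descriptor */
static int sock_map_fd(struct socket *sock)
{
        static int next_fd = 3;         /* stand-in for get_unused_fd()/fd_install() */

        if (!sock_map_file(sock))
                return -1;              /* real code also gives the reserved fd back */
        return next_fd++;
}

int main(void)
{
        struct socket s = { 0 };
        int fd = sock_map_fd(&s);

        printf("fd %d, refcnt %d\n", fd, s.file->refcnt);
        free(s.file);
        return 0;
}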
+ */ + + fd = get_unused_fd(); +- if (fd >= 0) { +- struct file *file = get_empty_filp(); +- +- if (!file) { +- put_unused_fd(fd); +- fd = -ENFILE; +- goto out; +- } ++ if (fd < 0) ++ return fd; + +- sprintf(name, "[%lu]", sock->inode->i_ino); +- this.name = name; +- this.len = strlen(name); +- this.hash = sock->inode->i_ino; +- +- file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this); +- if (!file->f_dentry) { +- put_filp(file); +- put_unused_fd(fd); +- fd = -ENOMEM; +- goto out; +- } +- file->f_dentry->d_op = &sockfs_dentry_operations; +- d_add(file->f_dentry, sock->inode); +- file->f_vfsmnt = mntget(sock_mnt); +- +- sock->file = file; +- file->f_op = sock->inode->i_fop = &socket_file_ops; +- file->f_mode = 3; +- file->f_flags = O_RDWR; +- file->f_pos = 0; +- fd_install(fd, file); ++ file = sock_map_file(sock); ++ if (IS_ERR(file)) { ++ put_unused_fd(fd); ++ return PTR_ERR(file); + } ++ fd_install(fd, file); + +-out: + return fd; + } + +@@ -801,6 +812,8 @@ + } + + out: ++ if (sock->sk != sk) ++ BUG(); + release_sock(sock->sk); + return 0; + } diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3 new file mode 100644 index 000000000000..e5b2e9b6264a --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_pagecache-atomic-3 @@ -0,0 +1,69 @@ +diff -urN pagecache-atomic-ref/include/linux/fs.h pagecache-atomic/include/linux/fs.h +--- pagecache-atomic-ref/include/linux/fs.h Mon Aug 13 03:21:42 2001 ++++ pagecache-atomic/include/linux/fs.h Mon Aug 13 03:22:41 2001 +@@ -1370,7 +1370,9 @@ + extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); + extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); + extern ssize_t generic_file_write(struct file *, const char *, size_t, loff_t *); +-extern void do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t); ++extern void __do_generic_file_read(struct file *, loff_t *, read_descriptor_t *, read_actor_t, int); ++#define do_generic_file_read(filp, ppos, desc, actor) __do_generic_file_read(filp, ppos, desc, actor, 0) ++#define do_generic_file_read_atomic(filp, ppos, desc, actor) __do_generic_file_read(filp, ppos, desc, actor, 1) + extern loff_t no_llseek(struct file *file, loff_t offset, int origin); + extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); + extern ssize_t generic_read_dir(struct file *, char *, size_t, loff_t *); +diff -urN pagecache-atomic-ref/kernel/ksyms.c pagecache-atomic/kernel/ksyms.c +--- pagecache-atomic-ref/kernel/ksyms.c Mon Aug 13 03:21:42 2001 ++++ pagecache-atomic/kernel/ksyms.c Mon Aug 13 03:21:54 2001 +@@ -208,7 +208,7 @@ + EXPORT_SYMBOL(generic_block_bmap); + EXPORT_SYMBOL(waitfor_one_page); + EXPORT_SYMBOL(generic_file_read); +-EXPORT_SYMBOL(do_generic_file_read); ++EXPORT_SYMBOL(__do_generic_file_read); + EXPORT_SYMBOL(generic_file_write); + EXPORT_SYMBOL(generic_direct_IO); + EXPORT_SYMBOL(generic_file_mmap); +diff -urN pagecache-atomic-ref/mm/filemap.c pagecache-atomic/mm/filemap.c +--- pagecache-atomic-ref/mm/filemap.c Mon Aug 13 03:21:42 2001 ++++ pagecache-atomic/mm/filemap.c Mon Aug 13 03:21:54 2001 +@@ -1128,7 +1128,7 @@ + * This is really ugly. But the goto's actually try to clarify some + * of the logic when it comes to error handling etc. 
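Worth calling out in 60_pagecache-atomic-3: existing callers never see the new nonblock argument, because the fs.h hunk turns do_generic_file_read() into a macro over the extended __do_generic_file_read(), a cheap 2.4-era way to widen an exported interface without touching every filesystem. Tiny stand-alone model of that wrapper trick (names invented for the sketch):

#include <stdio.h>

static void __do_read(const char *file, int nonblock)
{
        printf("read %s (nonblock=%d)\n", file, nonblock);
}

/* the old name keeps its old one-argument shape */
#define do_read(file)           __do_read(file, 0)
#define do_read_atomic(file)    __do_read(file, 1)

int main(void)
{
        do_read("a.txt");               /* legacy caller, blocks as before */
        do_read_atomic("b.txt");        /* new nonblocking entry point */
        return 0;
}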
+ */ +-void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor) ++void __do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * desc, read_actor_t actor, int nonblock) + { + struct inode *inode = filp->f_dentry->d_inode; + struct address_space *mapping = inode->i_mapping; +@@ -1211,9 +1211,16 @@ + page_cache_get(page); + spin_unlock(&pagecache_lock); + +- if (!Page_Uptodate(page)) ++ if (!Page_Uptodate(page)) { ++ if (nonblock) { ++ page_cache_release(page); ++ desc->error = -EWOULDBLOCKIO; ++ break; ++ } + goto page_not_up_to_date; +- generic_file_readahead(reada_ok, filp, inode, page); ++ } ++ if (!nonblock) ++ generic_file_readahead(reada_ok, filp, inode, page); + page_ok: + /* If users can be writing to this page using arbitrary + * virtual addresses, take care about potential aliasing +@@ -1290,6 +1297,11 @@ + break; + + no_cached_page: ++ if (nonblock) { ++ spin_unlock(&pagecache_lock); ++ desc->error = -EWOULDBLOCKIO; ++ break; ++ } + /* + * Ok, it wasn't cached, so we need to create a new + * page.. diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2 new file mode 100644 index 000000000000..4625013db74c Binary files /dev/null and b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-2.4.13-ac5-B0.bz2 differ diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1 new file mode 100644 index 000000000000..13692e77c87a --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-config-stuff-1 @@ -0,0 +1,22 @@ +diff -rNu linux-2.4.9-ac5/net/Config.in linux/net/Config.in +--- linux-2.4.9-ac5/net/Config.in Tue Aug 21 14:26:10 2001 ++++ linux/net/Config.in Fri Aug 31 17:36:42 2001 +@@ -20,6 +20,7 @@ + tristate 'Unix domain sockets' CONFIG_UNIX + bool 'TCP/IP networking' CONFIG_INET + if [ "$CONFIG_INET" = "y" ]; then ++ source net/tux/Config.in + source net/ipv4/Config.in + if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then + # IPv6 as module will cause a CRASH if you try to unload it +diff -rNu linux-2.4.9-ac5/net/Makefile linux/net/Makefile +--- linux-2.4.9-ac5/net/Makefile Tue Aug 21 14:26:19 2001 ++++ linux/net/Makefile Fri Aug 31 17:36:42 2001 +@@ -26,6 +26,7 @@ + endif + endif + ++subdir-$(CONFIG_TUX) += tux + subdir-$(CONFIG_KHTTPD) += khttpd + subdir-$(CONFIG_NETLINK) += netlink + subdir-$(CONFIG_PACKET) += packet diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1 new file mode 100644 index 000000000000..01b63bd39dc3 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-create_child-1 @@ -0,0 +1,23 @@ +diff -rNu linux-2.4.9-ac10/include/net/sock.h linux/include/net/sock.h +--- linux-2.4.9-ac10/include/net/sock.h Sat Sep 8 16:35:03 2001 ++++ linux/include/net/sock.h Sat Sep 22 09:49:09 2001 +@@ -677,6 +677,7 @@ + + int (*backlog_rcv) (struct sock *sk, + struct sk_buff *skb); ++ void (*create_child)(struct sock *sk, struct sock *newsk); + void (*destruct)(struct sock *sk); + }; + +diff -rNu linux-2.4.9-ac10/net/ipv4/tcp_minisocks.c linux/net/ipv4/tcp_minisocks.c +--- linux-2.4.9-ac10/net/ipv4/tcp_minisocks.c Sat Sep 8 16:35:04 2001 ++++ linux/net/ipv4/tcp_minisocks.c Sat Sep 22 09:49:09 2001 +@@ -682,6 +682,8 @@ + if ((filter = newsk->filter) != NULL) + sk_filter_charge(newsk, filter); + #endif ++ if 
(sk->create_child) ++ sk->create_child(sk, newsk); + + /* Now setup tcp_opt */ + newtp = &(newsk->tp_pinfo.af_tcp); diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1 new file mode 100644 index 000000000000..07f7287a6a9f --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-data-1 @@ -0,0 +1,12 @@ +--- tuxlayerprivate/include/net/sock.h.~1~ Sat Apr 28 20:32:17 2001 ++++ tuxlayerprivate/include/net/sock.h Sun Apr 29 16:44:42 2001 +@@ -669,6 +669,9 @@ + /* RPC layer private data */ + void *user_data; + ++ /* TUX application layer private data */ ++ void *tux_data; ++ + /* Callbacks */ + void (*state_change)(struct sock *sk); + void (*data_ready)(struct sock *sk,int bytes); diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2 new file mode 100644 index 000000000000..dc05bb6d9b6b --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-dprintk-2 @@ -0,0 +1,40 @@ +diff -urN 2.4.10pre14/net/netsyms.c tux-dprintk/net/netsyms.c +--- 2.4.10pre14/net/netsyms.c Sat Sep 22 08:06:26 2001 ++++ tux-dprintk/net/netsyms.c Sat Sep 22 11:07:38 2001 +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + #ifdef CONFIG_HIPPI + #include + #endif +@@ -574,3 +575,7 @@ + EXPORT_SYMBOL(softnet_data); + + #endif /* CONFIG_NET */ ++ ++EXPORT_SYMBOL(tux_Dprintk); ++EXPORT_SYMBOL(tux_TDprintk); ++ +diff -urN 2.4.10pre14/net/socket.c tux-dprintk/net/socket.c +--- 2.4.10pre14/net/socket.c Sat Sep 22 08:06:26 2001 ++++ tux-dprintk/net/socket.c Sat Sep 22 11:08:27 2001 +@@ -85,6 +85,7 @@ + #include + #include + #include ++#include + #include + + static int sock_no_open(struct inode *irrelevant, struct file *dontcare); +@@ -1738,6 +1739,9 @@ + bluez_init(); + #endif + } ++ ++int tux_Dprintk; ++int tux_TDprintk = 1; + + int socket_get_info(char *buffer, char **start, off_t offset, int length) + { diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1 new file mode 100644 index 000000000000..67061fc3cc79 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-exports-1 @@ -0,0 +1,122 @@ +diff -urN 2.4.4/arch/alpha/kernel/alpha_ksyms.c exports/arch/alpha/kernel/alpha_ksyms.c +--- 2.4.4/arch/alpha/kernel/alpha_ksyms.c Sat Apr 28 05:24:29 2001 ++++ exports/arch/alpha/kernel/alpha_ksyms.c Sun Apr 29 18:55:56 2001 +@@ -127,15 +127,11 @@ + /* In-kernel system calls. */ + EXPORT_SYMBOL(kernel_thread); + EXPORT_SYMBOL(sys_open); +-EXPORT_SYMBOL(sys_dup); + EXPORT_SYMBOL(sys_exit); +-EXPORT_SYMBOL(sys_write); +-EXPORT_SYMBOL(sys_read); + EXPORT_SYMBOL(sys_lseek); + EXPORT_SYMBOL(__kernel_execve); + EXPORT_SYMBOL(sys_setsid); + EXPORT_SYMBOL(sys_sync); +-EXPORT_SYMBOL(sys_wait4); + + /* Networking helper routines. 
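Stepping back to 60_tux-create_child-1 and 60_tux-data-1 above: together they give a kernel-level application one private pointer per socket (sk->tux_data) and an optional callback run when TCP spawns a child socket from a listener, so per-connection state can be seeded without hooking the fast path for everyone else (a NULL callback costs a single test). A stand-alone C model of the hook, with toy types in place of struct sock:

#include <stdio.h>

struct sock {
        void (*create_child)(struct sock *sk, struct sock *newsk);
        void *tux_data;         /* private pointer, as in 60_tux-data-1 */
};

static void tux_create_child(struct sock *sk, struct sock *newsk)
{
        newsk->tux_data = sk->tux_data; /* inherit the listener's state */
}

/* analogue of the tcp_minisocks.c hunk */
static void tcp_create_openreq_child(struct sock *sk, struct sock *newsk)
{
        /* ... usual minisock setup ... */
        if (sk->create_child)           /* hook is optional */
                sk->create_child(sk, newsk);
}

int main(void)
{
        struct sock listener = { tux_create_child, "listener state" };
        struct sock child = { 0 };

        tcp_create_openreq_child(&listener, &child);
        printf("child tux_data: %s\n", (char *)child.tux_data);
        return 0;
}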
*/ + EXPORT_SYMBOL(csum_tcpudp_magic); +diff -urN 2.4.4/include/asm-alpha/unistd.h exports/include/asm-alpha/unistd.h +--- 2.4.4/include/asm-alpha/unistd.h Sun Apr 1 20:11:24 2001 ++++ exports/include/asm-alpha/unistd.h Sun Apr 29 18:55:53 2001 +@@ -515,7 +515,7 @@ + return sys_open(name, mode, flags); + } + +-extern long sys_dup(int); ++extern long sys_dup(unsigned int); + static inline long dup(int fd) + { + return sys_dup(fd); +@@ -540,13 +540,11 @@ + + #define exit(x) _exit(x) + +-extern long sys_write(int, const char *, int); + static inline long write(int fd, const char * buf, int nr) + { + return sys_write(fd, buf, nr); + } + +-extern long sys_read(int, char *, int); + static inline long read(int fd, char * buf, int nr) + { + return sys_read(fd, buf, nr); +diff -urN 2.4.4/include/linux/fs.h exports/include/linux/fs.h +--- 2.4.4/include/linux/fs.h Sat Apr 28 05:24:47 2001 ++++ exports/include/linux/fs.h Sun Apr 29 18:55:53 2001 +@@ -554,6 +554,14 @@ + + extern int fcntl_getlk(unsigned int, struct flock *); + extern int fcntl_setlk(unsigned int, unsigned int, struct flock *); ++extern asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg); ++extern asmlinkage long sys_dup(unsigned int fildes); ++extern asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd); ++extern asmlinkage ssize_t sys_read(unsigned int fd, char * buf, size_t count); ++extern asmlinkage ssize_t sys_write(unsigned int fd, const char * buf, size_t count); ++extern asmlinkage long sys_chroot(const char * filename); ++extern asmlinkage long sys_chdir(const char * filename); ++ + + extern int fcntl_getlk64(unsigned int, struct flock64 *); + extern int fcntl_setlk64(unsigned int, unsigned int, struct flock64 *); +diff -urN 2.4.4/kernel/ksyms.c exports/kernel/ksyms.c +--- 2.4.4/kernel/ksyms.c Sat Apr 28 05:24:48 2001 ++++ exports/kernel/ksyms.c Sun Apr 29 18:55:53 2001 +@@ -9,6 +9,7 @@ + * by Bjorn Ekwall + */ + ++#define __KERNEL_SYSCALLS__ + #include + #include + #include +@@ -48,6 +49,8 @@ + #include + #include + #include ++#include ++ + + #if defined(CONFIG_PROC_FS) + #include +@@ -149,6 +152,13 @@ + EXPORT_SYMBOL(lookup_one); + EXPORT_SYMBOL(lookup_hash); + EXPORT_SYMBOL(sys_close); ++EXPORT_SYMBOL(sys_read); ++EXPORT_SYMBOL(sys_write); ++EXPORT_SYMBOL(sys_dup); ++EXPORT_SYMBOL(sys_chroot); ++EXPORT_SYMBOL(sys_chdir); ++EXPORT_SYMBOL(sys_fcntl); ++EXPORT_SYMBOL(do_pipe); + EXPORT_SYMBOL(dcache_lock); + EXPORT_SYMBOL(d_alloc_root); + EXPORT_SYMBOL(d_delete); +@@ -176,6 +186,7 @@ + EXPORT_SYMBOL(invalidate_inodes); + EXPORT_SYMBOL(invalidate_inode_pages); + EXPORT_SYMBOL(truncate_inode_pages); ++EXPORT_SYMBOL(invalidate_inode_pages2); + EXPORT_SYMBOL(fsync_dev); + EXPORT_SYMBOL(permission); + EXPORT_SYMBOL(vfs_permission); +@@ -358,6 +369,8 @@ + EXPORT_SYMBOL(add_wait_queue_exclusive); + EXPORT_SYMBOL(remove_wait_queue); + ++EXPORT_SYMBOL(flush_signal_handlers); ++ + /* The notion of irq probe/assignment is foreign to S/390 */ + + #if !defined(CONFIG_ARCH_S390) +@@ -427,6 +440,7 @@ + EXPORT_SYMBOL(interruptible_sleep_on_timeout); + EXPORT_SYMBOL(schedule); + EXPORT_SYMBOL(schedule_timeout); ++EXPORT_SYMBOL(sys_wait4); + EXPORT_SYMBOL(jiffies); + EXPORT_SYMBOL(xtime); + EXPORT_SYMBOL(do_gettimeofday); diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2 new file mode 100644 index 000000000000..e4edc9d743e6 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-kstat-2 @@ -0,0 +1,136 @@ 
+diff -urN 2.4.5pre5/fs/proc/proc_misc.c tux-kstat/fs/proc/proc_misc.c +--- 2.4.5pre5/fs/proc/proc_misc.c Tue May 1 19:35:29 2001 ++++ tux-kstat/fs/proc/proc_misc.c Wed May 23 19:07:26 2001 +@@ -259,6 +259,66 @@ + } + #endif + ++ ++/* ++ * print out TUX internal statistics into /proc/stat. ++ * (Most of them are not maintained if CONFIG_TUX_DEBUG is off.) ++ */ ++ ++static int print_tux_procinfo (char *page) ++{ ++ unsigned int len = 0, i; ++ ++#define P(x) \ ++ do { len += sprintf(page + len, #x ": %u\n", x); } while(0) ++ ++ P(kstat.input_fastpath); ++ P(kstat.input_slowpath); ++ P(kstat.inputqueue_got_packet); ++ P(kstat.inputqueue_no_packet); ++ P(kstat.nr_keepalive_optimized); ++ P(kstat.parse_static_incomplete); ++ P(kstat.parse_static_redirect); ++ P(kstat.parse_static_cachemiss); ++ P(kstat.parse_static_nooutput); ++ P(kstat.parse_static_normal); ++ P(kstat.parse_dynamic_incomplete); ++ P(kstat.parse_dynamic_redirect); ++ P(kstat.parse_dynamic_cachemiss); ++ P(kstat.parse_dynamic_nooutput); ++ P(kstat.parse_dynamic_normal); ++ P(kstat.complete_parsing); ++ P(kstat.nr_free_pending); ++ P(kstat.nr_allocated); ++ P(kstat.nr_idle_input_pending); ++ P(kstat.nr_output_space_pending); ++ P(kstat.nr_input_pending); ++ P(kstat.nr_cachemiss_pending); ++ P(kstat.nr_secondary_pending); ++ P(kstat.nr_output_pending); ++ P(kstat.nr_redirect_pending); ++ P(kstat.nr_finish_pending); ++ P(kstat.nr_userspace_pending); ++ P(kstat.nr_postpone_pending); ++ P(kstat.static_lookup_cachemisses); ++ P(kstat.static_sendfile_cachemisses); ++ P(kstat.user_lookup_cachemisses); ++ P(kstat.user_fetch_cachemisses); ++ P(kstat.user_sendobject_cachemisses); ++ P(kstat.user_sendobject_write_misses); ++ P(kstat.nr_keepalive_reqs); ++ P(kstat.nr_nonkeepalive_reqs); ++ ++ len += sprintf(page + len, "keephist: "); ++ for (i = 0; i < KEEPALIVE_HIST_SIZE; i++) ++ if (kstat.keepalive_hist[i]) ++ len += sprintf(page + len, "%d(%d) ", ++ i, kstat.keepalive_hist[i]); ++ len += sprintf(page + len, "\n"); ++#undef P ++ ++ return len; ++} + static int kstat_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) + { +@@ -333,6 +393,8 @@ + kstat.context_swtch, + xtime.tv_sec - jif / HZ, + total_forks); ++ ++ len += print_tux_procinfo(page+len); + + return proc_calc_metrics(page, start, off, count, eof, len); + } +diff -urN 2.4.5pre5/include/linux/kernel_stat.h tux-kstat/include/linux/kernel_stat.h +--- 2.4.5pre5/include/linux/kernel_stat.h Tue May 15 21:40:17 2001 ++++ tux-kstat/include/linux/kernel_stat.h Wed May 23 19:06:38 2001 +@@ -33,6 +33,53 @@ + unsigned int ierrors, oerrors; + unsigned int collisions; + unsigned int context_swtch; ++ unsigned int context_swtch_cross; ++ unsigned int nr_free_pending; ++ unsigned int nr_allocated; ++ unsigned int nr_idle_input_pending; ++ unsigned int nr_output_space_pending; ++ unsigned int nr_work_pending; ++ unsigned int nr_input_pending; ++ unsigned int nr_cachemiss_pending; ++ unsigned int nr_secondary_pending; ++ unsigned int nr_output_pending; ++ unsigned int nr_redirect_pending; ++ unsigned int nr_postpone_pending; ++ unsigned int nr_finish_pending; ++ unsigned int nr_userspace_pending; ++ unsigned int static_lookup_cachemisses; ++ unsigned int static_sendfile_cachemisses; ++ unsigned int user_lookup_cachemisses; ++ unsigned int user_fetch_cachemisses; ++ unsigned int user_sendobject_cachemisses; ++ unsigned int user_sendobject_write_misses; ++ unsigned int user_sendbuf_cachemisses; ++ unsigned int user_sendbuf_write_misses; ++#define URL_HIST_SIZE 1000 ++ 
unsigned int url_hist_hits[URL_HIST_SIZE]; ++ unsigned int url_hist_misses[URL_HIST_SIZE]; ++ unsigned int input_fastpath; ++ unsigned int input_slowpath; ++ unsigned int inputqueue_got_packet; ++ unsigned int inputqueue_no_packet; ++ unsigned int nr_keepalive_optimized; ++ ++ unsigned int parse_static_incomplete; ++ unsigned int parse_static_redirect; ++ unsigned int parse_static_cachemiss; ++ unsigned int parse_static_nooutput; ++ unsigned int parse_static_normal; ++ unsigned int parse_dynamic_incomplete; ++ unsigned int parse_dynamic_redirect; ++ unsigned int parse_dynamic_cachemiss; ++ unsigned int parse_dynamic_nooutput; ++ unsigned int parse_dynamic_normal; ++ unsigned int complete_parsing; ++ ++ unsigned int nr_keepalive_reqs; ++ unsigned int nr_nonkeepalive_reqs; ++#define KEEPALIVE_HIST_SIZE 100 ++ unsigned int keepalive_hist[KEEPALIVE_HIST_SIZE]; + }; + + extern struct kernel_stat kstat; diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1 new file mode 100644 index 000000000000..2284d3d4f5b9 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-process-1 @@ -0,0 +1,42 @@ +diff -urN ref/include/linux/sched.h tuxsched/include/linux/sched.h +--- ref/include/linux/sched.h Sat Apr 28 20:29:48 2001 ++++ tuxsched/include/linux/sched.h Sun Apr 29 17:31:18 2001 +@@ -396,6 +396,10 @@ + int (*notifier)(void *priv); + void *notifier_data; + sigset_t *notifier_mask; ++ ++ /* TUX state */ ++ void *tux_info; ++ void (*tux_exit)(void); + + /* Thread group tracking */ + u32 parent_exec_id; +diff -urN ref/kernel/exit.c tuxsched/kernel/exit.c +--- ref/kernel/exit.c Sat Apr 28 18:37:45 2001 ++++ tuxsched/kernel/exit.c Sun Apr 29 17:30:54 2001 +@@ -439,6 +439,13 @@ + #ifdef CONFIG_BSD_PROCESS_ACCT + acct_process(code); + #endif ++ if (current->tux_info) { ++#ifdef CONFIG_TUX_DEBUG ++ printk("Possibly unexpected TUX-thread exit(%ld) at %p?\n", ++ code, __builtin_return_address(0)); ++#endif ++ current->tux_exit(); ++ } + __exit_mm(tsk); + + lock_kernel(); +diff -urN ref/kernel/fork.c tuxsched/kernel/fork.c +--- ref/kernel/fork.c Sat Apr 28 18:37:45 2001 ++++ tuxsched/kernel/fork.c Sun Apr 29 17:30:54 2001 +@@ -574,6 +574,7 @@ + goto fork_out; + + *p = *current; ++ p->tux_info = NULL; + + retval = -EAGAIN; + if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur) diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2 new file mode 100644 index 000000000000..040043331c7b --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-syscall-2 @@ -0,0 +1,113 @@ +diff -urN 2.4.11pre2/arch/alpha/kernel/entry.S tux-syscall/arch/alpha/kernel/entry.S +--- 2.4.11pre2/arch/alpha/kernel/entry.S Sat Aug 11 08:03:53 2001 ++++ tux-syscall/arch/alpha/kernel/entry.S Tue Oct 2 23:58:24 2001 +@@ -988,7 +988,15 @@ + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 220 */ + .quad alpha_ni_syscall ++#ifdef CONFIG_TUX ++ .quad __sys_tux ++#else ++# ifdef CONFIG_TUX_MODULE ++ .quad sys_tux ++# else + .quad alpha_ni_syscall ++# endif ++#endif + .quad alpha_ni_syscall + .quad alpha_ni_syscall + .quad alpha_ni_syscall /* 225 */ +diff -urN 2.4.11pre2/arch/i386/kernel/entry.S tux-syscall/arch/i386/kernel/entry.S +--- 2.4.11pre2/arch/i386/kernel/entry.S Tue Oct 2 00:08:30 2001 ++++ tux-syscall/arch/i386/kernel/entry.S Tue Oct 2 23:58:56 2001 +@@ -619,7 +619,15 @@ + .long SYMBOL_NAME(sys_madvise) + .long 
SYMBOL_NAME(sys_getdents64) /* 220 */ + .long SYMBOL_NAME(sys_fcntl64) ++#ifdef CONFIG_TUX ++ .long SYMBOL_NAME(__sys_tux) ++#else ++# ifdef CONFIG_TUX_MODULE ++ .long SYMBOL_NAME(sys_tux) ++# else + .long SYMBOL_NAME(sys_ni_syscall) /* reserved for TUX */ ++# endif ++#endif + .long SYMBOL_NAME(sys_ni_syscall) /* Reserved for Security */ + + .rept NR_syscalls-(.-sys_call_table)/4 +diff -urN 2.4.11pre2/net/netsyms.c tux-syscall/net/netsyms.c +--- 2.4.11pre2/net/netsyms.c Sun Sep 23 21:11:43 2001 ++++ tux-syscall/net/netsyms.c Tue Oct 2 23:58:24 2001 +@@ -55,7 +55,7 @@ + + extern struct net_proto_family inet_family_ops; + +-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) ++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_TUX) || defined (CONFIG_TUX_MODULE) + #include + #include + #include +@@ -285,7 +285,7 @@ + EXPORT_SYMBOL(register_inet6addr_notifier); + EXPORT_SYMBOL(unregister_inet6addr_notifier); + #endif +-#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) ++#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_TUX) || defined (CONFIG_TUX_MODULE) + /* inet functions common to v4 and v6 */ + EXPORT_SYMBOL(inet_release); + EXPORT_SYMBOL(inet_stream_connect); +@@ -572,5 +572,11 @@ + + EXPORT_SYMBOL(net_call_rx_atomic); + EXPORT_SYMBOL(softnet_data); ++ ++#ifdef CONFIG_TUX_MODULE ++EXPORT_SYMBOL(tux_module_lock); ++EXPORT_SYMBOL(tux_module); ++EXPORT_SYMBOL(sys_tux_ptr); ++#endif + + #endif /* CONFIG_NET */ +diff -urN 2.4.11pre2/net/socket.c tux-syscall/net/socket.c +--- 2.4.11pre2/net/socket.c Tue Oct 2 00:08:46 2001 ++++ tux-syscall/net/socket.c Tue Oct 2 23:58:24 2001 +@@ -1764,3 +1764,38 @@ + len = 0; + return len; + } ++ ++#ifdef CONFIG_TUX_MODULE ++ ++int (*sys_tux_ptr) (unsigned int action, user_req_t *u_info) = NULL; ++ ++struct module *tux_module = NULL; ++spinlock_t tux_module_lock = SPIN_LOCK_UNLOCKED; ++ ++asmlinkage int sys_tux (unsigned int action, user_req_t *u_info) ++{ ++ int ret; ++ ++ if (current->tux_info) ++ return sys_tux_ptr(action, u_info); ++ ++ ret = -ENOSYS; ++ spin_lock(&tux_module_lock); ++ if (!tux_module) ++ goto out_unlock; ++ __MOD_INC_USE_COUNT(tux_module); ++ spin_unlock(&tux_module_lock); ++ ++ if (!sys_tux_ptr) ++ TUX_BUG(); ++ ret = sys_tux_ptr(action, u_info); ++ ++ spin_lock(&tux_module_lock); ++ __MOD_DEC_USE_COUNT(tux_module); ++out_unlock: ++ spin_unlock(&tux_module_lock); ++ ++ return ret; ++} ++ ++#endif diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2 new file mode 100644 index 000000000000..26b98e71b68a --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-sysctl-2 @@ -0,0 +1,69 @@ +diff -urN 2.4.5pre5/include/linux/sysctl.h tux-sysctl/include/linux/sysctl.h +--- 2.4.5pre5/include/linux/sysctl.h Tue May 22 22:04:27 2001 ++++ tux-sysctl/include/linux/sysctl.h Wed May 23 19:20:48 2001 +@@ -157,7 +157,8 @@ + NET_TR=14, + NET_DECNET=15, + NET_ECONET=16, +- NET_KHTTPD=17 ++ NET_KHTTPD=17, ++ NET_TUX=18 + }; + + /* /proc/sys/kernel/random */ +@@ -471,6 +472,55 @@ + NET_DECNET_DST_GC_INTERVAL = 9, + NET_DECNET_CONF = 10, + NET_DECNET_DEBUG_LEVEL = 255 ++}; ++ ++/* /proc/sys/net/tux/ */ ++enum { ++ NET_TUX_DOCROOT = 1, ++ NET_TUX_LOGFILE = 2, ++ NET_TUX_EXTCGI = 3, ++ NET_TUX_STOP = 4, ++ 
NET_TUX_CLIENTPORT = 5, ++ NET_TUX_LOGGING = 6, ++ NET_TUX_SERVERPORT = 7, ++ NET_TUX_THREADS = 8, ++ NET_TUX_KEEPALIVE_TIMEOUT = 9, ++ NET_TUX_MAX_KEEPALIVE_BW = 10, ++ NET_TUX_DEFER_ACCEPT = 11, ++ NET_TUX_MAX_FREE_REQUESTS = 12, ++ NET_TUX_MAX_CONNECT = 13, ++ NET_TUX_MAX_BACKLOG = 14, ++ NET_TUX_MODE_FORBIDDEN = 15, ++ NET_TUX_MODE_ALLOWED = 16, ++ NET_TUX_MODE_USERSPACE = 17, ++ NET_TUX_MODE_CGI = 18, ++ NET_TUX_CGI_UID = 19, ++ NET_TUX_CGI_GID = 20, ++ NET_TUX_CGIROOT = 21, ++ NET_TUX_LOGENTRY_ALIGN_ORDER = 22, ++ NET_TUX_NONAGLE = 23, ++ NET_TUX_ACK_PINGPONG = 24, ++ NET_TUX_PUSH_ALL = 25, ++ NET_TUX_ZEROCOPY_PARSE = 26, ++ NET_CONFIG_TUX_DEBUG_BLOCKING = 27, ++ NET_TUX_PAGE_AGE_START = 28, ++ NET_TUX_PAGE_AGE_ADV = 29, ++ NET_TUX_PAGE_AGE_MAX = 30, ++ NET_TUX_VIRTUAL_SERVER = 31, ++ NET_TUX_MAX_OBJECT_SIZE = 32, ++ NET_TUX_COMPRESSION = 33, ++ NET_TUX_NOID = 34, ++ NET_TUX_CGI_INHERIT_CPU = 35, ++ NET_TUX_CGI_CPU_MASK = 36, ++ NET_TUX_ZEROCOPY_HEADER = 37, ++ NET_TUX_ZEROCOPY_SENDFILE = 38, ++ NET_TUX_ALL_USERSPACE = 39, ++ NET_TUX_REDIRECT_LOGGING = 40, ++ NET_TUX_REFERER_LOGGING = 41, ++ NET_TUX_MAX_HEADER_LEN = 42, ++ NET_TUX_404_PAGE = 43, ++ NET_TUX_APPLICATION_PROTOCOL = 44, ++ NET_TUX_MAX_KEEPALIVES = 45, + }; + + /* /proc/sys/net/khttpd/ */ diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1 new file mode 100644 index 000000000000..df7850171025 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-timer_t-1 @@ -0,0 +1,11 @@ +--- 2.4.11pre2aa1/include/linux/timer.h.~1~ Wed Oct 3 00:35:01 2001 ++++ 2.4.11pre2aa1/include/linux/timer.h Wed Oct 3 00:44:55 2001 +@@ -20,6 +20,8 @@ + void (*function)(unsigned long); + }; + ++typedef struct timer_list timer_t; ++ + extern void add_timer(struct timer_list * timer); + extern int del_timer(struct timer_list * timer); + diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2 new file mode 100644 index 000000000000..da98d2536a4a --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/60_tux-vfs-2 @@ -0,0 +1,79 @@ +diff -urN ref/fs/dcache.c new/fs/dcache.c +--- ref/fs/dcache.c Sat Jun 9 00:04:48 2001 ++++ new/fs/dcache.c Tue Jun 12 15:44:51 2001 +@@ -62,6 +62,10 @@ + dentry->d_op->d_release(dentry); + if (dname_external(dentry)) + kfree(dentry->d_name.name); ++ if (dentry->d_tux_data) { ++ kfree(dentry->d_tux_data); ++ dentry->d_tux_data = NULL; ++ } + kmem_cache_free(dentry_cache, dentry); + dentry_stat.nr_dentry--; + } +@@ -616,6 +620,7 @@ + dentry->d_name.hash = name->hash; + dentry->d_op = NULL; + dentry->d_fsdata = NULL; ++ dentry->d_tux_data = NULL; + dentry->d_mounted = 0; + INIT_LIST_HEAD(&dentry->d_hash); + INIT_LIST_HEAD(&dentry->d_lru); +@@ -1162,6 +1167,26 @@ + } + out: + return ino; ++} ++ ++void flush_dentry_tuxinfo (void) ++{ ++ struct list_head *chain, *tmp; ++ struct dentry *dentry; ++ int i; ++ ++ spin_lock(&dcache_lock); ++ for (i = 0; i <= d_hash_mask; i++) { ++ chain = dentry_hashtable + i; ++ tmp = chain->next; ++ while (tmp != chain) { ++ dentry = list_entry(tmp, struct dentry, d_hash); ++ kfree(dentry->d_tux_data); ++ dentry->d_tux_data = NULL; ++ tmp = tmp->next; ++ } ++ } ++ spin_unlock(&dcache_lock); + } + + static void __init dcache_init(unsigned long mempages) +diff -urN ref/include/linux/dcache.h new/include/linux/dcache.h +--- ref/include/linux/dcache.h Tue Jun 12 05:30:15 2001 ++++ new/include/linux/dcache.h Tue Jun 12 
15:44:07 2001 +@@ -80,6 +80,7 @@ + struct super_block * d_sb; /* The root of the dentry tree */ + unsigned long d_vfs_flags; + void * d_fsdata; /* fs-specific data */ ++ void *d_tux_data; /* TUX-specific data */ + unsigned char d_iname[DNAME_INLINE_LEN]; /* small names */ + }; + +@@ -167,6 +168,7 @@ + extern void shrink_dcache_sb(struct super_block *); + extern void shrink_dcache_parent(struct dentry *); + extern int d_invalidate(struct dentry *); ++extern void flush_dentry_tuxinfo (void); + + #define shrink_dcache() prune_dcache(0) + struct zone_struct; +diff -urN ref/kernel/ksyms.c new/kernel/ksyms.c +--- ref/kernel/ksyms.c Tue Jun 12 15:43:46 2001 ++++ new/kernel/ksyms.c Tue Jun 12 15:44:07 2001 +@@ -229,6 +229,7 @@ + EXPORT_SYMBOL(prune_dcache); + EXPORT_SYMBOL(shrink_dcache_sb); + EXPORT_SYMBOL(shrink_dcache_parent); ++EXPORT_SYMBOL(flush_dentry_tuxinfo); + EXPORT_SYMBOL(find_inode_number); + EXPORT_SYMBOL(is_subdir); + EXPORT_SYMBOL(get_unused_fd); diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2 new file mode 100644 index 000000000000..033be85d71d6 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-generic-file-read-2 @@ -0,0 +1,21 @@ +diff -urN tux-gen-read-ref/net/tux/output.c tux-gen-read/net/tux/output.c +--- tux-gen-read-ref/net/tux/output.c Fri Oct 12 08:53:29 2001 ++++ tux-gen-read/net/tux/output.c Fri Oct 12 08:54:31 2001 +@@ -191,7 +191,7 @@ + req->desc.buf = (char *) &sock_desc; + req->desc.error = 0; + Dprintk("sendfile(), desc.count: %d.\n", req->desc.count); +- do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, sock_send_actor, nonblock); ++ __do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, sock_send_actor, nonblock); + if (req->desc.written > 0) { + req->bytes_sent += req->desc.written; + req->output_len -= req->desc.written; +@@ -259,7 +259,7 @@ + req->desc.buf = NULL; + req->desc.error = 0; + +- do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, ++ __do_generic_file_read(&req->in_file, &req->in_file.f_pos, &req->desc, + file_fetch_actor, nonblock); + if (nonblock && (req->desc.error == -EWOULDBLOCKIO)) + return 1; diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1 new file mode 100644 index 000000000000..fda52175a473 --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-invalidate_inode_pages2-1 @@ -0,0 +1,11 @@ +--- 2.4.10pre2aa2/net/tux/logger.c.~1~ Fri Aug 31 17:52:05 2001 ++++ 2.4.10pre2aa2/net/tux/logger.c Fri Aug 31 17:53:41 2001 +@@ -613,7 +613,7 @@ + * Reduce the cache footprint of the logger file - it's + * typically write-once. 
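The 62_tux-invalidate_inode_pages2-1 hunk just below replaces TUX's private flush_inode_pages() with the stock invalidate_inode_pages2() to shed the page cache backing a write-once log file. The nearest user-space analogue (from later POSIX, used here purely for illustration) is posix_fadvise(POSIX_FADV_DONTNEED); a minimal sketch, where the log name is made up and the data must be synced first or the pages stay put:

#define _POSIX_C_SOURCE 200112L
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int err;
        int fd = open("tux.log", O_WRONLY | O_CREAT | O_APPEND, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (write(fd, "GET /index.html\n", 16) != 16)
                perror("write");
        fsync(fd);                      /* dirty pages would not be dropped */
        err = posix_fadvise(fd, 0, 0, POSIX_FADV_DONTNEED);
        if (err)                        /* returns the error, not via errno */
                fprintf(stderr, "posix_fadvise: %d\n", err);
        close(fd);
        return 0;
}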
+ */ +- flush_inode_pages(log_filp->f_dentry->d_inode); ++ invalidate_inode_pages2(log_filp->f_dentry->d_inode->i_mapping); + + out_lock: + spin_lock(&log_lock); diff --git a/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1 b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1 new file mode 100644 index 000000000000..a7a49c64f04c --- /dev/null +++ b/sys-kernel/linux-sources/files/2.4.15pre1aa1/62_tux-uml-1 @@ -0,0 +1,32 @@ +diff -urN uml-ref/arch/um/kernel/sys_call_table.c uml/arch/um/kernel/sys_call_table.c +--- uml-ref/arch/um/kernel/sys_call_table.c Tue Jul 10 17:58:59 2001 ++++ uml/arch/um/kernel/sys_call_table.c Tue Jul 10 17:59:15 2001 +@@ -12,12 +12,9 @@ + extern syscall_handler_t sys_ni_syscall; + extern syscall_handler_t sys_exit; + extern syscall_handler_t sys_fork; +-extern syscall_handler_t sys_read; +-extern syscall_handler_t sys_write; + extern syscall_handler_t sys_creat; + extern syscall_handler_t sys_link; + extern syscall_handler_t sys_unlink; +-extern syscall_handler_t sys_chdir; + extern syscall_handler_t sys_mknod; + extern syscall_handler_t sys_chmod; + extern syscall_handler_t sys_lchown16; +@@ -56,15 +53,12 @@ + extern syscall_handler_t sys_umount; + extern syscall_handler_t sys_ni_syscall; + extern syscall_handler_t sys_ioctl; +-extern syscall_handler_t sys_fcntl; + extern syscall_handler_t sys_ni_syscall; + extern syscall_handler_t sys_setpgid; + extern syscall_handler_t sys_ni_syscall; + extern syscall_handler_t sys_olduname; + extern syscall_handler_t sys_umask; +-extern syscall_handler_t sys_chroot; + extern syscall_handler_t sys_ustat; +-extern syscall_handler_t sys_dup2; + extern syscall_handler_t sys_getppid; + extern syscall_handler_t sys_getpgrp; + extern syscall_handler_t sys_sigaction; -- cgit v1.2.3-65-gdbad
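One pattern in this series deserves a closing note: 60_tux-syscall-2 wires a permanent stub into the syscall tables when TUX is built modular. The stub returns -ENOSYS until the module stores its entry point in sys_tux_ptr, and each call bumps the module use count (under tux_module_lock in the real code) so the module cannot unload mid-call. A stand-alone, single-threaded C model of that dispatch, with locking omitted and a plain counter standing in for __MOD_INC_USE_COUNT():

#include <errno.h>
#include <stdio.h>

static int (*sys_tux_ptr)(unsigned int action); /* NULL until "module load" */
static int module_use_count;

static int tux_handler(unsigned int action)
{
        printf("tux action %u handled by module\n", action);
        return 0;
}

/* analogue of the stub sys_tux() added to net/socket.c */
static int sys_tux(unsigned int action)
{
        int ret;

        if (!sys_tux_ptr)
                return -ENOSYS;         /* no handler registered */
        module_use_count++;             /* pin the module for the call */
        ret = sys_tux_ptr(action);
        module_use_count--;             /* and release it */
        return ret;
}

int main(void)
{
        printf("before load: %d\n", sys_tux(1));
        sys_tux_ptr = tux_handler;      /* module init registers its entry point */
        printf("after load:  %d\n", sys_tux(1));
        return 0;
}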